How to train a reward model?

from torch import optim
from torch.utils.data import DataLoader, random_split

import pytorch_lightning as pl
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

from instruct_goose.reward import RewardModel, PairwiseLoss
from instruct_goose.dataset import PairDataset

Step 1: Create a reward model from a pre-trained language model

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token
reward_model = RewardModel(checkpoint="gpt2")
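
The snippet above comes from instruct_goose; conceptually, a reward model of this kind is a language-model backbone with a small head that maps the final hidden state to a single scalar score. A rough sketch of that idea (an illustration only, not instruct_goose's actual RewardModel; TinyRewardModel is a made-up name):

import torch
from torch import nn
from transformers import AutoModel

class TinyRewardModel(nn.Module):
    """Illustrative reward model: causal-LM backbone + scalar value head."""
    def __init__(self, checkpoint: str = "gpt2"):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(checkpoint)
        self.value_head = nn.Linear(self.backbone.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        hidden = self.backbone(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state
        last_token_idx = attention_mask.sum(dim=1) - 1  # last non-padding position per sequence
        last_hidden = hidden[torch.arange(hidden.size(0)), last_token_idx]
        return self.value_head(last_hidden).squeeze(-1)  # one scalar reward per sequence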

Step 2: Create a pairwise dataset

dataset = load_dataset("CarperAI/openai_summarize_comparisons", split="train")
dataset, _ = random_split(dataset, lengths=[10, len(dataset) - 10]) # for demo purposes
pair_dataset = PairDataset(dataset, tokenizer)
dataloader = DataLoader(pair_dataset, batch_size=2)
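
Each record in openai_summarize_comparisons pairs a prompt with a human-preferred ("chosen") and a dispreferred ("rejected") summary. PairDataset tokenizes both sides, so every batch from the DataLoader carries four tensors, in the same order the training step below unpacks them. A quick sanity check (the variable names here are just for illustration):

# Peek at one batch to confirm the layout the training step expects
chosen_ids, chosen_mask, rejected_ids, rejected_mask = next(iter(dataloader))
print(chosen_ids.shape, rejected_ids.shape)  # (batch_size, seq_len) each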

Step 3: Write a training loop
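
The heart of the loop is the pairwise ranking objective. In the InstructGPT line of work, reward models are trained with loss = -log sigmoid(r_chosen - r_rejected), which pushes the reward of the preferred response above that of the rejected one. A minimal sketch of that objective (instruct_goose ships its own PairwiseLoss, which may differ in detail):

import torch
import torch.nn.functional as F

def pairwise_ranking_loss(chosen_rewards: torch.Tensor, rejected_rewards: torch.Tensor) -> torch.Tensor:
    # -log sigmoid(r_chosen - r_rejected), averaged over the batch
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()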

N_EPOCHS = 1 # for demo purposes
LEARNING_RATE = 1e-3

pairwise_loss = PairwiseLoss()

class LitRewardModel(pl.LightningModule):
    def __init__(
        self, model, loss_func, lr
    ):
        super().__init__()
        self.model = model
        self.loss_func = loss_func
        self.lr = lr
    
    def training_step(self, batch, batch_idx: int):
        chosen_input_ids, chosen_attention_mask,\
        rejected_input_ids, rejected_attention_mask = batch
        
        chosen_rewards = self.model(chosen_input_ids, chosen_attention_mask)
        rejected_rewards = self.model(rejected_input_ids, rejected_attention_mask)
        
        loss = self.loss_func(chosen_rewards, rejected_rewards)
        
        print(f"loss={loss}")
        
        return loss
    
    def configure_optimizers(self):
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        return optimizer

lit_model = LitRewardModel(reward_model, pairwise_loss, lr=LEARNING_RATE)
trainer = pl.Trainer(max_epochs=N_EPOCHS, log_every_n_steps=1)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
trainer.fit(model=lit_model, train_dataloaders=dataloader)

  | Name      | Type         | Params
-------------------------------------------
0 | model     | RewardModel  | 124 M 
1 | loss_func | PairwiseLoss | 0     
-------------------------------------------
124 M     Trainable params
0         Non-trainable params
124 M     Total params
497.762   Total estimated model params size (MB)
loss=-0.2531266510486603
loss=-0.2498958855867386
loss=-0.24884334206581116
loss=-0.2499789297580719
loss=-0.23997953534126282
`Trainer.fit` stopped: `max_epochs=1` reached.
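
Once trained, the reward model can score an arbitrary piece of text. A quick sketch reusing the forward signature from the training step above (the example sentence is made up):

# A higher output means the model judges the text as more preferable
inputs = tokenizer("The summary was concise and accurate.", return_tensors="pt")
reward = reward_model(inputs["input_ids"], inputs["attention_mask"])
print(reward)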