```python
from torch import optim
from torch.utils.data import DataLoader, random_split

import pytorch_lightning as pl

from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset

from instruct_goose.reward import RewardModel, PairwiseLoss
from instruct_goose.dataset import PairDataset
```
## How to train a reward model?
### Step 1: Create a reward model from a pre-trained language model
```python
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

reward_model = RewardModel(checkpoint="gpt2")
```
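`RewardModel` wraps the pre-trained checkpoint for you. Conceptually, a reward model of this kind is just a language-model backbone with a scalar value head on top, scored at the last non-padding token. The sketch below is only an illustration of that idea under those assumptions (`TinyRewardModel` and its details are made up here; it is not the library's implementation):

```python
import torch
from torch import nn
from transformers import AutoModelForCausalLM

class TinyRewardModel(nn.Module):
    """Illustrative sketch only, not instruct_goose's actual RewardModel."""
    def __init__(self, checkpoint: str = "gpt2"):
        super().__init__()
        self.backbone = AutoModelForCausalLM.from_pretrained(checkpoint)
        self.value_head = nn.Linear(self.backbone.config.hidden_size, 1)

    def forward(self, input_ids, attention_mask):
        outputs = self.backbone(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
        )
        hidden = outputs.hidden_states[-1]                    # (batch, seq, hidden)
        token_rewards = self.value_head(hidden).squeeze(-1)   # (batch, seq)
        # Use the reward at the last non-padded position as the sequence score
        last_idx = attention_mask.sum(dim=1) - 1
        return token_rewards[torch.arange(token_rewards.size(0)), last_idx]
```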
### Step 2: Create a pairwise dataset
```python
dataset = load_dataset("CarperAI/openai_summarize_comparisons", split="train")
dataset, _ = random_split(dataset, lengths=[10, len(dataset) - 10])  # for demo purposes
```
```
Using custom data configuration CarperAI--openai_summarize_comparisons-79d2c222a15dc8fb
Found cached dataset parquet (/Users/education/.cache/huggingface/datasets/CarperAI___parquet/CarperAI--openai_summarize_comparisons-79d2c222a15dc8fb/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
```
```python
pair_dataset = PairDataset(dataset, tokenizer)
dataloader = DataLoader(pair_dataset, batch_size=2)
```
```
100%|██████████| 10/10 [00:00<00:00, 822.85it/s]
```
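Each comparison pairs a human-preferred ("chosen") summary with a rejected one for the same prompt, and `PairDataset` tokenizes both sides. To sanity-check the shapes you can pull one batch; the unpacking order below mirrors how the training step in Step 3 unpacks a batch, which we take as the layout `PairDataset` produces:

```python
chosen_input_ids, chosen_attention_mask, \
    rejected_input_ids, rejected_attention_mask = next(iter(dataloader))

print(chosen_input_ids.shape)    # (batch_size, seq_len), here batch_size=2
print(rejected_input_ids.shape)  # rejected side of the same comparisons
```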
### Step 3: Write a training loop
```python
N_EPOCHS = 1  # for demo purposes
LEARNING_RATE = 1e-3

pairwise_loss = PairwiseLoss()
```
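`PairwiseLoss` encodes the usual preference-ranking objective: the chosen response should receive a higher reward than the rejected one. In its standard form (as in InstructGPT-style reward modelling; instruct_goose's exact sign and reduction conventions may differ, as the negative loss values printed in the log further down suggest):

$$\mathcal{L}(\theta) = -\,\mathbb{E}_{(x,\,y_w,\,y_l)}\Big[\log \sigma\big(r_\theta(x, y_w) - r_\theta(x, y_l)\big)\Big]$$

where $y_w$ is the chosen summary, $y_l$ the rejected one, and $r_\theta$ the scalar reward produced by the model.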
```python
class LitRewardModel(pl.LightningModule):
    def __init__(self, model, loss_func, lr):
        super().__init__()
        self.model = model
        self.loss_func = loss_func
        self.lr = lr

    def training_step(self, batch, batch_idx: int):
        chosen_input_ids, chosen_attention_mask, \
            rejected_input_ids, rejected_attention_mask = batch

        chosen_rewards = self.model(chosen_input_ids, chosen_attention_mask)
        rejected_rewards = self.model(rejected_input_ids, rejected_attention_mask)

        loss = self.loss_func(chosen_rewards, rejected_rewards)
        print(f"loss={loss}")
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        return optimizer
```
```python
lit_model = LitRewardModel(reward_model, pairwise_loss, lr=1e-3)
trainer = pl.Trainer(max_epochs=1, log_every_n_steps=1)
```
```
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
```
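As the log above shows, this demo runs on CPU. If a GPU is available, the standard Lightning `Trainer` arguments select it; one possible configuration (argument names follow recent PyTorch Lightning releases) would be:

```python
trainer = pl.Trainer(
    max_epochs=N_EPOCHS,
    log_every_n_steps=1,
    accelerator="gpu",   # or "auto" to let Lightning pick the available hardware
    devices=1,
)
```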
```python
trainer.fit(model=lit_model, train_dataloaders=dataloader)
```
```
Missing logger folder: /Users/education/DATA/projects/ai/RLHF/instructGOOSE/nbs/lightning_logs

  | Name      | Type         | Params
-------------------------------------------
0 | model     | RewardModel  | 124 M
1 | loss_func | PairwiseLoss | 0
-------------------------------------------
124 M     Trainable params
0         Non-trainable params
124 M     Total params
497.762   Total estimated model params size (MB)

/Users/education/DATA/projects/ai/RLHF/instructGOOSE/env/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:224: PossibleUserWarning: The dataloader, train_dataloader, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 8 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
  rank_zero_warn(

loss=-0.2531266510486603
loss=-0.2498958855867386
loss=-0.24884334206581116
loss=-0.2499789297580719
loss=-0.23997953534126282
`Trainer.fit` stopped: `max_epochs=1` reached.
```
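Once training finishes, the reward model can score arbitrary text, which is what the RL stage of RLHF consumes. A small usage sketch, calling the model the same way the training step does (with `input_ids` and `attention_mask`):

```python
import torch

text = "A candidate summary we want the reward model to judge."
inputs = tokenizer(text, return_tensors="pt")

reward_model.eval()
with torch.no_grad():
    reward = reward_model(inputs["input_ids"], inputs["attention_mask"])

print(reward)  # one scalar reward per input sequence
```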