Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix pipeline's context overflow #87

Merged
merged 2 commits into from
Nov 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions examples/ilql_sentiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,26 @@
from transformers import pipeline

import trlx
from typing import List, Dict

if __name__ == "__main__":
sentiment_fn = pipeline("sentiment-analysis", "lvwerra/distilbert-imdb")

def metric_fn(samples):
outputs = sentiment_fn(samples, return_all_scores=True)
sentiments = [output[1]["score"] for output in outputs]
def get_positive_score(scores):
    """Extract the score associated with the positive sentiment.

    Args:
        scores: An iterable of mappings, each carrying a ``"label"`` key
            and a ``"score"`` key (presumably the per-sample output of the
            sentiment pipeline configured with ``top_k=2`` -- confirm
            against the pipeline's documentation).

    Returns:
        The ``"score"`` value of the entry whose ``"label"`` is
        ``"POSITIVE"``. If several entries share that label, the last one
        wins.

    Raises:
        KeyError: If no entry is labelled ``"POSITIVE"``, or an entry lacks
            the ``"label"`` or ``"score"`` key.
    """
    # Look the fields up by name rather than relying on the insertion order
    # of each entry's values, which silently breaks if the keys are ever
    # emitted as (score, label) instead of (label, score).
    label_to_score = {entry["label"]: entry["score"] for entry in scores}
    return label_to_score["POSITIVE"]


def main():
sentiment_fn = pipeline(
"sentiment-analysis",
"lvwerra/distilbert-imdb",
top_k=2,
truncation=True,
batch_size=256,
device=-1,
)

def metric_fn(samples: List[str]) -> Dict[str, List[float]]:
    """Run the sentiment pipeline on ``samples`` and collect positive scores.

    Returns a dict with a single ``"sentiments"`` key holding one value per
    sample, each obtained by passing that sample's pipeline output through
    ``get_positive_score``.
    """
    pipeline_outputs = sentiment_fn(samples)
    positive_scores = [get_positive_score(output) for output in pipeline_outputs]
    return {"sentiments": positive_scores}

imdb = load_dataset("imdb", split="train+test")
Expand All @@ -21,3 +34,7 @@ def metric_fn(samples):
eval_prompts=["I don't know much about Hungarian underground"] * 64,
metric_fn=metric_fn,
)


if __name__ == "__main__":
main()
36 changes: 30 additions & 6 deletions examples/ppo_sentiments.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,37 @@
# Generates positive movie reviews by tuning a pretrained on IMDB model
# Generates positive movie reviews by tuning a pretrained model on IMDB dataset
# with a sentiment reward function

from datasets import load_dataset
from transformers import pipeline
import os

import trlx
import torch
from typing import List


def get_positive_score(scores):
    """Extract the score associated with the positive sentiment.

    Args:
        scores: An iterable of mappings, each carrying a ``"label"`` key
            and a ``"score"`` key (presumably the per-sample output of the
            sentiment pipeline configured with ``top_k=2`` -- confirm
            against the pipeline's documentation).

    Returns:
        The ``"score"`` value of the entry whose ``"label"`` is
        ``"POSITIVE"``. If several entries share that label, the last one
        wins.

    Raises:
        KeyError: If no entry is labelled ``"POSITIVE"``, or an entry lacks
            the ``"label"`` or ``"score"`` key.
    """
    # Look the fields up by name rather than relying on the insertion order
    # of each entry's values, which silently breaks if the keys are ever
    # emitted as (score, label) instead of (label, score).
    label_to_score = {entry["label"]: entry["score"] for entry in scores}
    return label_to_score["POSITIVE"]

if __name__ == "__main__":
sentiment_fn = pipeline("sentiment-analysis", "lvwerra/distilbert-imdb", device=-1)

def reward_fn(samples):
outputs = sentiment_fn(samples, return_all_scores=True)
sentiments = [output[1]["score"] for output in outputs]
def main():
if torch.cuda.is_available():
device = int(os.environ.get("LOCAL_RANK", 0))
else:
device = -1

sentiment_fn = pipeline(
"sentiment-analysis",
"lvwerra/distilbert-imdb",
top_k=2,
truncation=True,
batch_size=256,
device=device,
)

def reward_fn(samples: List[str]) -> List[float]:
    """Run the sentiment pipeline on ``samples`` and return positive scores.

    The result has one value per sample, each obtained by passing that
    sample's pipeline output through ``get_positive_score``.
    """
    pipeline_outputs = sentiment_fn(samples)
    return [get_positive_score(output) for output in pipeline_outputs]

# Take a few words off of movie reviews as prompts
Expand All @@ -24,3 +44,7 @@ def reward_fn(samples):
prompts=prompts,
eval_prompts=["I don't know much about Hungarian underground"] * 64,
)


if __name__ == "__main__":
main()