CarperAI · Dahoas · Nov 10, 2022 · Nov 10, 2022 · Nov 10, 2022
diff --git a/examples/ilql_sentiments.py b/examples/ilql_sentiments.py
@@ -4,13 +4,26 @@
 from transformers import pipeline
 
 import trlx
+from typing import List, Dict
 
-if __name__ == "__main__":
-    sentiment_fn = pipeline("sentiment-analysis", "lvwerra/distilbert-imdb")
 
-    def metric_fn(samples):
-        outputs = sentiment_fn(samples, return_all_scores=True)
-        sentiments = [output[1]["score"] for output in outputs]
+def get_positive_score(scores):
+    """Return the "score" of the entry labelled "POSITIVE" in one sample's pipeline output (a list of label/score dicts); raises KeyError if there is none."""
+    return {entry["label"]: entry["score"] for entry in scores}["POSITIVE"]  # keyed lookup: independent of dict key order
+
+
+def main():
+    sentiment_fn = pipeline(
+        "sentiment-analysis",
+        "lvwerra/distilbert-imdb",
+        top_k=2,
+        truncation=True,
+        batch_size=256,
+        device=-1,
+    )
+
+    def metric_fn(samples: List[str]) -> Dict[str, List[float]]:
+        sentiments = [get_positive_score(scores) for scores in sentiment_fn(samples)]
         return {"sentiments": sentiments}
 
     imdb = load_dataset("imdb", split="train+test")
@@ -21,3 +34,7 @@ def metric_fn(samples):
         eval_prompts=["I don't know much about Hungarian underground"] * 64,
         metric_fn=metric_fn,
     )
+
+
+if __name__ == "__main__":  # run the example only when executed as a script, not on import
+    main()
diff --git a/examples/ppo_sentiments.py b/examples/ppo_sentiments.py
@@ -1,17 +1,37 @@
-# Generates positive movie reviews by tuning a pretrained on IMDB model
+# Generates positive movie reviews by tuning a pretrained model on the IMDB dataset
 # with a sentiment reward function
 
 from datasets import load_dataset
 from transformers import pipeline
+import os
 
 import trlx
+import torch
+from typing import List
+
+
+def get_positive_score(scores):
+    """Return the "score" of the entry labelled "POSITIVE" in one sample's pipeline output (a list of label/score dicts); raises KeyError if there is none."""
+    return {entry["label"]: entry["score"] for entry in scores}["POSITIVE"]  # keyed lookup: independent of dict key order
 
-if __name__ == "__main__":
-    sentiment_fn = pipeline("sentiment-analysis", "lvwerra/distilbert-imdb", device=-1)
 
-    def reward_fn(samples):
-        outputs = sentiment_fn(samples, return_all_scores=True)
-        sentiments = [output[1]["score"] for output in outputs]
+def main():
+    if torch.cuda.is_available():
+        device = int(os.environ.get("LOCAL_RANK", 0))
+    else:
+        device = -1
+
+    sentiment_fn = pipeline(
+        "sentiment-analysis",
+        "lvwerra/distilbert-imdb",
+        top_k=2,
+        truncation=True,
+        batch_size=256,
+        device=device,
+    )
+
+    def reward_fn(samples: List[str]) -> List[float]:
+        sentiments = [get_positive_score(scores) for scores in sentiment_fn(samples)]
         return sentiments
 
     # Take few words off of movies reviews as prompts
@@ -24,3 +44,7 @@ def reward_fn(samples):
         prompts=prompts,
         eval_prompts=["I don't know much about Hungarian underground"] * 64,
     )
+
+
+if __name__ == "__main__":  # run the example only when executed as a script, not on import
+    main()