From c2cc68edfb2f09c86fd387d2b4e6dcb231c725b0 Mon Sep 17 00:00:00 2001
From: Tyler
Date: Mon, 10 Jul 2023 17:04:11 -0700
Subject: [PATCH 1/2] added individual retrying for failed batches

---
 src/autolabel/models/anthropic.py   | 15 +++++++++++----
 src/autolabel/models/cohere.py      | 12 ++++++++++--
 src/autolabel/models/hf_pipeline.py | 12 ++++++++++--
 src/autolabel/models/openai.py      | 12 ++++++++++--
 src/autolabel/models/palm.py        | 18 +++++++++++++++---
 5 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/src/autolabel/models/anthropic.py b/src/autolabel/models/anthropic.py
index 00958cb3..35b61272 100644
--- a/src/autolabel/models/anthropic.py
+++ b/src/autolabel/models/anthropic.py
@@ -40,11 +40,18 @@ def __init__(self, config: AutolabelConfig, cache: BaseCache = None) -> None:
     def _label(self, prompts: List[str]) -> LLMResult:
         prompts = [[HumanMessage(content=prompt)] for prompt in prompts]
         try:
-            response = self.llm.generate(prompts)
-            return response
+            return self.llm.generate(prompts)
         except Exception as e:
-            print(f"Error generating from LLM: {e}, returning empty result")
-            generations = [[Generation(text="")] for _ in prompts]
+            print(f"Error generating from LLM: {e}, retrying each prompt individually")
+            generations = []
+            for i, prompt in enumerate(prompts):
+                try:
+                    response = self.llm.generate([prompt])
+                    generations.append(response.generations[0])
+                except Exception as e:
+                    print(f"Error generating from LLM: {e}, returning empty generation")
+                    generations.append([Generation(text="")])
+
             return LLMResult(generations=generations)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
diff --git a/src/autolabel/models/cohere.py b/src/autolabel/models/cohere.py
index be550dd4..03433a60 100644
--- a/src/autolabel/models/cohere.py
+++ b/src/autolabel/models/cohere.py
@@ -41,8 +41,16 @@ def _label(self, prompts: List[str]) -> LLMResult:
         try:
             return self.llm.generate(prompts)
         except Exception as e:
-            print(f"Error generating from LLM: {e}, returning empty result")
-            generations = [[Generation(text="")] for _ in prompts]
+            print(f"Error generating from LLM: {e}, retrying each prompt individually")
+            generations = []
+            for i, prompt in enumerate(prompts):
+                try:
+                    response = self.llm.generate([prompt])
+                    generations.append(response.generations[0])
+                except Exception as e:
+                    print(f"Error generating from LLM: {e}, returning empty generation")
+                    generations.append([Generation(text="")])
+
             return LLMResult(generations=generations)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
diff --git a/src/autolabel/models/hf_pipeline.py b/src/autolabel/models/hf_pipeline.py
index a3e6e2ba..56600e92 100644
--- a/src/autolabel/models/hf_pipeline.py
+++ b/src/autolabel/models/hf_pipeline.py
@@ -69,8 +69,16 @@ def _label(self, prompts: List[str]) -> LLMResult:
         try:
             return self.llm.generate(prompts)
         except Exception as e:
-            print(f"Error generating from LLM: {e}, returning empty result")
-            generations = [[Generation(text="")] for _ in prompts]
+            print(f"Error generating from LLM: {e}, retrying each prompt individually")
+            generations = []
+            for i, prompt in enumerate(prompts):
+                try:
+                    response = self.llm.generate([prompt])
+                    generations.append(response.generations[0])
+                except Exception as e:
+                    print(f"Error generating from LLM: {e}, returning empty generation")
+                    generations.append([Generation(text="")])
+
             return LLMResult(generations=generations)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
diff --git a/src/autolabel/models/openai.py b/src/autolabel/models/openai.py
index d4b6607c..6dc39513 100644
--- a/src/autolabel/models/openai.py
+++ b/src/autolabel/models/openai.py
@@ -137,8 +137,16 @@ def _label(self, prompts: List[str]) -> LLMResult:
         try:
             return self.llm.generate(prompts)
         except Exception as e:
-            print(f"Error generating from LLM: {e}, returning empty result")
-            generations = [[Generation(text="")] for _ in prompts]
+            print(f"Error generating from LLM: {e}, retrying each prompt individually")
+            generations = []
+            for i, prompt in enumerate(prompts):
+                try:
+                    response = self.llm.generate([prompt])
+                    generations.append(response.generations[0])
+                except Exception as e:
+                    print(f"Error generating from LLM: {e}, returning empty generation")
+                    generations.append([Generation(text="")])
+
             return LLMResult(generations=generations)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
diff --git a/src/autolabel/models/palm.py b/src/autolabel/models/palm.py
index 7fe2a5f5..31baf81d 100644
--- a/src/autolabel/models/palm.py
+++ b/src/autolabel/models/palm.py
@@ -94,9 +94,21 @@ def _label(self, prompts: List[str]) -> LLMResult:
                     )
             return result
         except Exception as e:
-            logger.error(f"Error generating from LLM: {e}.")
-            generations = [[Generation(text="")] for _ in prompts]
-            return LLMResult(generations=generations)
+            print(f"Error generating from LLM: {e}, retrying each prompt individually")
+            generations = []
+            for i, prompt in enumerate(prompts):
+                try:
+                    response = self._label_with_retry([prompt])
+                    for generation in response.generations[0]:
+                        generation.text = generation.text.replace(
+                            self.SEP_REPLACEMENT_TOKEN, "\n"
+                        )
+                    generations.append(response.generations[0])
+                except Exception as e:
+                    print(f"Error generating from LLM: {e}, returning empty generation")
+                    generations.append([Generation(text="")])
+
+            return LLMResult(generations=generations)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
         if self.model_name is None:

From 301f58e2c10faac5a79ba9779fe96da9fcf44b8f Mon Sep 17 00:00:00 2001
From: Tyler
Date: Tue, 11 Jul 2023 10:10:42 -0700
Subject: [PATCH 2/2] consolidated _label_individually to base llm

---
 src/autolabel/models/anthropic.py   | 13 ++--------
 src/autolabel/models/base.py        | 22 +++++++++++++++++++++-
 src/autolabel/models/cohere.py      | 11 +-------
 src/autolabel/models/hf_pipeline.py | 13 ++--------
 src/autolabel/models/openai.py      | 13 ++--------
 src/autolabel/models/palm.py        | 39 ++++++++++++++++++-----------
 6 files changed, 53 insertions(+), 58 deletions(-)

diff --git a/src/autolabel/models/anthropic.py b/src/autolabel/models/anthropic.py
index 35b61272..3177f97d 100644
--- a/src/autolabel/models/anthropic.py
+++ b/src/autolabel/models/anthropic.py
@@ -5,7 +5,7 @@
 from autolabel.models import BaseModel
 from autolabel.cache import BaseCache
 from langchain.chat_models import ChatAnthropic
-from langchain.schema import Generation, LLMResult, HumanMessage
+from langchain.schema import LLMResult, HumanMessage
 
 
 class AnthropicLLM(BaseModel):
@@ -43,16 +43,7 @@ def _label(self, prompts: List[str]) -> LLMResult:
             return self.llm.generate(prompts)
         except Exception as e:
             print(f"Error generating from LLM: {e}, retrying each prompt individually")
-            generations = []
-            for i, prompt in enumerate(prompts):
-                try:
-                    response = self.llm.generate([prompt])
-                    generations.append(response.generations[0])
-                except Exception as e:
-                    print(f"Error generating from LLM: {e}, returning empty generation")
-                    generations.append([Generation(text="")])
-
-            return LLMResult(generations=generations)
+            return self._label_individually(prompts)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
         num_prompt_toks = tokenizer.count_tokens(prompt)
diff --git a/src/autolabel/models/base.py b/src/autolabel/models/base.py
index c0ee8742..4eaca991 100644
--- a/src/autolabel/models/base.py
+++ b/src/autolabel/models/base.py
@@ -3,7 +3,7 @@
 from abc import ABC, abstractmethod
 from typing import List, Optional, Dict, Tuple
 
-from langchain.schema import LLMResult
+from langchain.schema import LLMResult, Generation
 
 from autolabel.configs import AutolabelConfig
 from autolabel.schema import CacheEntry
@@ -52,6 +52,26 @@ def label(self, prompts: List[str]) -> Tuple[LLMResult, float]:
             generations = [existing_prompts[i] for i in range(len(prompts))]
         return LLMResult(generations=generations, llm_output=llm_output), cost
 
+    def _label_individually(self, prompts: List[str]) -> LLMResult:
+        """Label each prompt individually. Should be used only after trying as a batch first.
+
+        Args:
+            prompts (List[str]): List of prompts to label
+
+        Returns:
+            LLMResult: LLMResult object with generations
+        """
+        generations = []
+        for prompt in prompts:
+            try:
+                response = self.llm.generate([prompt])
+                generations.append(response.generations[0])
+            except Exception as e:
+                print(f"Error generating from LLM: {e}, returning empty generation")
+                generations.append([Generation(text="")])
+
+        return LLMResult(generations=generations)
+
     @abstractmethod
     def _label(self, prompts: List[str]) -> LLMResult:
         # TODO: change return type to do parsing in the Model class
diff --git a/src/autolabel/models/cohere.py b/src/autolabel/models/cohere.py
index 03433a60..cf9eb197 100644
--- a/src/autolabel/models/cohere.py
+++ b/src/autolabel/models/cohere.py
@@ -42,16 +42,7 @@ def _label(self, prompts: List[str]) -> LLMResult:
             return self.llm.generate(prompts)
         except Exception as e:
             print(f"Error generating from LLM: {e}, retrying each prompt individually")
-            generations = []
-            for i, prompt in enumerate(prompts):
-                try:
-                    response = self.llm.generate([prompt])
-                    generations.append(response.generations[0])
-                except Exception as e:
-                    print(f"Error generating from LLM: {e}, returning empty generation")
-                    generations.append([Generation(text="")])
-
-            return LLMResult(generations=generations)
+            return self._label_individually(prompts)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
         num_prompt_toks = len(self.co.tokenize(prompt).tokens)
diff --git a/src/autolabel/models/hf_pipeline.py b/src/autolabel/models/hf_pipeline.py
index 56600e92..bf7de21d 100644
--- a/src/autolabel/models/hf_pipeline.py
+++ b/src/autolabel/models/hf_pipeline.py
@@ -1,6 +1,6 @@
 from typing import List, Optional
 from langchain.llms import HuggingFacePipeline
-from langchain.schema import LLMResult, Generation
+from langchain.schema import LLMResult
 
 from autolabel.models import BaseModel
 from autolabel.configs import AutolabelConfig
@@ -70,16 +70,7 @@ def _label(self, prompts: List[str]) -> LLMResult:
             return self.llm.generate(prompts)
         except Exception as e:
             print(f"Error generating from LLM: {e}, retrying each prompt individually")
-            generations = []
-            for i, prompt in enumerate(prompts):
-                try:
-                    response = self.llm.generate([prompt])
-                    generations.append(response.generations[0])
-                except Exception as e:
-                    print(f"Error generating from LLM: {e}, returning empty generation")
-                    generations.append([Generation(text="")])
-
-            return LLMResult(generations=generations)
+            return self._label_individually(prompts)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
         # Model inference for this model is being run locally
diff --git a/src/autolabel/models/openai.py b/src/autolabel/models/openai.py
index 6dc39513..9912d042 100644
--- a/src/autolabel/models/openai.py
+++ b/src/autolabel/models/openai.py
@@ -4,7 +4,7 @@
 
 from langchain.chat_models import ChatOpenAI
 from langchain.llms import OpenAI
-from langchain.schema import LLMResult, HumanMessage, Generation
+from langchain.schema import LLMResult, HumanMessage
 import tiktoken
 
 from autolabel.models import BaseModel
@@ -138,16 +138,7 @@ def _label(self, prompts: List[str]) -> LLMResult:
             return self.llm.generate(prompts)
         except Exception as e:
             print(f"Error generating from LLM: {e}, retrying each prompt individually")
-            generations = []
-            for i, prompt in enumerate(prompts):
-                try:
-                    response = self.llm.generate([prompt])
-                    generations.append(response.generations[0])
-                except Exception as e:
-                    print(f"Error generating from LLM: {e}, returning empty generation")
-                    generations.append([Generation(text="")])
-
-            return LLMResult(generations=generations)
+            return self._label_individually(prompts)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
         encoding = tiktoken.encoding_for_model(self.model_name)
diff --git a/src/autolabel/models/palm.py b/src/autolabel/models/palm.py
index 31baf81d..252a2d6c 100644
--- a/src/autolabel/models/palm.py
+++ b/src/autolabel/models/palm.py
@@ -66,6 +66,30 @@ def __init__(self, config: AutolabelConfig, cache: BaseCache = None) -> None:
     def _label_with_retry(self, prompts: List[str]) -> LLMResult:
         return self.llm.generate(prompts)
 
+    def _label_individually(self, prompts: List[str]) -> LLMResult:
+        """Label each prompt individually. Should be used only after trying as a batch first.
+
+        Args:
+            prompts (List[str]): List of prompts to label
+
+        Returns:
+            LLMResult: LLMResult object with generations
+        """
+        generations = []
+        for i, prompt in enumerate(prompts):
+            try:
+                response = self._label_with_retry([prompt])
+                for generation in response.generations[0]:
+                    generation.text = generation.text.replace(
+                        self.SEP_REPLACEMENT_TOKEN, "\n"
+                    )
+                generations.append(response.generations[0])
+            except Exception as e:
+                print(f"Error generating from LLM: {e}, returning empty generation")
+                generations.append([Generation(text="")])
+
+        return LLMResult(generations=generations)
+
     def _label(self, prompts: List[str]) -> LLMResult:
         for prompt in prompts:
             if self.SEP_REPLACEMENT_TOKEN in prompt:
@@ -95,20 +119,7 @@ def _label(self, prompts: List[str]) -> LLMResult:
             return result
         except Exception as e:
             print(f"Error generating from LLM: {e}, retrying each prompt individually")
-            generations = []
-            for i, prompt in enumerate(prompts):
-                try:
-                    response = self._label_with_retry([prompt])
-                    for generation in response.generations[0]:
-                        generation.text = generation.text.replace(
-                            self.SEP_REPLACEMENT_TOKEN, "\n"
-                        )
-                    generations.append(response.generations[0])
-                except Exception as e:
-                    print(f"Error generating from LLM: {e}, returning empty generation")
-                    generations.append([Generation(text="")])
-
-            return LLMResult(generations=generations)
+            return self._label_individually(prompts)
 
     def get_cost(self, prompt: str, label: Optional[str] = "") -> float:
         if self.model_name is None: