From 95cadd4f2feb62d4dda9c0d42cef7cec2403cda7 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 6 Feb 2024 09:15:05 +0530 Subject: [PATCH 1/5] added task, ignore_eos and changed output format --- examples/text-generation/text-generation-pipeline/pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/text-generation/text-generation-pipeline/pipeline.py b/examples/text-generation/text-generation-pipeline/pipeline.py index 0c2905a731..f1998ce8e3 100644 --- a/examples/text-generation/text-generation-pipeline/pipeline.py +++ b/examples/text-generation/text-generation-pipeline/pipeline.py @@ -7,11 +7,13 @@ class GaudiTextGenerationPipeline(TextGenerationPipeline): def __init__(self, args, logger): self.model, self.tokenizer, self.generation_config = initialize_model(args, logger) + self.task = "text-generation" self.device = args.device if args.do_sample: self.generation_config.temperature = args.temperature self.generation_config.top_p = args.top_p + self.generation_config.ignore_eos = False self.max_padding_length = args.max_input_tokens if args.max_input_tokens > 0 else 100 self.use_hpu_graphs = args.use_hpu_graphs @@ -44,4 +46,4 @@ def __call__(self, prompt: str): ).cpu() output_text = self.tokenizer.decode(output[0], skip_special_tokens=True) - return output_text + return [{"generated_text": output_text}] From a2e959c95367a502e125333bc18ee6aca056dd04 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 6 Feb 2024 09:21:23 +0530 Subject: [PATCH 2/5] extract output from updated pipeline --- .../text-generation/text-generation-pipeline/run_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/text-generation/text-generation-pipeline/run_pipeline.py b/examples/text-generation/text-generation-pipeline/run_pipeline.py index 03bbaa6e91..22d313388f 100644 --- a/examples/text-generation/text-generation-pipeline/run_pipeline.py +++ b/examples/text-generation/text-generation-pipeline/run_pipeline.py @@ 
-40,7 +40,7 @@ def main(): for input_sentence in input_sentences: print(f"Prompt: {input_sentence}") t0 = time.perf_counter() - output = pipe(input_sentence) + output = (pipe(input_sentence))[0]["generated_text"] duration = time.perf_counter() - t0 throughput = args.max_new_tokens / duration print(f"Generated Text: {repr(output)}") From 34445195ac55cc244b733ab0ffc9f70cb5ac2723 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 6 Feb 2024 13:30:44 +0530 Subject: [PATCH 3/5] decide output format based on use_with_langchain optional argument --- .../text-generation-pipeline/pipeline.py | 13 ++++++++++--- .../text-generation-pipeline/run_pipeline.py | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/text-generation/text-generation-pipeline/pipeline.py b/examples/text-generation/text-generation-pipeline/pipeline.py index f1998ce8e3..e2b987cd47 100644 --- a/examples/text-generation/text-generation-pipeline/pipeline.py +++ b/examples/text-generation/text-generation-pipeline/pipeline.py @@ -4,7 +4,7 @@ class GaudiTextGenerationPipeline(TextGenerationPipeline): - def __init__(self, args, logger): + def __init__(self, args, logger, use_with_langchain=False): self.model, self.tokenizer, self.generation_config = initialize_model(args, logger) self.task = "text-generation" @@ -13,13 +13,16 @@ def __init__(self, args, logger): if args.do_sample: self.generation_config.temperature = args.temperature self.generation_config.top_p = args.top_p - self.generation_config.ignore_eos = False self.max_padding_length = args.max_input_tokens if args.max_input_tokens > 0 else 100 self.use_hpu_graphs = args.use_hpu_graphs self.profiling_steps = args.profiling_steps self.profiling_warmup_steps = args.profiling_warmup_steps + self.use_with_langchain = use_with_langchain + if self.use_with_langchain: + self.generation_config.ignore_eos = False + import habana_frameworks.torch.hpu as torch_hpu logger.info("Graph compilation...") @@ -46,4 +49,8 @@ def __call__(self, 
prompt: str): ).cpu() output_text = self.tokenizer.decode(output[0], skip_special_tokens=True) - return [{"generated_text": output_text}] + + if self.use_with_langchain: + return [{"generated_text": output_text}] + + return output_text diff --git a/examples/text-generation/text-generation-pipeline/run_pipeline.py b/examples/text-generation/text-generation-pipeline/run_pipeline.py index 22d313388f..03bbaa6e91 100644 --- a/examples/text-generation/text-generation-pipeline/run_pipeline.py +++ b/examples/text-generation/text-generation-pipeline/run_pipeline.py @@ -40,7 +40,7 @@ def main(): for input_sentence in input_sentences: print(f"Prompt: {input_sentence}") t0 = time.perf_counter() - output = (pipe(input_sentence))[0]["generated_text"] + output = pipe(input_sentence) duration = time.perf_counter() - t0 throughput = args.max_new_tokens / duration print(f"Generated Text: {repr(output)}") From 6a0050ab13de337a9d4a855e1b4b3168be2d817e Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Tue, 6 Feb 2024 13:58:34 +0530 Subject: [PATCH 4/5] applied code formatting --- examples/text-generation/text-generation-pipeline/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/text-generation/text-generation-pipeline/pipeline.py b/examples/text-generation/text-generation-pipeline/pipeline.py index e2b987cd47..5ad7d38871 100644 --- a/examples/text-generation/text-generation-pipeline/pipeline.py +++ b/examples/text-generation/text-generation-pipeline/pipeline.py @@ -52,5 +52,5 @@ def __call__(self, prompt: str): if self.use_with_langchain: return [{"generated_text": output_text}] - + return output_text From 76945a59523495e1f48df170042942babbe15a80 Mon Sep 17 00:00:00 2001 From: sjagtap1803 Date: Wed, 7 Feb 2024 11:44:42 +0530 Subject: [PATCH 5/5] update README --- .../text-generation-pipeline/README.md | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/examples/text-generation/text-generation-pipeline/README.md 
b/examples/text-generation/text-generation-pipeline/README.md index 39aa462384..e73243dc8f 100644 --- a/examples/text-generation/text-generation-pipeline/README.md +++ b/examples/text-generation/text-generation-pipeline/README.md @@ -31,6 +31,11 @@ If you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorc pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 ``` +If you would like to use the pipeline with LangChain classes, you can install LangChain as follows: +```bash +pip install langchain==0.0.191 +``` + ## Usage To run generation with DeepSpeed-inference, you must launch the script as follows: @@ -125,3 +130,40 @@ python ../../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ --top_p 0.95 \ --prompt "Hello world" "How are you?" "Here is my prompt" "Once upon a time" ``` + +### Usage with LangChain + +The text-generation pipeline can be fed as input to LangChain classes via the `use_with_langchain` constructor argument. Here is a sample snippet that shows how the pipeline class can be used with LangChain. +```python +from langchain.llms import HuggingFacePipeline +from langchain.prompts import PromptTemplate +from langchain.chains import LLMChain + +# Initialize the pipeline +pipe = GaudiTextGenerationPipeline(args, logger, use_with_langchain=True) + +# Create LangChain object +llm = HuggingFacePipeline(pipeline=pipe) + +template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \ +just say that you don't know, don't try to make up an answer. + +Context: Large Language Models (LLMs) are the latest models used in NLP. +Their superior performance over smaller models has made them incredibly +useful for developers building NLP enabled applications. These models +can be accessed via Hugging Face's `transformers` library, via OpenAI +using the `openai` library, and via Cohere using the `cohere` library.
+ +Question: {question} +Answer: """ + +prompt = PromptTemplate(input_variables=["question"], template=template) +llm_chain = LLMChain(prompt=prompt, llm=llm) + +# Use LangChain object +question = "Which libraries and model providers offer LLMs?" +response = llm_chain(question) +print(f"Question: {question}") +print(f"Response: {response['text']}") +``` +> The pipeline class has been validated for LangChain version 0.0.191 and may not work with other versions of the package.