From 95cadd4f2feb62d4dda9c0d42cef7cec2403cda7 Mon Sep 17 00:00:00 2001
From: sjagtap1803 <siddhant.jagtap@intel.com>
Date: Tue, 6 Feb 2024 09:15:05 +0530
Subject: [PATCH 1/5] added task, ignore_eos and changed output format

---
 examples/text-generation/text-generation-pipeline/pipeline.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/text-generation/text-generation-pipeline/pipeline.py b/examples/text-generation/text-generation-pipeline/pipeline.py
index 0c2905a731..f1998ce8e3 100644
--- a/examples/text-generation/text-generation-pipeline/pipeline.py
+++ b/examples/text-generation/text-generation-pipeline/pipeline.py
@@ -7,11 +7,13 @@ class GaudiTextGenerationPipeline(TextGenerationPipeline):
     def __init__(self, args, logger):
         self.model, self.tokenizer, self.generation_config = initialize_model(args, logger)
 
+        self.task = "text-generation"
         self.device = args.device
 
         if args.do_sample:
             self.generation_config.temperature = args.temperature
             self.generation_config.top_p = args.top_p
+        self.generation_config.ignore_eos = False
 
         self.max_padding_length = args.max_input_tokens if args.max_input_tokens > 0 else 100
         self.use_hpu_graphs = args.use_hpu_graphs
@@ -44,4 +46,4 @@ def __call__(self, prompt: str):
         ).cpu()
 
         output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
-        return output_text
+        return [{"generated_text": output_text}]

From a2e959c95367a502e125333bc18ee6aca056dd04 Mon Sep 17 00:00:00 2001
From: sjagtap1803 <siddhant.jagtap@intel.com>
Date: Tue, 6 Feb 2024 09:21:23 +0530
Subject: [PATCH 2/5] extract output from updated pipeline

---
 .../text-generation/text-generation-pipeline/run_pipeline.py    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/text-generation/text-generation-pipeline/run_pipeline.py b/examples/text-generation/text-generation-pipeline/run_pipeline.py
index 03bbaa6e91..22d313388f 100644
--- a/examples/text-generation/text-generation-pipeline/run_pipeline.py
+++ b/examples/text-generation/text-generation-pipeline/run_pipeline.py
@@ -40,7 +40,7 @@ def main():
     for input_sentence in input_sentences:
         print(f"Prompt: {input_sentence}")
         t0 = time.perf_counter()
-        output = pipe(input_sentence)
+        output = (pipe(input_sentence))[0]["generated_text"]
         duration = time.perf_counter() - t0
         throughput = args.max_new_tokens / duration
         print(f"Generated Text: {repr(output)}")

From 34445195ac55cc244b733ab0ffc9f70cb5ac2723 Mon Sep 17 00:00:00 2001
From: sjagtap1803 <siddhant.jagtap@intel.com>
Date: Tue, 6 Feb 2024 13:30:44 +0530
Subject: [PATCH 3/5] decide output format based on use_with_langchain optional
 argument

---
 .../text-generation-pipeline/pipeline.py            | 13 ++++++++++---
 .../text-generation-pipeline/run_pipeline.py        |  2 +-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/examples/text-generation/text-generation-pipeline/pipeline.py b/examples/text-generation/text-generation-pipeline/pipeline.py
index f1998ce8e3..e2b987cd47 100644
--- a/examples/text-generation/text-generation-pipeline/pipeline.py
+++ b/examples/text-generation/text-generation-pipeline/pipeline.py
@@ -4,7 +4,7 @@
 
 
 class GaudiTextGenerationPipeline(TextGenerationPipeline):
-    def __init__(self, args, logger):
+    def __init__(self, args, logger, use_with_langchain=False):
         self.model, self.tokenizer, self.generation_config = initialize_model(args, logger)
 
         self.task = "text-generation"
@@ -13,13 +13,16 @@ def __init__(self, args, logger):
         if args.do_sample:
             self.generation_config.temperature = args.temperature
             self.generation_config.top_p = args.top_p
-        self.generation_config.ignore_eos = False
 
         self.max_padding_length = args.max_input_tokens if args.max_input_tokens > 0 else 100
         self.use_hpu_graphs = args.use_hpu_graphs
         self.profiling_steps = args.profiling_steps
         self.profiling_warmup_steps = args.profiling_warmup_steps
 
+        self.use_with_langchain = use_with_langchain
+        if self.use_with_langchain:
+            self.generation_config.ignore_eos = False
+
         import habana_frameworks.torch.hpu as torch_hpu
 
         logger.info("Graph compilation...")
@@ -46,4 +49,8 @@ def __call__(self, prompt: str):
         ).cpu()
 
         output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
-        return [{"generated_text": output_text}]
+
+        if self.use_with_langchain:
+            return [{"generated_text": output_text}]
+        
+        return output_text
diff --git a/examples/text-generation/text-generation-pipeline/run_pipeline.py b/examples/text-generation/text-generation-pipeline/run_pipeline.py
index 22d313388f..03bbaa6e91 100644
--- a/examples/text-generation/text-generation-pipeline/run_pipeline.py
+++ b/examples/text-generation/text-generation-pipeline/run_pipeline.py
@@ -40,7 +40,7 @@ def main():
     for input_sentence in input_sentences:
         print(f"Prompt: {input_sentence}")
         t0 = time.perf_counter()
-        output = (pipe(input_sentence))[0]["generated_text"]
+        output = pipe(input_sentence)
         duration = time.perf_counter() - t0
         throughput = args.max_new_tokens / duration
         print(f"Generated Text: {repr(output)}")

From 6a0050ab13de337a9d4a855e1b4b3168be2d817e Mon Sep 17 00:00:00 2001
From: sjagtap1803 <siddhant.jagtap@intel.com>
Date: Tue, 6 Feb 2024 13:58:34 +0530
Subject: [PATCH 4/5] applied code formatting

---
 examples/text-generation/text-generation-pipeline/pipeline.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/text-generation/text-generation-pipeline/pipeline.py b/examples/text-generation/text-generation-pipeline/pipeline.py
index e2b987cd47..5ad7d38871 100644
--- a/examples/text-generation/text-generation-pipeline/pipeline.py
+++ b/examples/text-generation/text-generation-pipeline/pipeline.py
@@ -52,5 +52,5 @@ def __call__(self, prompt: str):
 
         if self.use_with_langchain:
             return [{"generated_text": output_text}]
-        
+
         return output_text

From 76945a59523495e1f48df170042942babbe15a80 Mon Sep 17 00:00:00 2001
From: sjagtap1803 <siddhant.jagtap@intel.com>
Date: Wed, 7 Feb 2024 11:44:42 +0530
Subject: [PATCH 5/5] update README

---
 .../text-generation-pipeline/README.md        | 42 +++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/examples/text-generation/text-generation-pipeline/README.md b/examples/text-generation/text-generation-pipeline/README.md
index 39aa462384..e73243dc8f 100644
--- a/examples/text-generation/text-generation-pipeline/README.md
+++ b/examples/text-generation/text-generation-pipeline/README.md
@@ -31,6 +31,11 @@ If you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorc
 pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
 ```
 
+If you would like to use the pipeline with LangChain classes, you can install LangChain as follows:
+```bash
+pip install langchain==0.0.191
+```
+
 ## Usage
 
 To run generation with DeepSpeed-inference, you must launch the script as follows:
@@ -125,3 +130,40 @@ python ../../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \
 --top_p 0.95 \
 --prompt "Hello world" "How are you?" "Here is my prompt" "Once upon a time"
 ```
+
+### Usage with LangChain
+
+The text-generation pipeline can be passed to LangChain classes by setting the `use_with_langchain` constructor argument to `True`. The following sample snippet shows how the pipeline class can be used with LangChain.
+```python
+from langchain.llms import HuggingFacePipeline
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+
+# Initialize the pipeline
+pipe = GaudiTextGenerationPipeline(args, logger, use_with_langchain=True)
+
+# Create LangChain object
+llm = HuggingFacePipeline(pipeline=pipe)
+
+template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \
+just say that you don't know, don't try to make up an answer.
+
+Context: Large Language Models (LLMs) are the latest models used in NLP.
+Their superior performance over smaller models has made them incredibly
+useful for developers building NLP enabled applications. These models
+can be accessed via Hugging Face's `transformers` library, via OpenAI
+using the `openai` library, and via Cohere using the `cohere` library.
+
+Question: {question}
+Answer: """
+
+prompt = PromptTemplate(input_variables=["question"], template=template)
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+# Use LangChain object
+question = "Which libraries and model providers offer LLMs?"
+response = llm_chain(prompt.format(question=question))
+print(f"Question: {question}")
+print(f"Response: {response['text']}")
+```
+> The pipeline class has been validated for LangChain version 0.0.191 and may not work with other versions of the package.