huggingface · regisss · Feb 7, 2024 · Feb 6, 2024 · Feb 6, 2024 · Feb 6, 2024
@@ -31,6 +31,11 @@ If you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorc
 pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
 ```
 
+If you would like to use the pipeline with LangChain classes, you can install LangChain as follows:
+```bash
+pip install langchain==0.0.191
+```
+
 ## Usage
 
 To run generation with DeepSpeed-inference, you must launch the script as follows:
@@ -125,3 +130,40 @@ python ../../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \
 --top_p 0.95 \
 --prompt "Hello world" "How are you?" "Here is my prompt" "Once upon a time"
 ```
+
+### Usage with LangChain
+
+The text-generation pipeline can be passed to LangChain classes by setting the `use_with_langchain` constructor argument to `True`. The following sample snippet shows how the pipeline class can be used with LangChain.
+```python
+from langchain.llms import HuggingFacePipeline
+from langchain.prompts import PromptTemplate
+from langchain.chains import LLMChain
+
+# Initialize the pipeline
+pipe = GaudiTextGenerationPipeline(args, logger, use_with_langchain=True)
+
+# Create LangChain object
+llm = HuggingFacePipeline(pipeline=pipe)
+
+template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \
+just say that you don't know, don't try to make up an answer.
+
+Context: Large Language Models (LLMs) are the latest models used in NLP.
+Their superior performance over smaller models has made them incredibly
+useful for developers building NLP enabled applications. These models
+can be accessed via Hugging Face's `transformers` library, via OpenAI
+using the `openai` library, and via Cohere using the `cohere` library.
+
+Question: {question}
+Answer: """
+
+prompt = PromptTemplate(input_variables=["question"], template=template)
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+# Use LangChain object
+question = "Which libraries and model providers offer LLMs?"
+response = llm_chain(prompt.format(question=question))
+print(f"Question: {question}")
+print(f"Response: {response['text']}")
+```
+> The pipeline class has been validated with LangChain version 0.0.191 and may not work with other versions of the package.
@@ -4,9 +4,10 @@
 
 
 class GaudiTextGenerationPipeline(TextGenerationPipeline):
-    def __init__(self, args, logger):
+    def __init__(self, args, logger, use_with_langchain=False):
         self.model, self.tokenizer, self.generation_config = initialize_model(args, logger)
 
+        self.task = "text-generation"
         self.device = args.device
 
         if args.do_sample:
@@ -18,6 +19,10 @@ def __init__(self, args, logger):
         self.profiling_steps = args.profiling_steps
         self.profiling_warmup_steps = args.profiling_warmup_steps
 
+        self.use_with_langchain = use_with_langchain
+        if self.use_with_langchain:
+            self.generation_config.ignore_eos = False
+
         import habana_frameworks.torch.hpu as torch_hpu
 
         logger.info("Graph compilation...")
@@ -44,4 +49,8 @@ def __call__(self, prompt: str):
         ).cpu()
 
         output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
+
+        if self.use_with_langchain:
+            return [{"generated_text": output_text}]
+
         return output_text