Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions examples/text-generation/text-generation-pipeline/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ If you plan to use [DeepSpeed-inference](https://docs.habana.ai/en/latest/PyTorc
pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0
```

If you would like to use the pipeline with LangChain classes, you can install LangChain as follows:
```bash
pip install langchain==0.0.191
```

## Usage

To run generation with DeepSpeed-inference, you must launch the script as follows:
Expand Down Expand Up @@ -125,3 +130,40 @@ python ../../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \
--top_p 0.95 \
--prompt "Hello world" "How are you?" "Here is my prompt" "Once upon a time"
```

### Usage with LangChain

The text-generation pipeline can be passed to LangChain classes by setting the `use_with_langchain` constructor argument to `True`. The following snippet shows how to use the pipeline class with LangChain.
```python
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Initialize the pipeline. `args` and `logger` must already be defined by the
# surrounding script. With `use_with_langchain=True`, the pipeline returns its
# output as `[{"generated_text": ...}]`.
pipe = GaudiTextGenerationPipeline(args, logger, use_with_langchain=True)

# Create LangChain object
llm = HuggingFacePipeline(pipeline=pipe)

# Note the space before the trailing backslash: the line continuation joins the
# two lines, so without it the prompt would read "answer,just say".
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, \
just say that you don't know, don't try to make up an answer.

Context: Large Language Models (LLMs) are the latest models used in NLP.
Their superior performance over smaller models has made them incredibly
useful for developers building NLP enabled applications. These models
can be accessed via Hugging Face's `transformers` library, via OpenAI
using the `openai` library, and via Cohere using the `cohere` library.

Question: {question}
Answer: """

prompt = PromptTemplate(input_variables=["question"], template=template)
llm_chain = LLMChain(prompt=prompt, llm=llm)

# Use LangChain object.
# Pass only the raw question: the chain fills in the template itself. Passing
# `prompt.format(question=question)` here would substitute the whole formatted
# prompt into `{question}` and nest the template inside itself.
question = "Which libraries and model providers offer LLMs?"
response = llm_chain({"question": question})
print(f"Question: {question}")
print(f"Response: {response['text']}")
```
> The pipeline class has been validated with LangChain version 0.0.191 and may not work with other versions of the package.
11 changes: 10 additions & 1 deletion examples/text-generation/text-generation-pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@


class GaudiTextGenerationPipeline(TextGenerationPipeline):
def __init__(self, args, logger):
def __init__(self, args, logger, use_with_langchain=False):
self.model, self.tokenizer, self.generation_config = initialize_model(args, logger)

self.task = "text-generation"
self.device = args.device

if args.do_sample:
Expand All @@ -18,6 +19,10 @@ def __init__(self, args, logger):
self.profiling_steps = args.profiling_steps
self.profiling_warmup_steps = args.profiling_warmup_steps

self.use_with_langchain = use_with_langchain
if self.use_with_langchain:
self.generation_config.ignore_eos = False

import habana_frameworks.torch.hpu as torch_hpu

logger.info("Graph compilation...")
Expand All @@ -44,4 +49,8 @@ def __call__(self, prompt: str):
).cpu()

output_text = self.tokenizer.decode(output[0], skip_special_tokens=True)

if self.use_with_langchain:
return [{"generated_text": output_text}]

return output_text