-
Notifications
You must be signed in to change notification settings - Fork 1
/
create_llm_gen_default.py
35 lines (31 loc) · 1.53 KB
/
create_llm_gen_default.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from setup import Config
def create_llm_gen_default() -> "HuggingFacePipeline":
    """Build the default text-generation LLM wrapper.

    The model id is ``Config.MODEL_SOURCE`` joined with ``Config.HF_LLM_NAME``;
    it is loaded through ``HuggingFacePipeline.from_model_id`` for the
    ``"text-generation"`` task on the CPU.

    Decoding is greedy (``do_sample=False``). Earlier settings combined
    ``do_sample=True`` with ``temperature=0.0``; transformers requires a
    strictly positive temperature when sampling and recommends
    ``do_sample=False`` for deterministic output, so the sampling-only
    options (``top_k``, ``temperature``) are no longer passed.

    Returns:
        The configured ``HuggingFacePipeline`` instance.
    """
    # API reference:
    # https://api.python.langchain.com/en/latest/llms/langchain_community.llms.huggingface_pipeline.HuggingFacePipeline.html
    llm_gen = HuggingFacePipeline.from_model_id(
        model_id=os.path.join(Config.MODEL_SOURCE, Config.HF_LLM_NAME),
        task="text-generation",
        device=-1,  # -1 stands for CPU
        pipeline_kwargs={
            # Full list of parameters for this section with explanation:
            # https://huggingface.co/docs/transformers/en/main_classes/text_generation
            # Note: some of them (depending on the specific model) should go
            # to the model_kwargs attribute instead.
            "max_new_tokens": 512,  # upper bound on the generated answer length
            # Set "return_full_text" to True to also get the prompt, context
            # and other serving text back together with the generated answer.
            "return_full_text": False,
        },
        model_kwargs={
            # Full list of parameters for this section with explanation:
            # https://huggingface.co/docs/transformers/en/main_classes/text_generation
            # Note: some of them (depending on the specific model) should go
            # to the pipeline_kwargs attribute instead.
            # Greedy decoding: the deterministic equivalent of the intended
            # "temperature 0" behaviour. Sampling with temperature 0.0 is not
            # a valid configuration, and top_k only applies when sampling.
            "do_sample": False,
            "repetition_penalty": 1.03,  # 1.0 means no penalty
            # NOTE(review): max_length overlaps with max_new_tokens above;
            # presumably max_new_tokens is the limit that matters - confirm.
            "max_length": 20,
        },
    )
    return llm_gen