Environment: Ubuntu 22.04 server; GPU: A10 24 GB; attention_sinks 0.4.0
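For completeness, here is a small hypothetical snippet (not part of the original report) to print the exact package and GPU versions when reproducing the issue; it assumes the packages were installed with pip and that a CUDA device is visible:

```python
# Hypothetical environment check (my addition, not from the original report).
from importlib.metadata import version
import torch

print("torch:", torch.__version__)
print("transformers:", version("transformers"))
print("attention_sinks:", version("attention_sinks"))
print("GPU:", torch.cuda.get_device_name(0))  # assumes a CUDA device is available
```

The full reproduction script with the base Qwen-7B model follows.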
```python
import torch
from transformers import AutoTokenizer, TextStreamer, GenerationConfig
from attention_sinks import AutoModelForCausalLM

model_id = "/home/work/projects/model/Qwen-7B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    attention_sink_size=4,
    attention_sink_window_size=252,
    trust_remote_code=True,
    use_flash_attn=False,
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

text = "保持身体健康有多种方式"  # "There are many ways to stay healthy"
input_ids = tokenizer.encode(text, return_tensors="pt").to(model.device)

streamer = TextStreamer(tokenizer)
generation_config = GenerationConfig(
    use_cache=True,
    min_new_tokens=100_000,
    max_new_tokens=1_000_000,
    penalty_alpha=0.6,
    top_k=5,
    pad_token_id=tokenizer.pad_token_id,
    eos_token_id=tokenizer.eos_token_id,
)
generated_tokens = model.generate(
    input_ids,
    generation_config=generation_config,
    streamer=streamer,
)
output_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
```
With this script, generation finishes normally but does not produce a long text output:

![model output](https://private-user-images.githubusercontent.com/66725845/285351630-ce4223d8-fa02-46e3-8b01-a47b3b75c99e.png)
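As a sanity check (my addition, not in the original report), the number of newly generated tokens can be compared against `min_new_tokens` to confirm that generation really stops early; this assumes the variables from the script above are still in scope:

```python
# Hypothetical check: how many tokens were actually generated vs. the 100_000 minimum.
prompt_len = input_ids.shape[1]
new_tokens = generated_tokens.shape[1] - prompt_len
print(f"prompt tokens: {prompt_len}, new tokens: {new_tokens}, min_new_tokens: 100_000")
```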
But when I use the fine-tuned Qwen base model, an error is raised after some text has been generated:

![error traceback](https://private-user-images.githubusercontent.com/66725845/285351564-622156a1-9885-48b5-b749-c73375933277.png)
My code is as follows:
```python
import torch
from transformers import AutoTokenizer, TextStreamer, GenerationConfig
from attention_sinks import AutoModelForCausalLM

model_id = "/home/work/projects/LLaMA-Factory/output/qwen_base"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # for efficiency:
    device_map="auto",
    torch_dtype=torch.float16,
    attention_sink_size=16,
    attention_sink_window_size=1024,
    trust_remote_code=True,
    use_flash_attn=False,
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

text = "写一篇文章,主题为:5G网络架构探讨"  # "Write an article on the topic: a discussion of 5G network architecture"
input_ids = tokenizer.encode(text, return_tensors="pt").to(model.device)

with torch.no_grad():
    streamer = TextStreamer(tokenizer)
    generated_tokens = model.generate(
        input_ids,
        generation_config=GenerationConfig(
            use_cache=True,
            min_new_tokens=100_000,
            max_new_tokens=1_000_000,
            penalty_alpha=0.6,
            top_k=5,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        ),
        streamer=streamer,
    )

output_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
```
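Since the screenshot only shows part of the traceback, a sketch like the following (my addition, not in the original report) could capture the complete error text so it can be attached here as plain text; it reuses the model, tokenizer, input_ids, and streamer defined in the script above:

```python
# Hypothetical wrapper around the failing call (assumption, not the original code):
# print the full stack trace so the exact failing line inside attention_sinks
# or the Qwen remote code is visible.
import traceback

try:
    with torch.no_grad():
        generated_tokens = model.generate(
            input_ids,
            generation_config=GenerationConfig(
                use_cache=True,
                min_new_tokens=100_000,
                max_new_tokens=1_000_000,
                penalty_alpha=0.6,
                top_k=5,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
            ),
            streamer=streamer,
        )
except Exception:
    traceback.print_exc()
```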