-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
--------- Co-authored-by: ffloresy <[email protected]> Co-authored-by: Eliot Li <[email protected]> Co-authored-by: vbayanag <[email protected]> Co-authored-by: Jeffrey Novotny <[email protected]> Co-authored-by: Danny213123 <[email protected]>
- Loading branch information
1 parent
a1aa665
commit c9a603f
Showing
9 changed files
with
634 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import ctranslate2 | ||
import transformers | ||
|
||
# Keyword arguments shared by both generate_batch() calls below.
GENERATION_OPTIONS = {"max_length": 30, "sampling_topk": 10}

# Open the converted model directory on the "cuda" device and fetch the
# Hugging Face tokenizer for "gpt2".
generator = ctranslate2.Generator("gpt2_ct2", device="cuda")
tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")

# Unconditional generation: the prompt is only the tokenizer's BOS token.
start_tokens = [tokenizer.bos_token]
results = generator.generate_batch([start_tokens], **GENERATION_OPTIONS)
generated_ids = results[0].sequences_ids[0]
print(tokenizer.decode(generated_ids))

# Conditional generation: the prompt is the token sequence for "It is".
prompt_ids = tokenizer.encode("It is")
start_tokens = tokenizer.convert_ids_to_tokens(prompt_ids)
results = generator.generate_batch([start_tokens], **GENERATION_OPTIONS)
generated_ids = results[0].sequences_ids[0]
print(tokenizer.decode(generated_ids))
Binary file not shown.
35 changes: 35 additions & 0 deletions
35
blogs/artificial-intelligence/ctranslate2/src/speech_recognition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import ctranslate2 | ||
import librosa | ||
import transformers | ||
|
||
# Sampling rate used both when loading the audio and when extracting features.
SAMPLE_RATE = 16000

# Read the sample file, resampled to SAMPLE_RATE and mixed down to mono.
audio, _ = librosa.load("src/sample2.flac", sr=SAMPLE_RATE, mono=True)

# Turn the waveform into model input features (first 30 seconds of audio)
# and wrap the NumPy array in a CTranslate2 StorageView.
processor = transformers.WhisperProcessor.from_pretrained("openai/whisper-tiny")
inputs = processor(audio, return_tensors="np", sampling_rate=SAMPLE_RATE)
features = ctranslate2.StorageView.from_array(inputs.input_features)

# Load the converted Whisper model on the GPU.
model = ctranslate2.models.Whisper("whisper-tiny-ct2", device="cuda")

# Language detection: use the first (language, probability) pair reported
# for the first (and only) item of the batch.
results = model.detect_language(features)
language, probability = results[0][0]
print("Detected language %s with probability %f" % (language, probability))

# The prompt describes the task to the decoder.
# See the prompt format in https://github.com/openai/whisper.
prompt_tokens = [
    "<|startoftranscript|>",
    language,
    "<|transcribe|>",
    "<|notimestamps|>",  # Remove this token to generate timestamps.
]
prompt = processor.tokenizer.convert_tokens_to_ids(prompt_tokens)

# Generate the transcription of the 30-second window and print it.
results = model.generate(features, [prompt])
transcription = processor.decode(results[0].sequences_ids[0])
print(transcription)
15 changes: 15 additions & 0 deletions
15
blogs/artificial-intelligence/ctranslate2/src/translate.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import ctranslate2 | ||
import sentencepiece as spm | ||
|
||
# Load the converted translation model on the GPU and the SentencePiece
# model used to tokenize and detokenize text.
translator = ctranslate2.Translator("ende_ctranslate2/", device="cuda")
sp = spm.SentencePieceProcessor("sentencepiece.model")

# Split the input sentence into subword pieces (strings rather than ids).
input_text = "Good Morning!"
input_tokens = sp.encode(input_text, out_type=str)

# Translate a batch containing only this sentence and keep the first
# hypothesis returned for it.
batch = [input_tokens]
results = translator.translate_batch(batch)
first_result = results[0]
output_tokens = first_result.hypotheses[0]

# Join the subword pieces back into plain text and show the translation.
output_text = sp.decode(output_tokens)
print(output_text)
54 changes: 54 additions & 0 deletions
54
blogs/artificial-intelligence/ctranslate2/src/translate_compare.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import ctranslate2 | ||
import sentencepiece as spm | ||
import time | ||
|
||
# Path of the SentencePiece model, relative to the working directory.
SENTENCEPIECE_MODEL_FILE = "sentencepiece.model"

# Tokenizer shared by every translation run in this script.
sp = spm.SentencePieceProcessor(model_file=SENTENCEPIECE_MODEL_FILE)

# Sentence to translate, tokenized once into subword strings so that each
# benchmarked model receives exactly the same input.
input_text = "Hello world!"
input_tokens = sp.encode(input_text, out_type=str)
|
||
# Function to perform translation and measure latency and tokens per second
def translate_and_time(translator):
    """Translate the module-level ``input_tokens`` once and time the call.

    Args:
        translator: Object exposing ``translate_batch`` (here a
            ``ctranslate2.Translator``).

    Returns:
        A ``(output_text, latency, tokens_per_second)`` tuple: the decoded
        first hypothesis, the elapsed time of the ``translate_batch`` call in
        seconds, and the number of output tokens divided by that time.
    """
    # perf_counter() is monotonic and high resolution, so the measurement
    # cannot be skewed by system clock adjustments the way time.time() can.
    # NOTE(review): no warm-up call is made, so the measurement may include
    # one-time initialization cost of the translator - confirm this is intended.
    start_time = time.perf_counter()
    results = translator.translate_batch([input_tokens])
    latency = time.perf_counter() - start_time

    # Decode the translated tokens
    output_tokens = results[0].hypotheses[0]
    output_text = sp.decode(output_tokens)

    # Calculate tokens per second; guard against a zero reading from the
    # timer so a coarse clock cannot raise ZeroDivisionError.
    num_output_tokens = len(output_tokens)
    if latency > 0:
        tokens_per_second = num_output_tokens / latency
    else:
        tokens_per_second = float("inf")

    return output_text, latency, tokens_per_second
|
||
def _print_report(heading, output_text, latency, tokens_per_second):
    """Print the translation, latency and throughput of one benchmark run."""
    print(heading)
    print(f"Output: {output_text}")
    print(f"Latency: {latency:.4f} seconds")
    print(f"Tokens per second: {tokens_per_second:.2f}\n")


# Benchmark the default (float32) model.
translator_float32 = ctranslate2.Translator(
    "ende_ctranslate2/",
    device="cuda",
    compute_type="float32",
)
float32_run = translate_and_time(translator_float32)
output_text_float32, latency_float32, tps_float32 = float32_run

# Benchmark the int8 quantized model.
translator_int8 = ctranslate2.Translator(
    "ende_ctranslate2_int8/",
    device="cuda",
    compute_type="int8",
)
int8_run = translate_and_time(translator_int8)
output_text_int8, latency_int8, tps_int8 = int8_run

# Report both runs, float32 first.
_print_report(
    "Default (float32) model translation:",
    output_text_float32,
    latency_float32,
    tps_float32,
)
_print_report(
    "Int8 quantized model translation:",
    output_text_int8,
    latency_int8,
    tps_int8,
)

# Ratio of int8 throughput to float32 throughput.
speedup_tps = tps_int8 / tps_float32
print(f"Speedup in tokens per second with int8 quantization: {speedup_tps:.2f}x faster")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="description" content="Michael Zhang"> | ||
<meta name="keywords" content="AMD GPU, MI300, MI250, ROCm, blog, contributor, blog author"> | ||
</head> | ||
|
||
(mzhang)= | ||
|
||
# Michael Zhang | ||
|
||
Michael is a Machine Learning Engineer at AMD. Michael specializes in generative AI, large language models (LLMs), computer vision, autonomous driving, and robotics. He has published 10+ papers in top AI conferences and journals and has a Google Scholar citation count of over 650 as of October 2024. He holds a master's degree in Computer Engineering from the University of Illinois at Urbana-Champaign (UIUC). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters