Skip to content

Commit

Permalink
CTranslate2 Blog
Browse files Browse the repository at this point in the history
---------

Co-authored-by: ffloresy <[email protected]>
Co-authored-by: Eliot Li <[email protected]>
Co-authored-by: vbayanag <[email protected]>
Co-authored-by: Jeffrey Novotny <[email protected]>
Co-authored-by: Danny213123 <[email protected]>
  • Loading branch information
6 people committed Oct 24, 2024
1 parent a1aa665 commit c9a603f
Show file tree
Hide file tree
Showing 9 changed files with 634 additions and 0 deletions.
494 changes: 494 additions & 0 deletions blogs/artificial-intelligence/ctranslate2/README.md

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions blogs/artificial-intelligence/ctranslate2/src/gpt2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Generate text with a CTranslate2-converted GPT-2 model on the GPU."""
import ctranslate2
import transformers

tokenizer = transformers.AutoTokenizer.from_pretrained("gpt2")
generator = ctranslate2.Generator("gpt2_ct2", device="cuda")

# Unconditional generation: seed the model with only the BOS token.
prompt = [tokenizer.bos_token]
outputs = generator.generate_batch([prompt], max_length=30, sampling_topk=10)
print(tokenizer.decode(outputs[0].sequences_ids[0]))

# Conditional generation: seed the model with the tokenized prefix "It is".
prompt = tokenizer.convert_ids_to_tokens(tokenizer.encode("It is"))
outputs = generator.generate_batch([prompt], max_length=30, sampling_topk=10)
print(tokenizer.decode(outputs[0].sequences_ids[0]))
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
"""Transcribe a 30-second audio window with a CTranslate2 Whisper model."""
import ctranslate2
import librosa
import transformers

# Load and resample the audio file.
waveform, _ = librosa.load("src/sample2.flac", sr=16000, mono=True)

# Compute the log-mel input features for the first 30 seconds of audio.
processor = transformers.WhisperProcessor.from_pretrained("openai/whisper-tiny")
encoded = processor(waveform, return_tensors="np", sampling_rate=16000)
features = ctranslate2.StorageView.from_array(encoded.input_features)

# Load the model on GPU.
model = ctranslate2.models.Whisper("whisper-tiny-ct2", device="cuda")

# Detect the most probable language of the audio.
language, probability = model.detect_language(features)[0][0]
print("Detected language %s with probability %f" % (language, probability))

# Describe the task in the prompt.
# See the prompt format in https://github.com/openai/whisper.
prompt = processor.tokenizer.convert_tokens_to_ids(
    [
        "<|startoftranscript|>",
        language,
        "<|transcribe|>",
        "<|notimestamps|>",  # Remove this token to generate timestamps.
    ]
)

# Run generation for the 30-second window and print the transcription.
outputs = model.generate(features, [prompt])
print(processor.decode(outputs[0].sequences_ids[0]))
15 changes: 15 additions & 0 deletions blogs/artificial-intelligence/ctranslate2/src/translate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Translate an English sentence to German with a CTranslate2 model."""
import ctranslate2
import sentencepiece as spm

sp = spm.SentencePieceProcessor("sentencepiece.model")
translator = ctranslate2.Translator("ende_ctranslate2/", device="cuda")

# Tokenize the source sentence into SentencePiece subword pieces.
input_text = "Good Morning!"
source_tokens = sp.encode(input_text, out_type=str)

# Translate, take the best hypothesis, and detokenize it.
best_hypothesis = translator.translate_batch([source_tokens])[0].hypotheses[0]

print(sp.decode(best_hypothesis))
54 changes: 54 additions & 0 deletions blogs/artificial-intelligence/ctranslate2/src/translate_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Compare latency and throughput of float32 vs. int8 CTranslate2 models."""
import ctranslate2
import sentencepiece as spm
import time

# Load the SentencePiece model.
sp = spm.SentencePieceProcessor(model_file="sentencepiece.model")

# Input text to translate.
input_text = "Hello world!"
input_tokens = sp.encode(input_text, out_type=str)

# Function to perform translation and measure latency and tokens per second
def translate_and_time(translator, warmup=1):
    """Translate ``input_tokens`` with *translator* and measure performance.

    Args:
        translator: A ``ctranslate2.Translator`` instance.
        warmup: Number of untimed runs before measuring (default 1). The
            first call on a freshly loaded model pays one-time costs (CUDA
            context setup, kernel compilation/caching); excluding them keeps
            the float32-vs-int8 comparison fair for whichever model runs
            first.

    Returns:
        Tuple of (output_text, latency_in_seconds, tokens_per_second).
    """
    # Untimed warm-up run(s) so one-time initialization does not skew timing.
    for _ in range(warmup):
        translator.translate_batch([input_tokens])

    # perf_counter is monotonic and high-resolution, unlike time.time().
    start_time = time.perf_counter()
    results = translator.translate_batch([input_tokens])
    latency = time.perf_counter() - start_time

    # Decode the translated tokens
    output_tokens = results[0].hypotheses[0]
    output_text = sp.decode(output_tokens)

    # Calculate tokens per second
    tokens_per_second = len(output_tokens) / latency

    return output_text, latency, tokens_per_second

# Load the default (float32) model
translator_float32 = ctranslate2.Translator(
    "ende_ctranslate2/", device="cuda", compute_type="float32"
)
output_text_float32, latency_float32, tps_float32 = translate_and_time(translator_float32)

# Load the int8 quantized model
translator_int8 = ctranslate2.Translator(
    "ende_ctranslate2_int8/", device="cuda", compute_type="int8"
)
output_text_int8, latency_int8, tps_int8 = translate_and_time(translator_int8)

# Print the results
print("Default (float32) model translation:")
print(f"Output: {output_text_float32}")
print(f"Latency: {latency_float32:.4f} seconds")
print(f"Tokens per second: {tps_float32:.2f}\n")

print("Int8 quantized model translation:")
print(f"Output: {output_text_int8}")
print(f"Latency: {latency_int8:.4f} seconds")
print(f"Tokens per second: {tps_int8:.2f}\n")

# Calculate the speedup in tokens per second
speedup_tps = tps_int8 / tps_float32
print(f"Speedup in tokens per second with int8 quantization: {speedup_tps:.2f}x faster")
11 changes: 11 additions & 0 deletions blogs/authors/michael-zhang.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<head>
<meta charset="UTF-8">
<meta name="description" content="Michael Zhang">
<meta name="keywords" content="AMD GPU, MI300, MI250, ROCm, blog, contributor, blog author">
</head>

(mzhang)=

# Michael Zhang

Michael is a Machine Learning Engineer at AMD. Michael specializes in generative AI, large language models (LLMs), computer vision, autonomous driving, and robotics. He has published 10+ papers in top AI conferences and journals and has a Google Scholar citation count of over 650 as of October 2024. He holds a master's degree in Computer Engineering from the University of Illinois at Urbana-Champaign (UIUC).
4 changes: 4 additions & 0 deletions blogs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@
"Maria Ruiz Varela",
"http://rocm.blogs.amd.com/authors/maria-ruiz-varela.html",
),
"Michael Zhang": (
"Michael Zhang",
"http://rocm.blogs.amd.com/authors/michael-zhang.html",
),
"Nicholas Curtis": (
"Nicholas Curtis",
"https://rocm.blogs.amd.com/authors/nicholas-curtis.html",
Expand Down
6 changes: 6 additions & 0 deletions blogs/contributor-bios.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ Technical Marketing Engineer at AMD
Senior Member of Technical Staff at AMD
:::

:::{grid-item-card} [Michael Zhang](./authors/michael-zhang.md)
:padding: 1

Machine Learning Engineer at AMD
:::

:::{grid-item-card} [Nicholas Curtis](./authors/nicholas-curtis.md)
:padding: 1

Expand Down

0 comments on commit c9a603f

Please sign in to comment.