diff --git a/examples/models/whisper/main.cpp b/examples/models/whisper/main.cpp
index d4b156a413a..080106c8915 100644
--- a/examples/models/whisper/main.cpp
+++ b/examples/models/whisper/main.cpp
@@ -39,10 +39,6 @@ DEFINE_string(
     audio_path,
     "",
     "Path to input audio file. Accepts .wav or raw float .bin.");
-DEFINE_string(
-    model_name,
-    "base",
-    "Whisper model name (base, small, medium, large, large-v2, large-v3, large-v3-turbo).");
 DEFINE_double(
     temperature,
     0.0,
@@ -114,21 +110,10 @@ int main(int argc, char** argv) {
   config.max_new_tokens = FLAGS_max_new_tokens;
   config.temperature = static_cast<float>(FLAGS_temperature);
 
-  // Set decoder_start_token_id based on model version
-  if (FLAGS_model_name == "large-v2" || FLAGS_model_name == "large-v3" ||
-      FLAGS_model_name == "large-v3-turbo") {
-    config.decoder_start_token_id = 50258;
-    ET_LOG(
-        Info,
-        "Using decoder_start_token_id=50258 for model: %s",
-        FLAGS_model_name.c_str());
-  } else {
-    config.decoder_start_token_id = 50257;
-    ET_LOG(
-        Info,
-        "Using decoder_start_token_id=50257 for model: %s",
-        FLAGS_model_name.c_str());
-  }
+  // All Whisper models from HuggingFace now use the v3 tokenizer format
+  // where token 50257 = <|endoftext|> and token 50258 = <|startoftranscript|>
+  config.decoder_start_token_id = 50258;
+  ET_LOG(Info, "Using decoder_start_token_id=50258");
 
   auto result = runner.transcribe(
       features, config, [&](const std::string& piece) {