-
Notifications
You must be signed in to change notification settings - Fork 5.8k
[OpenAI] Whisper TSP definition for AOAI #25513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
fa346cd
0f8e412
5629f9f
4f52b58
8ab710a
2c95a24
fcc5366
17f7628
2612005
4a7a831
a1774cc
ccdc8ec
2b49764
03e9605
b30446d
bd63134
2b80885
f54c52e
23f2094
5522abe
918fc52
5a7f34b
eab0747
079bbf6
90600ba
89ee481
53f2828
17ef0ef
7beb47a
e8e3300
7f0465b
d33d47d
82c07f9
12baae1
9c7f16c
fb4f558
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| import "@typespec/rest"; | ||
| import "@typespec/http"; | ||
|
|
||
| using TypeSpec.Rest; | ||
| using TypeSpec.Http; | ||
|
|
||
| namespace Azure.OpenAI; | ||
|
|
||
| @doc(""" | ||
| Transcription request. | ||
| Requesting format 'json' will result in only the 'text' field being set. | ||
| For more output data use 'verbose_json'. | ||
| """) | ||
| model AudioTranscriptionsOptions { | ||
| @doc("The audio file object to transcribe.") | ||
| // how do we handle binary format for a member? | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
| file: string; | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
|
|
||
| @doc("An optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.") | ||
| prompt?: string; | ||
|
|
||
| @doc("The format of the transcription output, in one of these options: json, text, srt, verbose_json, or vtt.") | ||
| @projectedName("json", "response_format") | ||
| responseFormat?: AudioTranscriptionFormat = AudioTranscriptionFormat.json; | ||
|
|
||
| @doc(""" | ||
| The sampling temperature, between 0 and 1. | ||
| Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. | ||
| If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. | ||
| """) | ||
| temperature?: float32 = 0; | ||
|
|
||
| @doc("The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.") | ||
| language?: string; | ||
| } | ||
|
|
||
| @doc("Transcription response.") | ||
| model AudioTranscription { | ||
| @doc("Transcription task.") | ||
| task?: string; | ||
|
|
||
| @doc("Transcribed text.") | ||
| text: string; | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
|
|
||
| @doc("Language.") | ||
| language?: string; | ||
|
|
||
| @doc("Duration.") | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
| duration?: duration; | ||
|
|
||
| @doc("Segments.") | ||
| segments?: AudioTranscriptionSegment[]; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,52 @@ | ||
| import "@typespec/rest"; | ||
| import "@typespec/http"; | ||
|
|
||
| import "./whisper.common.tsp"; | ||
|
|
||
| using TypeSpec.Rest; | ||
| using TypeSpec.Http; | ||
|
|
||
| namespace Azure.OpenAI; | ||
|
|
||
| @doc(""" | ||
| Translation request. | ||
| Requesting format 'json' will result in only the 'text' field being set. | ||
| For more output data use 'verbose_json'. | ||
| """) | ||
| model AudioTranslationOptions { | ||
| @doc("The audio file to translate.") | ||
| // how do we handle binary format for a member? | ||
| file: string; | ||
|
|
||
| @doc("An optional text to guide the model's style or continue a previous audio segment. The prompt should be in English.") | ||
| prompt?: string; | ||
|
|
||
| @doc("The format of the translation output, in one of these options: json, text, srt, verbose_json, or vtt.") | ||
| @projectedName("json", "response_format") | ||
| responseFormat?: AudioTranscriptionFormat = AudioTranscriptionFormat.json; | ||
|
|
||
| @doc(""" | ||
| The sampling temperature, between 0 and 1. | ||
| Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. | ||
| If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit. | ||
| """) | ||
| temperature?: float32 = 0; | ||
| } | ||
|
|
||
| @doc("Translation response.") | ||
| model AudioTranslation { | ||
| @doc("Translation task.") | ||
| task?: string; | ||
|
|
||
| @doc("Translated text.") | ||
| text: string; | ||
|
|
||
| @doc("Language.") | ||
| language?: string; | ||
|
|
||
| @doc("Duration.") | ||
| duration?: duration; | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
|
|
||
| @doc("Transcription segments.") | ||
| segments?: AudioTranscriptionSegment[]; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| import "@typespec/rest"; | ||
| import "@typespec/http"; | ||
|
|
||
| import "./transcription.create.tsp"; | ||
|
|
||
| using TypeSpec.Rest; | ||
| using TypeSpec.Http; | ||
|
|
||
| namespace Azure.OpenAI; | ||
|
|
||
| @doc("Defines the format of the output.") | ||
| enum AudioTranscriptionFormat { | ||
| @doc("JSON format. The translation/transcription response will only contain 'text'.") | ||
| json: "json", | ||
|
|
||
| @doc("Text format. The translation/transcription response will be of type text/plain.") | ||
| text: "text", | ||
|
|
||
| @doc("SRT format. The translation/transcription response will be of type text/plain.") | ||
| srt: "srt", | ||
|
|
||
| @doc("Verbose JSON format. The translation/transcription response will contain additional information.") | ||
| verbose_json: "verbose_json", | ||
|
|
||
| @doc("VTT format. The translation/transcription response will be of type text/plain.") | ||
| vtt: "vtt", | ||
| } | ||
|
|
||
| @doc("Transcription segment.") | ||
| model AudioTranscriptionSegment { | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
| @doc("Segment identifier.") | ||
| id?: string; | ||
|
jpalvarezl marked this conversation as resolved.
Outdated
|
||
|
|
||
| @doc("Segment start offset.") | ||
| start?: int32; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think start and end may be
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. would we need the |
||
|
|
||
| @doc("Segment end offset.") | ||
| end?: int32; | ||
|
|
||
| @doc("Segment text.") | ||
| text?: string; | ||
|
|
||
| @doc("Temperature.") | ||
| temperature?: float32; | ||
|
|
||
| @doc("Average log probability.") | ||
| @projectedName("json", "avg_logprob") | ||
| averageLogProb?: float32; | ||
|
|
||
| @doc("Compression ratio.") | ||
| @projectedName("json", "compression_ratio") | ||
| compressionRatio?: float32; | ||
|
|
||
| @doc("Probability of 'no speech'.") | ||
| @projectedName("json", "no_speech_prob") | ||
| noSpeechProb?: float32; | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.