Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 20 additions & 8 deletions .dotnet/api/OpenAI.netstandard2.0.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1084,13 +1084,25 @@ public class AudioTranslationOptions : IJsonModel<AudioTranslationOptions>, IPer
string IPersistableModel<AudioTranslationOptions>.GetFormatFromOptions(ModelReaderWriterOptions options);
BinaryData IPersistableModel<AudioTranslationOptions>.Write(ModelReaderWriterOptions options);
}
public enum GeneratedSpeechFormat {
Mp3 = 0,
Opus = 1,
Aac = 2,
Flac = 3,
Wav = 4,
Pcm = 5
public readonly partial struct GeneratedSpeechFormat : IEquatable<GeneratedSpeechFormat> {
private readonly object _dummy;
private readonly int _dummyPrimitive;
public GeneratedSpeechFormat(string value);
public static GeneratedSpeechFormat Aac { get; }
public static GeneratedSpeechFormat Flac { get; }
public static GeneratedSpeechFormat Mp3 { get; }
public static GeneratedSpeechFormat Opus { get; }
public static GeneratedSpeechFormat Pcm { get; }
public static GeneratedSpeechFormat Wav { get; }
public readonly bool Equals(GeneratedSpeechFormat other);
[EditorBrowsable(EditorBrowsableState.Never)]
public override readonly bool Equals(object obj);
[EditorBrowsable(EditorBrowsableState.Never)]
public override readonly int GetHashCode();
public static bool operator ==(GeneratedSpeechFormat left, GeneratedSpeechFormat right);
public static implicit operator GeneratedSpeechFormat(string value);
public static bool operator !=(GeneratedSpeechFormat left, GeneratedSpeechFormat right);
public override readonly string ToString();
}
public readonly partial struct GeneratedSpeechVoice : IEquatable<GeneratedSpeechVoice> {
private readonly object _dummy;
Expand Down Expand Up @@ -1120,7 +1132,7 @@ public static class OpenAIAudioModelFactory {
}
public class SpeechGenerationOptions : IJsonModel<SpeechGenerationOptions>, IPersistableModel<SpeechGenerationOptions> {
public GeneratedSpeechFormat? ResponseFormat { get; set; }
public float? Speed { get; set; }
public float? SpeedRatio { get; set; }
SpeechGenerationOptions IJsonModel<SpeechGenerationOptions>.Create(ref Utf8JsonReader reader, ModelReaderWriterOptions options);
void IJsonModel<SpeechGenerationOptions>.Write(Utf8JsonWriter writer, ModelReaderWriterOptions options);
SpeechGenerationOptions IPersistableModel<SpeechGenerationOptions>.Create(BinaryData data, ModelReaderWriterOptions options);
Expand Down
6 changes: 3 additions & 3 deletions .dotnet/examples/CombinationExamples.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void AlpacaArtAssessor()
GeneratedSpeechVoice.Fable,
new SpeechGenerationOptions()
{
Speed = 0.9f,
SpeedRatio = 0.9f,
ResponseFormat = GeneratedSpeechFormat.Opus,
});
FileInfo ttsFileInfo = new($"{chatCompletion.Id}.opus");
Expand Down Expand Up @@ -89,7 +89,7 @@ public async Task CuriousCreatureCreator()
GeneratedSpeechVoice.Onyx,
new SpeechGenerationOptions()
{
Speed = 1.1f,
SpeedRatio = 1.1f,
ResponseFormat = GeneratedSpeechFormat.Opus,
});
_ = Task.Run(async () =>
Expand Down Expand Up @@ -136,7 +136,7 @@ public async Task CuriousCreatureCreator()
GeneratedSpeechVoice.Fable,
new SpeechGenerationOptions()
{
Speed = 0.9f,
SpeedRatio = 0.9f,
ResponseFormat = GeneratedSpeechFormat.Opus,
});
FileInfo criticAudioFileInfo = new($"{criticalAppraisalResult.Value.Id}-appraisal.opus");
Expand Down
29 changes: 2 additions & 27 deletions .dotnet/src/Custom/Audio/GeneratedSpeechFormat.cs
Original file line number Diff line number Diff line change
@@ -1,32 +1,7 @@
namespace OpenAI.Audio;

/// <summary>
/// Represents an audio data format available as either input or output into an audio operation.
/// </summary>
/// <summary> The audio format in which to generate the speech. </summary>
[CodeGenModel("CreateSpeechRequestResponseFormat")]
public enum GeneratedSpeechFormat
public readonly partial struct GeneratedSpeechFormat
{
/// <summary> MP3. /// </summary>
[CodeGenMember("Mp3")]
Mp3,

/// <summary> Opus. /// </summary>
[CodeGenMember("Opus")]
Opus,

/// <summary> AAC (advanced audio coding). /// </summary>
[CodeGenMember("Aac")]
Aac,

/// <summary> FLAC (free lossless audio codec). /// </summary>
[CodeGenMember("Flac")]
Flac,

/// <summary> WAV. /// </summary>
[CodeGenMember("Wav")]
Wav,

/// <summary> PCM (pulse-code modulation). /// </summary>
[CodeGenMember("Pcm")]
Pcm,
}
22 changes: 12 additions & 10 deletions .dotnet/src/Custom/Audio/SpeechGenerationOptions.cs
Original file line number Diff line number Diff line change
@@ -1,38 +1,40 @@
namespace OpenAI.Audio;

/// <summary>
/// The options to configure text-to-speech audio generation: additional settings that control the
/// behavior of a text-to-speech audio generation operation.
/// </summary>
[CodeGenModel("CreateSpeechRequest")]
[CodeGenSuppress("SpeechGenerationOptions", typeof(InternalCreateSpeechRequestModel), typeof(string), typeof(GeneratedSpeechVoice))]
public partial class SpeechGenerationOptions
{
// CUSTOM:
// - Made internal. The model is specified by the client.
// - Added setter.
/// <summary> One of the available [TTS models](/docs/models/tts): `tts-1` or `tts-1-hd`. </summary>
[CodeGenMember("Model")]
internal InternalCreateSpeechRequestModel Model { get; set; }

// CUSTOM:
// - Made internal. This value comes from a parameter on the client method.
// - Added setter.
/// <summary> The text to generate audio for. The maximum length is 4096 characters. </summary>
[CodeGenMember("Input")]
internal string Input { get; set; }

// CUSTOM:
// - Made internal. This value comes from a parameter on the client method.
// - Added setter.
/// <summary>
/// The voice to use when generating the audio. Supported voices are `alloy`, `echo`, `fable`,
/// `onyx`, `nova`, and `shimmer`. Previews of the voices are available in the
/// [Text to speech guide](/docs/guides/text-to-speech/voice-options).
/// </summary>
[CodeGenMember("Voice")]
internal GeneratedSpeechVoice Voice { get; set; }

// CUSTOM: Made public now that there are no required properties.
/// <summary> Initializes a new instance of <see cref="SpeechGenerationOptions"/>. </summary>
public SpeechGenerationOptions()
{
}

// CUSTOM: Renamed. The attribute below ties this property to the generated "Speed" member,
// so only the public property name differs.
/// <summary>
/// The speed of the generated audio expressed as a ratio between 0.5 and 2.0. The default is 1.0.
/// </summary>
[CodeGenMember("Speed")]

public float? SpeedRatio { get; set; }
}

This file was deleted.

44 changes: 44 additions & 0 deletions .dotnet/src/Generated/Models/GeneratedSpeechFormat.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// <auto-generated/>

#nullable disable

using System;
using System.ComponentModel;

namespace OpenAI.Audio
{
    // String-backed value type: the well-known formats are exposed as static properties, while the
    // public constructor and the implicit string conversion accept any other non-null value.
    //
    // Comparison is ordinal and case-insensitive. The wrapped strings are protocol identifiers, not
    // human-language text, so linguistic (culture-based) comparison rules must not apply to them.
    public readonly partial struct GeneratedSpeechFormat : IEquatable<GeneratedSpeechFormat>
    {
        // Null only for default(GeneratedSpeechFormat); the constructor rejects null.
        private readonly string _value;

        /// <summary> Initializes a new instance of <see cref="GeneratedSpeechFormat"/>. </summary>
        /// <param name="value"> The string value of the format. </param>
        /// <exception cref="ArgumentNullException"> <paramref name="value"/> is null. </exception>
        public GeneratedSpeechFormat(string value)
        {
            _value = value ?? throw new ArgumentNullException(nameof(value));
        }

        private const string Mp3Value = "mp3";
        private const string OpusValue = "opus";
        private const string AacValue = "aac";
        private const string FlacValue = "flac";
        private const string WavValue = "wav";
        private const string PcmValue = "pcm";

        /// <summary> The <c>mp3</c> format. </summary>
        public static GeneratedSpeechFormat Mp3 { get; } = new GeneratedSpeechFormat(Mp3Value);

        /// <summary> The <c>opus</c> format. </summary>
        public static GeneratedSpeechFormat Opus { get; } = new GeneratedSpeechFormat(OpusValue);

        /// <summary> The <c>aac</c> format. </summary>
        public static GeneratedSpeechFormat Aac { get; } = new GeneratedSpeechFormat(AacValue);

        /// <summary> The <c>flac</c> format. </summary>
        public static GeneratedSpeechFormat Flac { get; } = new GeneratedSpeechFormat(FlacValue);

        /// <summary> The <c>wav</c> format. </summary>
        public static GeneratedSpeechFormat Wav { get; } = new GeneratedSpeechFormat(WavValue);

        /// <summary> The <c>pcm</c> format. </summary>
        public static GeneratedSpeechFormat Pcm { get; } = new GeneratedSpeechFormat(PcmValue);

        /// <summary> Determines whether two values are equal (ordinal, ignoring case). </summary>
        public static bool operator ==(GeneratedSpeechFormat left, GeneratedSpeechFormat right) => left.Equals(right);

        /// <summary> Determines whether two values are not equal (ordinal, ignoring case). </summary>
        public static bool operator !=(GeneratedSpeechFormat left, GeneratedSpeechFormat right) => !left.Equals(right);

        /// <summary> Converts a string to a <see cref="GeneratedSpeechFormat"/>. </summary>
        /// <exception cref="ArgumentNullException"> <paramref name="value"/> is null. </exception>
        public static implicit operator GeneratedSpeechFormat(string value) => new GeneratedSpeechFormat(value);

        /// <inheritdoc />
        [EditorBrowsable(EditorBrowsableState.Never)]
        public override bool Equals(object obj) => obj is GeneratedSpeechFormat other && Equals(other);

        /// <summary>
        /// Compares the underlying strings ordinally, ignoring case. Two default instances
        /// (both wrapping null) compare equal.
        /// </summary>
        public bool Equals(GeneratedSpeechFormat other) => string.Equals(_value, other._value, StringComparison.OrdinalIgnoreCase);

        /// <summary>
        /// Returns a hash code consistent with <see cref="Equals(GeneratedSpeechFormat)"/>;
        /// 0 for the default instance.
        /// </summary>
        [EditorBrowsable(EditorBrowsableState.Never)]
        public override int GetHashCode() => _value != null ? StringComparer.OrdinalIgnoreCase.GetHashCode(_value) : 0;

        /// <summary> Returns the underlying string value (null for the default instance). </summary>
        public override string ToString() => _value;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ void IJsonModel<SpeechGenerationOptions>.Write(Utf8JsonWriter writer, ModelReade
if (SerializedAdditionalRawData?.ContainsKey("response_format") != true && Optional.IsDefined(ResponseFormat))
{
writer.WritePropertyName("response_format"u8);
writer.WriteStringValue(ResponseFormat.Value.ToSerialString());
writer.WriteStringValue(ResponseFormat.Value.ToString());
}
if (SerializedAdditionalRawData?.ContainsKey("speed") != true && Optional.IsDefined(Speed))
if (SerializedAdditionalRawData?.ContainsKey("speed") != true && Optional.IsDefined(SpeedRatio))
{
writer.WritePropertyName("speed"u8);
writer.WriteNumberValue(Speed.Value);
writer.WriteNumberValue(SpeedRatio.Value);
}
if (SerializedAdditionalRawData != null)
{
Expand Down Expand Up @@ -118,7 +118,7 @@ internal static SpeechGenerationOptions DeserializeSpeechGenerationOptions(JsonE
{
continue;
}
responseFormat = property.Value.GetString().ToGeneratedSpeechFormat();
responseFormat = new GeneratedSpeechFormat(property.Value.GetString());
continue;
}
if (property.NameEquals("speed"u8))
Expand Down
5 changes: 2 additions & 3 deletions .dotnet/src/Generated/Models/SpeechGenerationOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,15 @@ public partial class SpeechGenerationOptions
{
internal IDictionary<string, BinaryData> SerializedAdditionalRawData { get; set; }

internal SpeechGenerationOptions(InternalCreateSpeechRequestModel model, string input, GeneratedSpeechVoice voice, GeneratedSpeechFormat? responseFormat, float? speed, IDictionary<string, BinaryData> serializedAdditionalRawData)
internal SpeechGenerationOptions(InternalCreateSpeechRequestModel model, string input, GeneratedSpeechVoice voice, GeneratedSpeechFormat? responseFormat, float? speedRatio, IDictionary<string, BinaryData> serializedAdditionalRawData)
{
Model = model;
Input = input;
Voice = voice;
ResponseFormat = responseFormat;
Speed = speed;
SpeedRatio = speedRatio;
SerializedAdditionalRawData = serializedAdditionalRawData;
}
public GeneratedSpeechFormat? ResponseFormat { get; set; }
public float? Speed { get; set; }
}
}
30 changes: 20 additions & 10 deletions .dotnet/tests/Audio/TextToSpeechTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,29 @@ public async Task BasicTextToSpeechWorks()

[Test]
[TestCase(null)]
[TestCase(GeneratedSpeechFormat.Mp3)]
[TestCase(GeneratedSpeechFormat.Opus)]
[TestCase(GeneratedSpeechFormat.Aac)]
[TestCase(GeneratedSpeechFormat.Flac)]
[TestCase(GeneratedSpeechFormat.Wav)]
[TestCase(GeneratedSpeechFormat.Pcm)]
public async Task OutputFormatWorks(GeneratedSpeechFormat? responseFormat)
[TestCase("mp3")]
[TestCase("opus")]
[TestCase("aac")]
[TestCase("flac")]
[TestCase("wav")]
[TestCase("pcm")]
public async Task OutputFormatWorks(string responseFormat)
{
AudioClient client = GetTestClient<AudioClient>(TestScenario.Audio_TTS);

SpeechGenerationOptions options = responseFormat == null
? new()
: new() { ResponseFormat = responseFormat };
SpeechGenerationOptions options = new()
{
ResponseFormat = responseFormat switch
{
"mp3" => GeneratedSpeechFormat.Mp3,
"opus" => GeneratedSpeechFormat.Opus,
"aac" => GeneratedSpeechFormat.Aac,
"flac" => GeneratedSpeechFormat.Flac,
"wav" => GeneratedSpeechFormat.Wav,
"pcm" => GeneratedSpeechFormat.Pcm,
_ => null
}
};

BinaryData audio = IsAsync
? await client.GenerateSpeechAsync("Hello, world!", GeneratedSpeechVoice.Alloy, options)
Expand Down