diff --git a/spring-ai-alibaba-audio-example/dashscope-audio/.gitignore b/spring-ai-alibaba-audio-example/dashscope-audio/.gitignore new file mode 100644 index 0000000..22efa57 --- /dev/null +++ b/spring-ai-alibaba-audio-example/dashscope-audio/.gitignore @@ -0,0 +1 @@ +src/main/resources/gen diff --git a/spring-ai-alibaba-audio-example/dashscope-audio/README.md b/spring-ai-alibaba-audio-example/dashscope-audio/README.md new file mode 100644 index 0000000..6c54766 --- /dev/null +++ b/spring-ai-alibaba-audio-example/dashscope-audio/README.md @@ -0,0 +1,3 @@ +# Spring AI Alibaba Audio Example + +演示使用阿里通义大模型进行音频处理的例子。包含 STT(语音识别) 和 TTS(文生语音)。 diff --git a/spring-ai-alibaba-audio-example/audio-example/pom.xml b/spring-ai-alibaba-audio-example/dashscope-audio/pom.xml similarity index 51% rename from spring-ai-alibaba-audio-example/audio-example/pom.xml rename to spring-ai-alibaba-audio-example/dashscope-audio/pom.xml index 1acb7b9..16f113a 100644 --- a/spring-ai-alibaba-audio-example/audio-example/pom.xml +++ b/spring-ai-alibaba-audio-example/dashscope-audio/pom.xml @@ -1,7 +1,7 @@ - + 4.0.0 - org.springframework.boot - spring-boot-starter-parent - 3.3.3 - + com.alibaba.cloud.ai + spring-ai-alibaba-audio-example + ${revision} + ../pom.xml - com.alibaba.cloud.ai - audio-example - 0.0.1-SNAPSHOT - audio-example - Audio Example project for Spring AI Alibaba + dashscope-audio + ${revision} - - UTF-8 - UTF-8 - 17 - 17 - 3.1.1 - - - 1.0.0-M3.3 - + Spring AI Alibaba Dashscope Audio Example + Spring AI Alibaba Dashscope Audio Examples - com.alibaba.cloud.ai - spring-ai-alibaba-starter - ${spring-ai-alibaba.version} + org.springframework.boot + spring-boot-starter-web - org.springframework.boot - spring-boot-starter-web + com.alibaba.cloud.ai + spring-ai-alibaba-starter + ${spring-ai-alibaba.version} commons-io commons-io - 2.16.1 + 2.18.0 + @@ -68,27 +59,21 @@ org.springframework.boot spring-boot-maven-plugin + ${spring-boot.version} + + + + repackage + + + 
org.apache.maven.plugins maven-deploy-plugin ${maven-deploy-plugin.version} - - true - - - - spring-milestones - Spring Milestones - https://repo.spring.io/milestone - - false - - - - diff --git a/spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/AudioExampleApplication.java b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/DashScopeAudioApplication.java similarity index 90% rename from spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/AudioExampleApplication.java rename to spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/DashScopeAudioApplication.java index faa5ad1..5d74e53 100644 --- a/spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/AudioExampleApplication.java +++ b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/DashScopeAudioApplication.java @@ -25,11 +25,11 @@ */ @SpringBootApplication -public class AudioExampleApplication { +public class DashScopeAudioApplication { public static void main(String[] args) { - SpringApplication.run(AudioExampleApplication.class, args); + SpringApplication.run(DashScopeAudioApplication.class, args); } } diff --git a/spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/stt/STTController.java b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/stt/STTController.java similarity index 90% rename from spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/stt/STTController.java rename to spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/stt/STTController.java index 804f3c8..3fc7a35 100644 --- 
a/spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/stt/STTController.java +++ b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/stt/STTController.java @@ -27,7 +27,8 @@ import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioTranscriptionOptions; import com.alibaba.cloud.ai.dashscope.audio.transcription.AudioTranscriptionModel; import com.alibaba.cloud.ai.dashscope.common.DashScopeException; -import jakarta.annotation.Resource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt; @@ -47,22 +48,27 @@ @RequestMapping("/ai/stt") public class STTController { - @Resource - private AudioTranscriptionModel transcriptionModel; + private final AudioTranscriptionModel transcriptionModel; + + private static final Logger log = LoggerFactory.getLogger(STTController.class); private static final String DEFAULT_MODEL_1 = "sensevoice-v1"; private static final String DEFAULT_MODEL_2 = "paraformer-realtime-v2"; - private static final String DEFAULT_MODEL_3 = "paraformer-v2"; - private static final String FILE_PATH = "spring-ai-alibaba-examples/audio-example/src/main/resources/stt/count.pcm"; + private static final String DEFAULT_MODEL_3 = "paraformer-v2"; private static final String AUDIO_RESOURCES_URL = "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav"; private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1); + public STTController(AudioTranscriptionModel transcriptionModel) { + + this.transcriptionModel = transcriptionModel; + } + @GetMapping - public DashScopeAudioTranscriptionApi.Response.Output stt() throws MalformedURLException { + public String stt() throws MalformedURLException { AudioTranscriptionResponse response = transcriptionModel.call( new AudioTranscriptionPrompt( @@ -73,7 +79,7 @@ 
public DashScopeAudioTranscriptionApi.Response.Output stt() throws MalformedURLE ) ); - return response.getMetadata().get("output"); + return response.getResult().getOutput(); } @GetMapping("/stream") @@ -85,7 +91,7 @@ public String streamSTT() { Flux response = transcriptionModel .stream( new AudioTranscriptionPrompt( - new FileSystemResource(FILE_PATH), + new FileSystemResource("spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/stt/count.pcm"), DashScopeAudioTranscriptionOptions.builder() .withModel(DEFAULT_MODEL_2) .withSampleRate(16000) @@ -162,7 +168,7 @@ private void checkTaskStatus(String taskId, StringBuilder stringBuilder, CountDo latch.countDown(); } else if (taskStatus.equals(DashScopeAudioTranscriptionApi.TaskStatus.FAILED)) { - System.err.println("Transcription failed."); + log.warn("Transcription failed."); latch.countDown(); } } diff --git a/spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/tts/TTSController.java b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/tts/TTSController.java similarity index 87% rename from spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/tts/TTSController.java rename to spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/tts/TTSController.java index 8f06fbd..a9eba3f 100644 --- a/spring-ai-alibaba-audio-example/audio-example/src/main/java/com/alibaba/cloud/ai/example/audio/tts/TTSController.java +++ b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/java/com/alibaba/cloud/ai/example/audio/tts/TTSController.java @@ -26,7 +26,6 @@ import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisPrompt; import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse; import jakarta.annotation.PreDestroy; -import jakarta.annotation.Resource; import org.apache.commons.io.FileUtils; import 
reactor.core.publisher.Flux; @@ -45,12 +44,16 @@ @RequestMapping("/ai/tts") public class TTSController implements ApplicationRunner { - @Resource - private SpeechSynthesisModel speechSynthesisModel; + private final SpeechSynthesisModel speechSynthesisModel; private static final String TEXT = "白日依山尽,黄河入海流。"; - private static final String FILE_PATH = "spring-ai-alibaba-examples/audio-example/src/main/resources/gen/tts/"; + private static final String FILE_PATH = "spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/gen/tts"; + + public TTSController(SpeechSynthesisModel speechSynthesisModel) { + + this.speechSynthesisModel = speechSynthesisModel; + } @GetMapping public void tts() throws IOException { @@ -59,7 +62,7 @@ public void tts() throws IOException { new SpeechSynthesisPrompt(TEXT) ); - File file = new File(FILE_PATH + "output.mp3"); + File file = new File(FILE_PATH + "/output.mp3"); try (FileOutputStream fos = new FileOutputStream(file)) { ByteBuffer byteBuffer = response.getResult().getOutput().getAudio(); fos.write(byteBuffer.array()); @@ -77,7 +80,7 @@ public void streamTTS() { ); CountDownLatch latch = new CountDownLatch(1); - File file = new File(FILE_PATH + "output-stream.mp3"); + File file = new File(FILE_PATH + "/output-stream.mp3"); try (FileOutputStream fos = new FileOutputStream(file)) { response.doFinally( @@ -102,7 +105,7 @@ public void streamTTS() { } @Override - public void run(ApplicationArguments args) throws Exception { + public void run(ApplicationArguments args) { File file = new File(FILE_PATH); if (!file.exists()) { diff --git a/spring-ai-alibaba-audio-example/audio-example/src/main/resources/application.yml b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/application.yml similarity index 61% rename from spring-ai-alibaba-audio-example/audio-example/src/main/resources/application.yml rename to spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/application.yml index b55aca1..c98da0f 100644 
--- a/spring-ai-alibaba-audio-example/audio-example/src/main/resources/application.yml +++ b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/application.yml @@ -1,9 +1,9 @@ server: - port: 8080 + port: 10009 spring: application: - name: audio-example-application + name: spring-ai-alibaba-audio-example-application ai: dashscope: diff --git a/spring-ai-alibaba-audio-example/audio-example/src/main/resources/stt/count.pcm b/spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/stt/count.pcm similarity index 100% rename from spring-ai-alibaba-audio-example/audio-example/src/main/resources/stt/count.pcm rename to spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/stt/count.pcm diff --git a/spring-ai-alibaba-audio-example/pom.xml b/spring-ai-alibaba-audio-example/pom.xml index b318c43..f057c37 100644 --- a/spring-ai-alibaba-audio-example/pom.xml +++ b/spring-ai-alibaba-audio-example/pom.xml @@ -36,7 +36,7 @@ Spring AI Alibaba Audio Examples - + dashscope-audio