Skip to content

Commit

Permalink
feat: add dashscope audio example
Browse files Browse the repository at this point in the history
Signed-off-by: yuluo-yx <[email protected]>
  • Loading branch information
yuluo-yx committed Jan 5, 2025
1 parent 1abbdad commit 03252d4
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 63 deletions.
1 change: 1 addition & 0 deletions spring-ai-alibaba-audio-example/dashscope-audio/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/main/resources/gen
3 changes: 3 additions & 0 deletions spring-ai-alibaba-audio-example/dashscope-audio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Spring AI Alibaba Audio Example

演示使用阿里通义大模型进行音频处理的例子。包含 STT(语音识别) 和 TTS(文生语音)。
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>

<!--
Copyright 2023-2024 the original author or authors.
Copyright 2025 the original author or authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -16,79 +16,64 @@
limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.3.3</version>
<relativePath/>
<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>spring-ai-alibaba-audio-example</artifactId>
<version>${revision}</version>
<relativePath>../pom.xml</relativePath>
</parent>

<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>audio-example</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>audio-example</name>
<description>Audio Example project for Spring AI Alibaba</description>
<artifactId>dashscope-audio</artifactId>
<version>${revision}</version>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>

<!-- Spring AI -->
<spring-ai-alibaba.version>1.0.0-M3.3</spring-ai-alibaba.version>
</properties>
<description>Spring AI Alibaba Dashscope Audio Example</description>
<name>Spring AI Alibaba Dashscope Audio Examples</name>

<dependencies>
<dependency>
<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>spring-ai-alibaba-starter</artifactId>
<version>${spring-ai-alibaba.version}</version>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>spring-ai-alibaba-starter</artifactId>
<version>${spring-ai-alibaba.version}</version>
</dependency>

<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<version>2.18.0</version>
</dependency>

</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>${spring-boot.version}</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>${maven-deploy-plugin.version}</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
</plugins>
</build>

<repositories>
<repository>
<id>spring-milestones</id>
<name>Spring Milestones</name>
<url>https://repo.spring.io/milestone</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
*/

@SpringBootApplication
public class AudioExampleApplication {
public class DashScopeAudioApplication {

public static void main(String[] args) {

SpringApplication.run(AudioExampleApplication.class, args);
SpringApplication.run(DashScopeAudioApplication.class, args);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioTranscriptionOptions;
import com.alibaba.cloud.ai.dashscope.audio.transcription.AudioTranscriptionModel;
import com.alibaba.cloud.ai.dashscope.common.DashScopeException;
import jakarta.annotation.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Flux;

import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
Expand All @@ -47,22 +48,27 @@
@RequestMapping("/ai/stt")
public class STTController {

@Resource
private AudioTranscriptionModel transcriptionModel;
private final AudioTranscriptionModel transcriptionModel;

private static final Logger log = LoggerFactory.getLogger(STTController.class);

private static final String DEFAULT_MODEL_1 = "sensevoice-v1";

private static final String DEFAULT_MODEL_2 = "paraformer-realtime-v2";
private static final String DEFAULT_MODEL_3 = "paraformer-v2";

private static final String FILE_PATH = "spring-ai-alibaba-examples/audio-example/src/main/resources/stt/count.pcm";
private static final String DEFAULT_MODEL_3 = "paraformer-v2";

private static final String AUDIO_RESOURCES_URL = "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav";

private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);

public STTController(AudioTranscriptionModel transcriptionModel) {

this.transcriptionModel = transcriptionModel;
}

@GetMapping
public DashScopeAudioTranscriptionApi.Response.Output stt() throws MalformedURLException {
public String stt() throws MalformedURLException {

AudioTranscriptionResponse response = transcriptionModel.call(
new AudioTranscriptionPrompt(
Expand All @@ -73,7 +79,7 @@ public DashScopeAudioTranscriptionApi.Response.Output stt() throws MalformedURLE
)
);

return response.getMetadata().get("output");
return response.getResult().getOutput();
}

@GetMapping("/stream")
Expand All @@ -85,7 +91,7 @@ public String streamSTT() {
Flux<AudioTranscriptionResponse> response = transcriptionModel
.stream(
new AudioTranscriptionPrompt(
new FileSystemResource(FILE_PATH),
new FileSystemResource("spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/stt/count.pcm"),
DashScopeAudioTranscriptionOptions.builder()
.withModel(DEFAULT_MODEL_2)
.withSampleRate(16000)
Expand Down Expand Up @@ -162,7 +168,7 @@ private void checkTaskStatus(String taskId, StringBuilder stringBuilder, CountDo
latch.countDown();
}
else if (taskStatus.equals(DashScopeAudioTranscriptionApi.TaskStatus.FAILED)) {
System.err.println("Transcription failed.");
log.warn("Transcription failed.");
latch.countDown();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisPrompt;
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse;
import jakarta.annotation.PreDestroy;
import jakarta.annotation.Resource;
import org.apache.commons.io.FileUtils;
import reactor.core.publisher.Flux;

Expand All @@ -45,12 +44,16 @@
@RequestMapping("/ai/tts")
public class TTSController implements ApplicationRunner {

@Resource
private SpeechSynthesisModel speechSynthesisModel;
private final SpeechSynthesisModel speechSynthesisModel;

private static final String TEXT = "白日依山尽,黄河入海流。";

private static final String FILE_PATH = "spring-ai-alibaba-examples/audio-example/src/main/resources/gen/tts/";
private static final String FILE_PATH = "spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/gen/tts";

public TTSController(SpeechSynthesisModel speechSynthesisModel) {

this.speechSynthesisModel = speechSynthesisModel;
}

@GetMapping
public void tts() throws IOException {
Expand All @@ -59,7 +62,7 @@ public void tts() throws IOException {
new SpeechSynthesisPrompt(TEXT)
);

File file = new File(FILE_PATH + "output.mp3");
File file = new File(FILE_PATH + "/output.mp3");
try (FileOutputStream fos = new FileOutputStream(file)) {
ByteBuffer byteBuffer = response.getResult().getOutput().getAudio();
fos.write(byteBuffer.array());
Expand All @@ -77,7 +80,7 @@ public void streamTTS() {
);

CountDownLatch latch = new CountDownLatch(1);
File file = new File(FILE_PATH + "output-stream.mp3");
File file = new File(FILE_PATH + "/output-stream.mp3");
try (FileOutputStream fos = new FileOutputStream(file)) {

response.doFinally(
Expand All @@ -102,7 +105,7 @@ public void streamTTS() {
}

@Override
public void run(ApplicationArguments args) throws Exception {
public void run(ApplicationArguments args) {

File file = new File(FILE_PATH);
if (!file.exists()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
server:
port: 8080
port: 10009

spring:
application:
name: audio-example-application
name: spring-ai-alibaba-audio-example-application

ai:
dashscope:
Expand Down
2 changes: 1 addition & 1 deletion spring-ai-alibaba-audio-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
<name>Spring AI Alibaba Audio Examples</name>

<modules>
<!--<module>dashscope</module>-->
<module>dashscope-audio</module>
</modules>

<build>
Expand Down

0 comments on commit 03252d4

Please sign in to comment.