Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add dashscope audio example #27

Merged
merged 10 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions spring-ai-alibaba-audio-example/dashscope-audio/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
src/main/resources/gen
3 changes: 3 additions & 0 deletions spring-ai-alibaba-audio-example/dashscope-audio/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Spring AI Alibaba Audio Example

演示使用阿里通义大模型进行音频处理的例子。包含 STT(语音识别)和 TTS(文生语音)。
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>

<!--
Copyright 2023-2024 the original author or authors.
Copyright 2025 the original author or authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -16,79 +16,64 @@
limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.3.3</version>
<relativePath/>
<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>spring-ai-alibaba-audio-example</artifactId>
<version>${revision}</version>
<relativePath>../pom.xml</relativePath>
</parent>

<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>audio-example</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>audio-example</name>
<description>Audio Example project for Spring AI Alibaba</description>
<artifactId>dashscope-audio</artifactId>
<version>${revision}</version>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>

<!-- Spring AI -->
<spring-ai-alibaba.version>1.0.0-M3.3</spring-ai-alibaba.version>
</properties>
<description>Spring AI Alibaba Dashscope Audio Example</description>
<name>Spring AI Alibaba Dashscope Audio Examples</name>

<dependencies>
<dependency>
<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>spring-ai-alibaba-starter</artifactId>
<version>${spring-ai-alibaba.version}</version>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<groupId>com.alibaba.cloud.ai</groupId>
<artifactId>spring-ai-alibaba-starter</artifactId>
<version>${spring-ai-alibaba.version}</version>
</dependency>

<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.16.1</version>
<version>2.18.0</version>
</dependency>

</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<version>${spring-boot.version}</version>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-deploy-plugin</artifactId>
<version>${maven-deploy-plugin.version}</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
</plugins>
</build>

<repositories>
<repository>
<id>spring-milestones</id>
<name>Spring Milestones</name>
<url>https://repo.spring.io/milestone</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
*/

@SpringBootApplication
public class AudioExampleApplication {
public class DashScopeAudioApplication {

public static void main(String[] args) {

SpringApplication.run(AudioExampleApplication.class, args);
SpringApplication.run(DashScopeAudioApplication.class, args);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import com.alibaba.cloud.ai.dashscope.audio.DashScopeAudioTranscriptionOptions;
import com.alibaba.cloud.ai.dashscope.audio.transcription.AudioTranscriptionModel;
import com.alibaba.cloud.ai.dashscope.common.DashScopeException;
import jakarta.annotation.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Flux;

import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
Expand All @@ -47,22 +48,27 @@
@RequestMapping("/ai/stt")
public class STTController {

@Resource
private AudioTranscriptionModel transcriptionModel;
private final AudioTranscriptionModel transcriptionModel;

private static final Logger log = LoggerFactory.getLogger(STTController.class);

private static final String DEFAULT_MODEL_1 = "sensevoice-v1";

private static final String DEFAULT_MODEL_2 = "paraformer-realtime-v2";
private static final String DEFAULT_MODEL_3 = "paraformer-v2";

private static final String FILE_PATH = "spring-ai-alibaba-examples/audio-example/src/main/resources/stt/count.pcm";
private static final String DEFAULT_MODEL_3 = "paraformer-v2";

private static final String AUDIO_RESOURCES_URL = "https://dashscope.oss-cn-beijing.aliyuncs.com/samples/audio/paraformer/hello_world_female2.wav";

private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);

/**
 * Creates the controller and stores the transcription model that the
 * speech-to-text endpoints of this class call.
 *
 * @param transcriptionModel the model assigned to the final
 *        {@code transcriptionModel} field
 */
public STTController(AudioTranscriptionModel transcriptionModel) {

this.transcriptionModel = transcriptionModel;
}

@GetMapping
public DashScopeAudioTranscriptionApi.Response.Output stt() throws MalformedURLException {
public String stt() throws MalformedURLException {

AudioTranscriptionResponse response = transcriptionModel.call(
new AudioTranscriptionPrompt(
Expand All @@ -73,7 +79,7 @@ public DashScopeAudioTranscriptionApi.Response.Output stt() throws MalformedURLE
)
);

return response.getMetadata().get("output");
return response.getResult().getOutput();
}

@GetMapping("/stream")
Expand All @@ -85,7 +91,7 @@ public String streamSTT() {
Flux<AudioTranscriptionResponse> response = transcriptionModel
.stream(
new AudioTranscriptionPrompt(
new FileSystemResource(FILE_PATH),
new FileSystemResource("spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/stt/count.pcm"),
DashScopeAudioTranscriptionOptions.builder()
.withModel(DEFAULT_MODEL_2)
.withSampleRate(16000)
Expand Down Expand Up @@ -162,7 +168,7 @@ private void checkTaskStatus(String taskId, StringBuilder stringBuilder, CountDo
latch.countDown();
}
else if (taskStatus.equals(DashScopeAudioTranscriptionApi.TaskStatus.FAILED)) {
System.err.println("Transcription failed.");
log.warn("Transcription failed.");
latch.countDown();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisPrompt;
import com.alibaba.cloud.ai.dashscope.audio.synthesis.SpeechSynthesisResponse;
import jakarta.annotation.PreDestroy;
import jakarta.annotation.Resource;
import org.apache.commons.io.FileUtils;
import reactor.core.publisher.Flux;

Expand All @@ -45,12 +44,16 @@
@RequestMapping("/ai/tts")
public class TTSController implements ApplicationRunner {

@Resource
private SpeechSynthesisModel speechSynthesisModel;
private final SpeechSynthesisModel speechSynthesisModel;

private static final String TEXT = "白日依山尽,黄河入海流。";

private static final String FILE_PATH = "spring-ai-alibaba-examples/audio-example/src/main/resources/gen/tts/";
private static final String FILE_PATH = "spring-ai-alibaba-audio-example/dashscope-audio/src/main/resources/gen/tts";

/**
 * Creates the controller and stores the given speech synthesis model.
 *
 * @param speechSynthesisModel the model assigned to the final
 *        {@code speechSynthesisModel} field
 */
public TTSController(SpeechSynthesisModel speechSynthesisModel) {

this.speechSynthesisModel = speechSynthesisModel;
}

@GetMapping
public void tts() throws IOException {
Expand All @@ -59,7 +62,7 @@ public void tts() throws IOException {
new SpeechSynthesisPrompt(TEXT)
);

File file = new File(FILE_PATH + "output.mp3");
File file = new File(FILE_PATH + "/output.mp3");
try (FileOutputStream fos = new FileOutputStream(file)) {
ByteBuffer byteBuffer = response.getResult().getOutput().getAudio();
fos.write(byteBuffer.array());
Expand All @@ -77,7 +80,7 @@ public void streamTTS() {
);

CountDownLatch latch = new CountDownLatch(1);
File file = new File(FILE_PATH + "output-stream.mp3");
File file = new File(FILE_PATH + "/output-stream.mp3");
try (FileOutputStream fos = new FileOutputStream(file)) {

response.doFinally(
Expand All @@ -102,7 +105,7 @@ public void streamTTS() {
}

@Override
public void run(ApplicationArguments args) throws Exception {
public void run(ApplicationArguments args) {

File file = new File(FILE_PATH);
if (!file.exists()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
server:
port: 8080
port: 10009

spring:
application:
name: audio-example-application
name: spring-ai-alibaba-audio-example-application

ai:
dashscope:
Expand Down
2 changes: 1 addition & 1 deletion spring-ai-alibaba-audio-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
<name>Spring AI Alibaba Audio Examples</name>

<modules>
<!--<module>dashscope</module>-->
<module>dashscope-audio</module>
</modules>

<build>
Expand Down
Loading