Skip to content
Closed
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
17fcd84
feat:add speaker train
Oct 28, 2025
e8b8304
feat:speak train
Oct 28, 2025
771bd97
feat: train add
Oct 28, 2025
9b9e879
Merge remote-tracking branch 'origin/main' into feature/one-sentence-…
Oct 29, 2025
067e9c5
!131 Fix security issue: prevent unauthorized file access
Oct 29, 2025
163f957
Merge branch 'main' of ssh://code.iflytek.com:30004/xfyun_webdev_gite…
slqcode Oct 29, 2025
ecc0c51
Merge branch 'main' of github.com:wowo-zZ/astron-agent
slqcode Oct 29, 2025
5bcfb00
feat:add voice clone speaker
Oct 29, 2025
d334f51
Merge branch 'iflytek:main' into main
mikasa-code Oct 29, 2025
6f47883
Merge remote-tracking branch 'origin/main' into feature/one-sentence-…
Oct 29, 2025
8c6182b
fix:format
Oct 29, 2025
36bef6a
feat:add voice clone speaker
Oct 29, 2025
e9e2f04
Merge branch 'iflytek:main' into main
mikasa-code Oct 30, 2025
ff186a8
chore:add voice clone client
Oct 30, 2025
1d40f7c
Merge remote-tracking branch 'origin/main' into feature/one-sentence-…
Oct 30, 2025
9136703
chore: fix require language
Oct 30, 2025
60820d9
chore: add tts enum
Oct 30, 2025
9f1218b
chore: format
Oct 30, 2025
20b581c
refactor: remove unused config properties and inline constants
Oct 31, 2025
0da4847
refactor: rename model manufacturer to speaker type
Oct 31, 2025
9e8e78c
feat: add speaker training permissions and update pronunciation configs
Oct 31, 2025
d6bb2d6
feat: support internationalization for speaker names
Oct 31, 2025
7d6a29a
Merge remote-tracking branch 'origin/main' into feature/one-sentence-…
Oct 31, 2025
19def14
feat: remove deprecated RtaConfig class- Deleted unused RtaConfig con…
Oct 31, 2025
c7d518f
feat: add filename sanitization for audio uploads
Oct 31, 2025
446bbc0
fix: sanitize filename to prevent path traversal- Extract filename pa…
Oct 31, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ public enum ResponseEnum {
USER_APP_ID_NOT_EXISTE(60039, "error.app.not.exists"),
PERSONALITY_AI_GENERATE_PARAM_EMPTY(60040, "error.personality.ai.generate.param.empty"),
PERSONALITY_AI_GENERATE_ERROR(60041, "error.personality.ai.generate.failed"),
AUDIO_FILE_FORMAT_UNSUPPORTED(60042, "error.audio.file.format.unsupported"),
AUDIO_FILE_SIZE_EXCEEDED(60043, "error.audio.file.size.exceeded"),
AUDIO_CHANNELS_INVALID(60044, "error.audio.channels.invalid"),
AUDIO_SAMPLE_RATE_TOO_LOW(60045, "error.audio.sample.rate.too.low"),
AUDIO_BIT_DEPTH_INVALID(60046, "error.audio.bit.depth.invalid"),
AUDIO_DURATION_TOO_LONG(60047, "error.audio.duration.too.long"),
SPEAKER_TRAIN_FAILED(60048, "error.speaker.train.failed"),

// Spark API errors 60040-60080
SPARK_API_PARAM_ERROR(60040, "error.spark.api.param.error"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
package com.iflytek.astron.console.commons.util;

import com.iflytek.astron.console.commons.constant.ResponseEnum;
import com.iflytek.astron.console.commons.exception.BusinessException;
import lombok.extern.slf4j.Slf4j;
import org.jetbrains.annotations.NotNull;
import org.springframework.web.multipart.MultipartFile;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;

/**
 * Audio file validation utility class. Validates the file extension, the file size and, where the
 * Java Sound API can parse the content, the audio quality parameters (channels, sample rate, bit
 * depth and duration).
 *
 * @author bowang
 */
@Slf4j
public class AudioValidator {

    // Supported audio formats (compared against the lower-cased file extension)
    private static final List<String> SUPPORTED_FORMATS = Arrays.asList("wav", "mp3", "m4a", "pcm");

    // Audio quality requirements
    // mono channel
    private static final int REQUIRED_CHANNELS = 1;
    // 24kHz
    private static final float MIN_SAMPLE_RATE = 24000.0f;
    // 16bit
    private static final int REQUIRED_SAMPLE_SIZE = 16;
    // 40 seconds
    private static final int MAX_DURATION_SECONDS = 40;
    // 3MB
    private static final long MAX_FILE_SIZE_BYTES = 3 * 1024 * 1024;

    /**
     * Validate an uploaded audio file. The checks run in order: extension, size, audio properties;
     * the first failing check aborts validation.
     *
     * @param file uploaded file
     * @throws BusinessException when the file is null/empty or any validation step fails
     */
    public static void validateAudioFile(MultipartFile file) throws BusinessException {
        if (file == null || file.isEmpty()) {
            throw new BusinessException(ResponseEnum.FILE_EMPTY);
        }

        // 1. Check file format
        validateFileFormat(file);

        // 2. Check file size
        validateFileSize(file);

        // 3. Check audio properties
        validateAudioProperties(file);
    }

    /**
     * Validate that the original filename carries one of the supported extensions.
     *
     * @param file uploaded file
     * @throws BusinessException PARAM_MISS when there is no original filename,
     *         AUDIO_FILE_FORMAT_UNSUPPORTED when the extension is not supported
     */
    private static void validateFileFormat(MultipartFile file) throws BusinessException {
        String filename = file.getOriginalFilename();
        if (filename == null) {
            throw new BusinessException(ResponseEnum.PARAM_MISS);
        }

        // Locale.ROOT keeps the comparison independent of the server's default locale
        String extension = getFileExtension(filename).toLowerCase(Locale.ROOT);
        if (!SUPPORTED_FORMATS.contains(extension)) {
            throw new BusinessException(ResponseEnum.AUDIO_FILE_FORMAT_UNSUPPORTED);
        }
    }

    /**
     * Validate that the file does not exceed the maximum allowed size.
     *
     * @param file uploaded file
     * @throws BusinessException AUDIO_FILE_SIZE_EXCEEDED when the file is larger than the limit
     */
    private static void validateFileSize(MultipartFile file) throws BusinessException {
        if (file.getSize() > MAX_FILE_SIZE_BYTES) {
            throw new BusinessException(ResponseEnum.AUDIO_FILE_SIZE_EXCEEDED);
        }
    }

    /**
     * Validate audio properties, choosing the strategy by file extension. When the content cannot be
     * read or parsed by the Java Sound API, validation falls back to the basic size check instead of
     * rejecting the file.
     *
     * @param file uploaded file
     * @throws BusinessException when a property check or the fallback check fails
     */
    private static void validateAudioProperties(MultipartFile file) throws BusinessException {
        String filename = file.getOriginalFilename();
        if (filename == null) {
            return;
        }

        String extension = getFileExtension(filename).toLowerCase(Locale.ROOT);

        try {
            // For WAV and PCM formats, Java Sound API can be used for detailed validation
            if ("wav".equals(extension) || "pcm".equals(extension)) {
                validateWavPcmProperties(file);
            } else if ("mp3".equals(extension) || "m4a".equals(extension)) {
                // For MP3 and M4A, only basic checks are performed currently
                // Java Sound API has limited support for these formats
                validateMp3M4aBasic(file);
            }
        } catch (IOException | UnsupportedAudioFileException e) {
            log.warn("Audio file validation failed: {}", e.getMessage());
            // For audio files that cannot be parsed, only basic checks are performed
            validateBasicAudioProperties(file);
        }
    }

    /**
     * Validate audio properties for WAV and PCM formats using the Java Sound API.
     *
     * @param file uploaded file
     * @throws IOException when the upload stream cannot be read
     * @throws UnsupportedAudioFileException when the content is not a format Java Sound recognizes
     * @throws BusinessException when channels, sample rate, bit depth or duration are out of range
     */
    private static void validateWavPcmProperties(MultipartFile file) throws IOException, UnsupportedAudioFileException, BusinessException {
        // AudioSystem.getAudioInputStream needs mark/reset to probe the header; the upload stream
        // is not guaranteed to support it, so it is wrapped in a BufferedInputStream. Without the
        // wrapper the probe can fail with IOException and the caller would silently fall back to
        // the basic check. The raw stream is a separate resource so it is closed even when the
        // probe throws before an AudioInputStream exists.
        try (InputStream rawStream = file.getInputStream();
                InputStream markableStream = new BufferedInputStream(rawStream);
                AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(markableStream)) {
            AudioFormat format = getAudioFormat(audioInputStream);

            // Check duration (within 40 seconds); skipped when the frame count or frame rate is
            // not specified by the stream
            long frameLength = audioInputStream.getFrameLength();
            float frameRate = format.getFrameRate();
            if (frameLength != AudioSystem.NOT_SPECIFIED && frameRate > 0) {
                float durationSeconds = frameLength / frameRate;
                if (durationSeconds > MAX_DURATION_SECONDS) {
                    throw new BusinessException(ResponseEnum.AUDIO_DURATION_TOO_LONG);
                }
            }
        }
    }

    /**
     * Read the stream's audio format and validate channels, sample rate and bit depth.
     *
     * @param audioInputStream parsed audio stream
     * @return the validated format of the stream
     * @throws BusinessException AUDIO_CHANNELS_INVALID, AUDIO_SAMPLE_RATE_TOO_LOW or
     *         AUDIO_BIT_DEPTH_INVALID when the corresponding requirement is not met
     */
    @NotNull
    private static AudioFormat getAudioFormat(AudioInputStream audioInputStream) {
        AudioFormat format = audioInputStream.getFormat();

        // Check number of channels (mono)
        if (format.getChannels() != REQUIRED_CHANNELS) {
            throw new BusinessException(ResponseEnum.AUDIO_CHANNELS_INVALID);
        }

        // Check sample rate (24kHz and above)
        if (format.getSampleRate() < MIN_SAMPLE_RATE) {
            throw new BusinessException(ResponseEnum.AUDIO_SAMPLE_RATE_TOO_LOW);
        }

        // Check bit depth (16bit)
        if (format.getSampleSizeInBits() != REQUIRED_SAMPLE_SIZE) {
            throw new BusinessException(ResponseEnum.AUDIO_BIT_DEPTH_INVALID);
        }
        return format;
    }

    /**
     * Basic check for MP3 and M4A formats. Only logs a warning when the file looks larger than
     * expected; it never rejects the file.
     *
     * @param file uploaded file
     */
    private static void validateMp3M4aBasic(MultipartFile file) throws BusinessException {
        // For MP3 and M4A, only basic validation can be performed currently
        // Duration check is roughly estimated by file size (this is not a precise method, but it is a
        // reasonable approximation without specialized libraries)
        long fileSize = file.getSize();

        // Rough estimate: 16bit mono 24kHz audio is approximately 48KB per second
        // 40 seconds of audio is approximately 1.92MB, leaving some margin
        long estimatedMaxSizeForDuration = (long) (MAX_DURATION_SECONDS * 48000 * 1.5);

        if (fileSize > estimatedMaxSizeForDuration) {
            log.warn("Audio file size {} exceeds expected, may be too long", fileSize);
            // Do not throw exception because this is only a rough estimate
        }
    }

    /**
     * Validate basic audio properties (used when audio format cannot be parsed).
     *
     * @param file uploaded file
     * @throws BusinessException PARAM_ERROR when the file is smaller than 1KB
     */
    private static void validateBasicAudioProperties(MultipartFile file) throws BusinessException {
        // Basic validation: file size reasonableness check
        long fileSize = file.getSize();

        // Ensure file is not too small (at least 1KB)
        if (fileSize < 1024) {
            throw new BusinessException(ResponseEnum.PARAM_ERROR);
        }

        log.info("Audio file passed basic validation, filename: {}, size: {} bytes", file.getOriginalFilename(), fileSize);
    }

    /**
     * Get file extension.
     *
     * @param filename file name to inspect
     * @return the text after the last dot, or an empty string when there is no dot or the name ends
     *         with a dot
     */
    private static String getFileExtension(String filename) {
        int lastDotIndex = filename.lastIndexOf('.');
        if (lastDotIndex == -1 || lastDotIndex == filename.length() - 1) {
            return "";
        }
        return filename.substring(lastDotIndex + 1);
    }
}
11 changes: 11 additions & 0 deletions console/backend/commons/src/main/resources/messages_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,18 @@ language style, and scene information, strictly follow the role persona content
%s\n\
#Role Task:\n\
%s
error.personality.ai.generate.param.empty=AI personality generation parameter is empty
error.personality.ai.generate.failed=AI personality generation failed

# Default Bot Model Names
default.bot.model.x1=Spark X1 Large Model
default.bot.model.spark_4_0=Spark V4.0 Ultra Large Model

# Audio validation related error messages
error.audio.file.format.unsupported=Unsupported audio format, only supports: wav, mp3, m4a, pcm
error.audio.file.size.exceeded=Audio file size cannot exceed 3MB
error.audio.channels.invalid=Audio must be mono channel
error.audio.sample.rate.too.low=Audio sample rate must be 24kHz or higher
error.audio.bit.depth.invalid=Audio bit depth must be 16bit
error.audio.duration.too.long=Audio duration cannot exceed 40 seconds
error.speaker.train.failed=Sound training failed, please check if the audio file meets the requirements and if the corresponding ability has been authorized
11 changes: 11 additions & 0 deletions console/backend/commons/src/main/resources/messages_zh.properties
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,18 @@ personality.prompt=按照角色人设和角色任务,扮演角色完成对话
%s\n\
#角色任务:\n\
%s
error.personality.ai.generate.param.empty=AI人设生成参数为空
error.personality.ai.generate.failed=AI人设生成失败

# 默认Bot模型名称
default.bot.model.x1=星火大模型 Spark X1
default.bot.model.spark_4_0=星火大模型 Spark V4.0 Ultra

# 音频验证相关错误消息
error.audio.file.format.unsupported=不支持的音频格式,仅支持: wav, mp3, m4a, pcm
error.audio.file.size.exceeded=音频文件大小不能超过3MB
error.audio.channels.invalid=音频必须为单通道
error.audio.sample.rate.too.low=音频采样率必须为24kHz及以上
error.audio.bit.depth.invalid=音频位深度必须为16bit
error.audio.duration.too.long=音频时长不能超过40秒
error.speaker.train.failed=声音训练失败,请检查音频文件是否符合要求,是否已授权对应能力
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# speaker name
speaker.lingXiaoTang=Xiaotang Ling
speaker.lingXiaoYue=Xiaoyue Ling
speaker.lingFeiZhe=Feizhe Ling
speaker.lingXiaoQi=Xiaoqi Ling
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# speaker name
speaker.lingXiaoTang=聆小糖
speaker.lingXiaoYue=聆小玥
speaker.lingFeiZhe=聆飞哲
speaker.lingXiaoQi=聆小琪
5 changes: 5 additions & 0 deletions console/backend/hub/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@
<artifactId>converter-jackson</artifactId>
</dependency>

<dependency>
<groupId>cn.xfyun</groupId>
<artifactId>websdk-java-speech</artifactId>
</dependency>

</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.iflytek.astron.console.hub.config;

import java.util.Locale;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.support.ResourceBundleMessageSource;
Expand All @@ -10,6 +9,8 @@
import org.springframework.web.servlet.i18n.AcceptHeaderLocaleResolver;
import org.springframework.web.servlet.i18n.LocaleChangeInterceptor;

import java.util.Locale;

@Configuration
public class InternationalConfig implements WebMvcConfigurer {

Expand Down Expand Up @@ -43,8 +44,9 @@ public void addInterceptors(InterceptorRegistry registry) {
@Bean
public ResourceBundleMessageSource messageSource() {
ResourceBundleMessageSource messageSource = new ResourceBundleMessageSource();
// Set resource file base name, corresponding to messages.properties files under classpath
messageSource.setBasename("messages");
// Set resource file base names, corresponding to messages.properties and speaker.properties files
// under classpath
messageSource.setBasenames("messages", "speaker");
// Set encoding format
messageSource.setDefaultEncoding("UTF-8");
// Whether to use default message when corresponding message is not found
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package com.iflytek.astron.console.hub.config;

import cn.xfyun.api.VoiceTrainClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes a {@link VoiceTrainClient} bean built from the
 * {@code spark.app-id} and {@code spark.api-key} properties.
 */
@Configuration
public class VoiceTrainConfig {

    // Injected from the spark.app-id property
    @Value("${spark.app-id}")
    private String sparkAppId;

    // Injected from the spark.api-key property
    @Value("${spark.api-key}")
    private String sparkApiKey;

    /**
     * Build the voice training client from the injected app id and API key.
     *
     * @return the client produced by the SDK builder
     */
    @Bean
    public VoiceTrainClient voiceTrainClient() {
        VoiceTrainClient.Builder clientBuilder = new VoiceTrainClient.Builder(sparkAppId, sparkApiKey);
        return clientBuilder.build();
    }
}
Loading
Loading