iflytek · mikasa-code · Oct 28, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 29, 2025
diff --git a/...ckend/commons/src/main/java/com/iflytek/astron/console/commons/constant/ResponseEnum.java b/...ckend/commons/src/main/java/com/iflytek/astron/console/commons/constant/ResponseEnum.java
@@ -88,6 +88,13 @@ public enum ResponseEnum {
     USER_APP_ID_NOT_EXISTE(60039, "error.app.not.exists"),
     PERSONALITY_AI_GENERATE_PARAM_EMPTY(60040, "error.personality.ai.generate.param.empty"),
     PERSONALITY_AI_GENERATE_ERROR(60041, "error.personality.ai.generate.failed"),
+    AUDIO_FILE_FORMAT_UNSUPPORTED(60042, "error.audio.file.format.unsupported"),
+    AUDIO_FILE_SIZE_EXCEEDED(60043, "error.audio.file.size.exceeded"),
+    AUDIO_CHANNELS_INVALID(60044, "error.audio.channels.invalid"),
+    AUDIO_SAMPLE_RATE_TOO_LOW(60045, "error.audio.sample.rate.too.low"),
+    AUDIO_BIT_DEPTH_INVALID(60046, "error.audio.bit.depth.invalid"),
+    AUDIO_DURATION_TOO_LONG(60047, "error.audio.duration.too.long"),
+    SPEAKER_TRAIN_FAILED(60048, "error.speaker.train.failed"),
 
     // Spark API errors 60040-60080
     SPARK_API_PARAM_ERROR(60040, "error.spark.api.param.error"),

diff --git a/...backend/commons/src/main/java/com/iflytek/astron/console/commons/util/AudioValidator.java b/...backend/commons/src/main/java/com/iflytek/astron/console/commons/util/AudioValidator.java
@@ -0,0 +1,196 @@
+package com.iflytek.astron.console.commons.util;
+
+import com.iflytek.astron.console.commons.constant.ResponseEnum;
+import com.iflytek.astron.console.commons.exception.BusinessException;
+import lombok.extern.slf4j.Slf4j;
+import org.jetbrains.annotations.NotNull;
+import org.springframework.web.multipart.MultipartFile;
+
+import javax.sound.sampled.AudioFormat;
+import javax.sound.sampled.AudioInputStream;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.UnsupportedAudioFileException;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Audio file validation utility class Validates audio format, quality parameters, etc.
+ *
+ * @author bowang
+ */
+@Slf4j
+public class AudioValidator {
+
+    // Supported audio formats
+    private static final List<String> SUPPORTED_FORMATS = Arrays.asList("wav", "mp3", "m4a", "pcm");
+
+    // Audio quality requirements
+    // mono channel
+    private static final int REQUIRED_CHANNELS = 1;
+    // 24kHz
+    private static final float MIN_SAMPLE_RATE = 24000.0f;
+    // 16bit
+    private static final int REQUIRED_SAMPLE_SIZE = 16;
+    // 40 seconds
+    private static final int MAX_DURATION_SECONDS = 40;
+    // 3MB
+    private static final long MAX_FILE_SIZE_BYTES = 3 * 1024 * 1024;
+
+    /**
+     * Validate audio file
+     *
+     * @param file uploaded file
+     * @throws BusinessException throws business exception when validation fails
+     */
+    public static void validateAudioFile(MultipartFile file) throws BusinessException {
+        if (file == null || file.isEmpty()) {
+            throw new BusinessException(ResponseEnum.FILE_EMPTY);
+        }
+
+        // 1. Check file format
+        validateFileFormat(file);
+
+        // 2. Check file size
+        validateFileSize(file);
+
+        // 3. Check audio properties
+        validateAudioProperties(file);
+    }
+
+    /**
+     * Validate file format
+     */
+    private static void validateFileFormat(MultipartFile file) throws BusinessException {
+        String filename = file.getOriginalFilename();
+        if (filename == null) {
+            throw new BusinessException(ResponseEnum.PARAM_MISS);
+        }
+
+        String extension = getFileExtension(filename).toLowerCase();
+        if (!SUPPORTED_FORMATS.contains(extension)) {
+            throw new BusinessException(ResponseEnum.AUDIO_FILE_FORMAT_UNSUPPORTED);
+        }
+    }
+
+    /**
+     * Validate file size
+     */
+    private static void validateFileSize(MultipartFile file) throws BusinessException {
+        if (file.getSize() > MAX_FILE_SIZE_BYTES) {
+            throw new BusinessException(ResponseEnum.AUDIO_FILE_SIZE_EXCEEDED);
+        }
+    }
+
+    /**
+     * Validate audio properties
+     */
+    private static void validateAudioProperties(MultipartFile file) throws BusinessException {
+        String filename = file.getOriginalFilename();
+        if (filename == null) {
+            return;
+        }
+
+        String extension = getFileExtension(filename).toLowerCase();
+
+        try {
+            // For WAV and PCM formats, Java Sound API can be used for detailed validation
+            if ("wav".equals(extension) || "pcm".equals(extension)) {
+                validateWavPcmProperties(file);
+            } else if ("mp3".equals(extension) || "m4a".equals(extension)) {
+                // For MP3 and M4A, only basic checks are performed currently
+                // Java Sound API has limited support for these formats
+                validateMp3M4aBasic(file);
+            }
+        } catch (IOException | UnsupportedAudioFileException e) {
+            log.warn("Audio file validation failed: {}", e.getMessage());
+            // For audio files that cannot be parsed, only basic checks are performed
+            validateBasicAudioProperties(file);
+        }
+    }
+
+    /**
+     * Validate audio properties for WAV and PCM formats
+     */
+    private static void validateWavPcmProperties(MultipartFile file) throws IOException, UnsupportedAudioFileException, BusinessException {
+        try (AudioInputStream audioInputStream = AudioSystem.getAudioInputStream(file.getInputStream())) {
+            AudioFormat format = getAudioFormat(audioInputStream);
+
+            // Check duration (within 40 seconds)
+            long frameLength = audioInputStream.getFrameLength();
+            float frameRate = format.getFrameRate();
+            if (frameRate > 0) {
+                float durationSeconds = frameLength / frameRate;
+                if (durationSeconds > MAX_DURATION_SECONDS) {
+                    throw new BusinessException(ResponseEnum.AUDIO_DURATION_TOO_LONG);
+                }
+            }
+        }
+    }
+
+    @NotNull
+    private static AudioFormat getAudioFormat(AudioInputStream audioInputStream) {
+        AudioFormat format = audioInputStream.getFormat();
+
+        // Check number of channels (mono)
+        if (format.getChannels() != REQUIRED_CHANNELS) {
+            throw new BusinessException(ResponseEnum.AUDIO_CHANNELS_INVALID);
+        }
+
+        // Check sample rate (24kHz and above)
+        if (format.getSampleRate() < MIN_SAMPLE_RATE) {
+            throw new BusinessException(ResponseEnum.AUDIO_SAMPLE_RATE_TOO_LOW);
+        }
+
+        // Check bit depth (16bit)
+        if (format.getSampleSizeInBits() != REQUIRED_SAMPLE_SIZE) {
+            throw new BusinessException(ResponseEnum.AUDIO_BIT_DEPTH_INVALID);
+        }
+        return format;
+    }
+
+    /**
+     * Validate basic properties for MP3 and M4A formats
+     */
+    private static void validateMp3M4aBasic(MultipartFile file) throws BusinessException {
+        // For MP3 and M4A, only basic validation can be performed currently
+        // Duration check is roughly estimated by file size (this is not a precise method, but it is a
+        // reasonable approximation without specialized libraries)
+        long fileSize = file.getSize();
+
+        // Rough estimate: 16bit mono 24kHz audio is approximately 48KB per second
+        // 40 seconds of audio is approximately 1.92MB, leaving some margin
+        long estimatedMaxSizeForDuration = (long) (MAX_DURATION_SECONDS * 48000 * 1.5);
+
+        if (fileSize > estimatedMaxSizeForDuration) {
+            log.warn("Audio file size {} exceeds expected, may be too long", fileSize);
+            // Do not throw exception because this is only a rough estimate
+        }
+    }
+
+    /**
+     * Validate basic audio properties (used when audio format cannot be parsed)
+     */
+    private static void validateBasicAudioProperties(MultipartFile file) throws BusinessException {
+        // Basic validation: file size reasonableness check
+        long fileSize = file.getSize();
+
+        // Ensure file is not too small (at least 1KB)
+        if (fileSize < 1024) {
+            throw new BusinessException(ResponseEnum.PARAM_ERROR);
+        }
+
+        log.info("Audio file passed basic validation, filename: {}, size: {} bytes", file.getOriginalFilename(), fileSize);
+    }
+
+    /**
+     * Get file extension
+     */
+    private static String getFileExtension(String filename) {
+        int lastDotIndex = filename.lastIndexOf('.');
+        if (lastDotIndex == -1 || lastDotIndex == filename.length() - 1) {
+            return "";
+        }
+        return filename.substring(lastDotIndex + 1);
+    }
+}
diff --git a/console/backend/commons/src/main/resources/messages_en.properties b/console/backend/commons/src/main/resources/messages_en.properties
@@ -453,7 +453,18 @@ language style, and scene information, strictly follow the role persona content
 %s\n\
 #Role Task:\n\
 %s
+error.personality.ai.generate.param.empty=AI personality generation parameter is empty
+error.personality.ai.generate.failed=AI personality generation failed
 
 # Default Bot Model Names
 default.bot.model.x1=Spark X1 Large Model
 default.bot.model.spark_4_0=Spark V4.0 Ultra Large Model
+
+# Audio validation related error messages
+error.audio.file.format.unsupported=Unsupported audio format, only supports: wav, mp3, m4a, pcm
+error.audio.file.size.exceeded=Audio file size cannot exceed 3MB
+error.audio.channels.invalid=Audio must be mono channel
+error.audio.sample.rate.too.low=Audio sample rate must be 24kHz or higher
+error.audio.bit.depth.invalid=Audio bit depth must be 16bit
+error.audio.duration.too.long=Audio duration cannot exceed 40 seconds
+error.speaker.train.failed=Sound training failed, please check if the audio file meets the requirements and if the corresponding ability has been authorized
diff --git a/console/backend/commons/src/main/resources/messages_zh.properties b/console/backend/commons/src/main/resources/messages_zh.properties
@@ -453,7 +453,18 @@ personality.prompt=按照角色人设和角色任务，扮演角色完成对话
 %s\n\
 #角色任务：\n\
 %s
+error.personality.ai.generate.param.empty=AI人设生成参数为空
+error.personality.ai.generate.failed=AI人设生成失败
 
 # 默认Bot模型名称
 default.bot.model.x1=星火大模型 Spark X1
 default.bot.model.spark_4_0=星火大模型 Spark V4.0 Ultra
+
+# 音频验证相关错误消息
+error.audio.file.format.unsupported=不支持的音频格式，仅支持: wav, mp3, m4a, pcm
+error.audio.file.size.exceeded=音频文件大小不能超过3MB
+error.audio.channels.invalid=音频必须为单通道
+error.audio.sample.rate.too.low=音频采样率必须为24kHz及以上
+error.audio.bit.depth.invalid=音频位深度必须为16bit
+error.audio.duration.too.long=音频时长不能超过40秒
+error.speaker.train.failed=声音训练失败,请检查音频文件是否符合要求,是否已授权对应能力
diff --git a/console/backend/commons/src/main/resources/speaker_en.properties b/console/backend/commons/src/main/resources/speaker_en.properties
@@ -0,0 +1,5 @@
+# speaker name
+speaker.lingXiaoTang=Xiaotang Ling
+speaker.lingXiaoYue=Xiaoyue Ling
+speaker.lingFeiZhe=Feizhe Ling
+speaker.lingXiaoQi=Xiaoqi Ling
diff --git a/console/backend/commons/src/main/resources/speaker_zh.properties b/console/backend/commons/src/main/resources/speaker_zh.properties
@@ -0,0 +1,5 @@
+# speaker name
+speaker.lingXiaoTang=聆小糖
+speaker.lingXiaoYue=聆小玥
+speaker.lingFeiZhe=聆飞哲
+speaker.lingXiaoQi=聆小琪
diff --git a/console/backend/hub/pom.xml b/console/backend/hub/pom.xml
@@ -152,6 +152,11 @@
             <artifactId>converter-jackson</artifactId>
         </dependency>
 
+        <dependency>
+            <groupId>cn.xfyun</groupId>
+            <artifactId>websdk-java-speech</artifactId>
+        </dependency>
+
     </dependencies>
 
     <build>

diff --git a/.../backend/hub/src/main/java/com/iflytek/astron/console/hub/config/InternationalConfig.java b/.../backend/hub/src/main/java/com/iflytek/astron/console/hub/config/InternationalConfig.java
@@ -1,6 +1,5 @@
 package com.iflytek.astron.console.hub.config;
 
-import java.util.Locale;
 import org.springframework.context.annotation.Bean;
 import org.springframework.context.annotation.Configuration;
 import org.springframework.context.support.ResourceBundleMessageSource;
@@ -10,6 +9,8 @@
 import org.springframework.web.servlet.i18n.AcceptHeaderLocaleResolver;
 import org.springframework.web.servlet.i18n.LocaleChangeInterceptor;
 
+import java.util.Locale;
+
 @Configuration
 public class InternationalConfig implements WebMvcConfigurer {
 
@@ -43,8 +44,9 @@ public void addInterceptors(InterceptorRegistry registry) {
     @Bean
     public ResourceBundleMessageSource messageSource() {
         ResourceBundleMessageSource messageSource = new ResourceBundleMessageSource();
-        // Set resource file base name, corresponding to messages.properties files under classpath
-        messageSource.setBasename("messages");
+        // Set resource file base names, corresponding to messages.properties and speaker.properties files
+        // under classpath
+        messageSource.setBasenames("messages", "speaker");
         // Set encoding format
         messageSource.setDefaultEncoding("UTF-8");
         // Whether to use default message when corresponding message is not found

diff --git a/...ole/backend/hub/src/main/java/com/iflytek/astron/console/hub/config/VoiceTrainConfig.java b/...ole/backend/hub/src/main/java/com/iflytek/astron/console/hub/config/VoiceTrainConfig.java
@@ -0,0 +1,21 @@
+package com.iflytek.astron.console.hub.config;
+
+import cn.xfyun.api.VoiceTrainClient;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+
+@Configuration
+public class VoiceTrainConfig {
+
+    @Value("${spark.app-id}")
+    private String appId;
+
+    @Value("${spark.api-key}")
+    private String apiKey;
+
+    @Bean
+    public VoiceTrainClient voiceTrainClient() {
+        return new VoiceTrainClient.Builder(appId, apiKey).build();
+    }
+}