完成火山引擎适配

This commit is contained in:
Maxchen
2025-11-07 11:54:11 +08:00
parent e402330692
commit cb26e452bb
15 changed files with 568 additions and 18 deletions

View File

@@ -19,6 +19,7 @@
<properties>
<easyexcel.version>3.2.1</easyexcel.version>
<jna.version>5.13.0</jna.version>
<java-websocket.version>1.5.5</java-websocket.version>
</properties>
<!-- 按照用户要求,不添加任何依赖 -->
@@ -84,5 +85,11 @@
<version>${jna.version}</version>
</dependency>
<dependency>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>${java-websocket.version}</version>
</dependency>
</dependencies>
</project>

View File

@@ -1,4 +1,16 @@
package org.ruoyi.aihuman.config;
public class WebConfig {
import org.springframework.context.annotation.Configuration;
import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
/**
 * MVC configuration that exposes generated voice files under {@code /voice/**}.
 *
 * <p>The voice controller stores generated audio on the file system (below the working
 * directory), not on the classpath, so the handler must also look in those directories;
 * otherwise freshly generated files cannot be resolved until the application is rebuilt.
 */
@Configuration
public class WebConfig implements WebMvcConfigurer {

    /** Directory (relative to the working directory) the voice controller writes to first. */
    private static final String MODULE_VOICE_DIR = "ruoyi-modules/ruoyi-aihuman/src/main/resources/voice";

    /**
     * Registers the {@code /voice/**} handler.
     *
     * <p>Locations are checked in order: the module voice directory, the {@code voice}
     * fallback directory under the working directory, and finally the classpath location
     * that was served originally.
     *
     * @param registry the registry the resource handler is added to
     */
    @Override
    public void addResourceHandlers(ResourceHandlerRegistry registry) {
        String workingDir = System.getProperty("user.dir");
        registry.addResourceHandler("/voice/**")
                // Resource locations must end with '/' to be treated as directories.
                .addResourceLocations(
                        "file:" + workingDir + "/" + MODULE_VOICE_DIR + "/",
                        "file:" + workingDir + "/voice/",
                        "classpath:/voice/")
                .setCachePeriod(3600);
    }
}

View File

@@ -1,4 +1,508 @@
package org.ruoyi.aihuman.controller;
import cn.dev33.satoken.annotation.SaIgnore;
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.core.io.ResourceLoader;
import org.springframework.beans.factory.annotation.Autowired;
import org.ruoyi.aihuman.domain.VoiceRequest;
import org.ruoyi.aihuman.protocol.EventType;
import org.ruoyi.aihuman.protocol.Message;
import org.ruoyi.aihuman.protocol.MsgType;
import org.ruoyi.aihuman.protocol.SpeechWebSocketClient;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
import java.util.HashMap;
import java.util.UUID;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.concurrent.TimeUnit;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.servlet.config.annotation.ResourceHandlerRegistry;
import org.springframework.web.servlet.config.annotation.WebMvcConfigurer;
/**
* 火山引擎相关接口
*
* @author ruoyi
*/
// 临时免登录
@SaIgnore
@Validated
@RequiredArgsConstructor
@Slf4j
@RestController
@RequestMapping("/aihuman/volcengine")
public class AihumanVolcengineController {
@Autowired
private ResourceLoader resourceLoader;
private static final ObjectMapper objectMapper = new ObjectMapper();
private static final Logger logger = LoggerFactory.getLogger(AihumanVolcengineController.class);
/**
 * Synthesizes speech for the given request and returns the audio bytes in the response body.
 *
 * <p>Responds with 400 (empty body) when {@code endpoint}, {@code appId}, {@code accessToken}
 * or {@code text} is missing, and with 500 (empty body) when synthesis fails. The
 * {@code Content-Type} and the download file extension follow the requested encoding; a
 * missing or unrecognized encoding keeps the previous {@code audio/wav} / {@code .wav} labelling.
 *
 * @param request synthesis parameters deserialized from the JSON request body
 * @return the audio bytes on success, otherwise an empty error response
 */
@PostMapping("/generate-voice-direct")
public ResponseEntity<byte[]> generateVoiceDirect(@RequestBody VoiceRequest request) {
    // Fail fast with 400 instead of opening a connection that can only end in a 500.
    if (isMissing(request.getEndpoint()) || isMissing(request.getAppId())
            || isMissing(request.getAccessToken()) || isMissing(request.getText())) {
        log.warn("Rejecting generate-voice-direct request: endpoint, appId, accessToken and text are required");
        return ResponseEntity.badRequest().build();
    }
    try {
        // Unique id used to correlate the log lines of this request.
        String voiceId = UUID.randomUUID().toString().replace("-", "");
        log.info("开始生成语音voiceId: {}", voiceId);

        byte[] audioData = generateVoiceData(request, voiceId);

        String encoding = request.getEncoding();
        HttpHeaders headers = new HttpHeaders();
        headers.setContentType(MediaType.parseMediaType(audioMediaType(encoding)));
        // NOTE(review): setContentDispositionFormData emits a "form-data" disposition; kept for
        // compatibility with existing clients, but a plain "attachment" disposition may be intended.
        headers.setContentDispositionFormData("attachment",
                "voice_" + System.currentTimeMillis() + "." + audioFileExtension(encoding));
        headers.setContentLength(audioData.length);
        log.info("语音生成成功并返回,长度: {} bytes", audioData.length);
        return new ResponseEntity<>(audioData, headers, HttpStatus.OK);
    } catch (Exception e) {
        log.error("生成语音失败", e);
        return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR);
    }
}

/** Returns {@code true} when a required request field is null or empty. */
private static boolean isMissing(String value) {
    return value == null || value.isEmpty();
}

/** Maps the requested audio encoding to the media type used for the response. */
private static String audioMediaType(String encoding) {
    if ("mp3".equalsIgnoreCase(encoding)) {
        return "audio/mpeg";
    }
    if ("ogg_opus".equalsIgnoreCase(encoding)) {
        return "audio/ogg";
    }
    return "audio/wav";
}

/** Maps the requested audio encoding to a fixed, safe file extension for the download name. */
private static String audioFileExtension(String encoding) {
    if ("mp3".equalsIgnoreCase(encoding)) {
        return "mp3";
    }
    if ("ogg_opus".equalsIgnoreCase(encoding)) {
        return "ogg";
    }
    return "wav";
}
/**
 * Unpacks the request fields and delegates the synthesis to
 * {@code callVolcengineTtsApiByte}.
 *
 * @param request synthesis parameters supplied by the caller
 * @param voiceId identifier generated by the caller; not used by this method
 * @return the synthesized audio bytes
 * @throws RuntimeException wrapping any failure raised while reading the request or synthesizing
 */
private byte[] generateVoiceData(VoiceRequest request, String voiceId) {
    try {
        byte[] synthesized = callVolcengineTtsApiByte(
                request.getEndpoint(),
                request.getAppId(),
                request.getAccessToken(),
                request.getResourceId(),
                request.getVoice(),
                request.getText(),
                request.getEncoding());
        log.info("成功生成语音数据,大小: {} bytes", synthesized.length);
        return synthesized;
    } catch (Exception failure) {
        log.error("生成语音数据失败", failure);
        throw new RuntimeException("生成语音数据失败", failure);
    }
}
/**
 * Synthesizes {@code text} over a WebSocket TTS connection and returns the audio as one byte array.
 *
 * <p>The text is cut after each sentence-ending mark; every non-blank sentence is sent in its own
 * session (one task request per Unicode code point) and the audio payloads received until
 * {@code SESSION_FINISHED} are appended in order.
 *
 * @param endpoint    WebSocket URI of the TTS service
 * @param appId       value sent as the {@code X-Api-App-Key} header
 * @param accessToken value sent as the {@code X-Api-Access-Key} header
 * @param resourceId  value sent as the {@code X-Api-Resource-Id} header; derived from
 *                    {@code voice} when null or empty
 * @param voice       speaker identifier placed in {@code req_params.speaker}
 * @param text        text to synthesize; must not be null or empty
 * @param encoding    audio format placed in {@code audio_params.format}
 * @return the concatenated audio bytes (never empty)
 * @throws RuntimeException if the connection cannot be opened, the exchange fails, or no audio
 *                          data is received; the original failure is preserved as the cause
 */
private byte[] callVolcengineTtsApiByte(String endpoint, String appId, String accessToken,
        String resourceId, String voice, String text, String encoding) {
    try {
        if (text == null || text.isEmpty()) {
            throw new IllegalArgumentException("text cannot be null or empty");
        }
        // Fall back to the default resource id for the voice type when none is given.
        if (resourceId == null || resourceId.isEmpty()) {
            resourceId = voiceToResourceId(voice);
        }

        Map<String, String> headers = new HashMap<>();
        headers.put("X-Api-App-Key", appId);
        headers.put("X-Api-Access-Key", accessToken);
        headers.put("X-Api-Resource-Id", resourceId);
        headers.put("X-Api-Connect-Id", UUID.randomUUID().toString());

        SpeechWebSocketClient client = new SpeechWebSocketClient(new URI(endpoint), headers);
        ByteArrayOutputStream totalAudioStream = new ByteArrayOutputStream();
        boolean connected = false;
        try {
            // connectBlocking reports failure through its return value, not an exception.
            connected = client.connectBlocking();
            if (!connected) {
                throw new IllegalStateException("Unable to open WebSocket connection to " + endpoint);
            }

            // Request parts shared by every session on this connection.
            Map<String, Object> user = Map.of("uid", UUID.randomUUID().toString());
            String namespace = "BidirectionalTTS";
            Map<String, Object> audioParams = new HashMap<>();
            audioParams.put("format", encoding);
            audioParams.put("sample_rate", 24000);
            audioParams.put("enable_timestamp", true);
            Map<String, Object> reqParams = new HashMap<>();
            reqParams.put("speaker", voice);
            reqParams.put("audio_params", audioParams);
            reqParams.put("additions", objectMapper.writeValueAsString(Map.of("disable_markdown_filter", false)));

            client.sendStartConnection();
            client.waitForMessage(MsgType.FULL_SERVER_RESPONSE, EventType.CONNECTION_STARTED);

            // Split AFTER sentence terminators (ideographic full stop, full-width and ASCII
            // '!' / '?') so each sentence, not each character, gets its own session.
            for (String sentence : text.split("(?<=[\\u3002\\uFF01\\uFF1F!?])")) {
                if (sentence.trim().isEmpty()) {
                    continue;
                }
                String sessionId = UUID.randomUUID().toString();

                Map<String, Object> startReq = new HashMap<>();
                startReq.put("user", user);
                startReq.put("namespace", namespace);
                startReq.put("req_params", reqParams);
                startReq.put("event", EventType.START_SESSION.getValue());
                client.sendStartSession(objectMapper.writeValueAsBytes(startReq), sessionId);
                client.waitForMessage(MsgType.FULL_SERVER_RESPONSE, EventType.SESSION_STARTED);

                // Send the sentence one code point at a time; iterating chars would split
                // surrogate pairs and produce invalid text fragments.
                for (int offset = 0; offset < sentence.length(); ) {
                    int codePoint = sentence.codePointAt(offset);
                    offset += Character.charCount(codePoint);

                    Map<String, Object> currentReqParams = new HashMap<>(reqParams);
                    currentReqParams.put("text", new String(Character.toChars(codePoint)));
                    Map<String, Object> currentRequest = new HashMap<>();
                    currentRequest.put("user", user);
                    currentRequest.put("namespace", namespace);
                    currentRequest.put("req_params", currentReqParams);
                    currentRequest.put("event", EventType.TASK_REQUEST.getValue());
                    client.sendTaskRequest(objectMapper.writeValueAsBytes(currentRequest), sessionId);
                }
                client.sendFinishSession(sessionId);

                // Drain responses for this session until the server reports it finished.
                Message msg;
                do {
                    msg = client.receiveMessage();
                    switch (msg.getType()) {
                        case FULL_SERVER_RESPONSE:
                            break;
                        case AUDIO_ONLY_SERVER:
                            if (msg.getPayload() != null) {
                                totalAudioStream.write(msg.getPayload());
                            }
                            break;
                        default:
                            // Deliberately tolerant: log and keep reading.
                            log.warn("Unexpected message type: {}", msg.getType());
                    }
                } while (msg.getEvent() != EventType.SESSION_FINISHED);
            }

            if (totalAudioStream.size() == 0) {
                throw new RuntimeException("No audio data received");
            }
            log.info("Audio data generated successfully, size: {} bytes", totalAudioStream.size());
            return totalAudioStream.toByteArray();
        } finally {
            // A failing FinishConnection must neither mask the primary error nor skip the close.
            try {
                if (connected) {
                    client.sendFinishConnection();
                }
            } catch (Exception finishError) {
                log.warn("Failed to send FinishConnection: {}", finishError.getMessage());
            } finally {
                client.closeBlocking();
            }
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can observe the interruption.
        Thread.currentThread().interrupt();
        log.error("Error calling Volcengine TTS API: {}", e.getMessage(), e);
        throw new RuntimeException("Failed to generate voice", e);
    } catch (Exception e) {
        log.error("Error calling Volcengine TTS API: {}", e.getMessage(), e);
        throw new RuntimeException("Failed to generate voice", e);
    }
}
/**
 * Generates a voice file from the JSON request and returns the URL path it can be played from.
 *
 * <p>Responds with 400 and an {@code error} entry when {@code endpoint}, {@code appId},
 * {@code accessToken} or {@code text} is missing. A missing {@code encoding} defaults to
 * {@code mp3}. Any failure during generation is logged and reported as 500 with an
 * {@code error} entry.
 *
 * @param request synthesis parameters deserialized from the JSON request body
 * @return a map with {@code audioUrl} on success, or with {@code error} on failure
 */
@PostMapping("/generate-voice")
public ResponseEntity<?> generateVoice(@RequestBody VoiceRequest request) {
    try {
        // 1. Read the request parameters.
        String endpoint = request.getEndpoint();
        String appId = request.getAppId();
        String accessToken = request.getAccessToken();
        String resourceId = request.getResourceId();
        String voice = request.getVoice();
        String text = request.getText();
        String encoding = request.getEncoding();

        // 1.1 Validate the mandatory parameters.
        if (endpoint == null || endpoint.isEmpty()) {
            return ResponseEntity.badRequest().body(Map.of("error", "endpoint cannot be null or empty"));
        }
        if (appId == null || appId.isEmpty()) {
            return ResponseEntity.badRequest().body(Map.of("error", "appId cannot be null or empty"));
        }
        if (accessToken == null || accessToken.isEmpty()) {
            return ResponseEntity.badRequest().body(Map.of("error", "accessToken cannot be null or empty"));
        }
        if (text == null || text.isEmpty()) {
            return ResponseEntity.badRequest().body(Map.of("error", "text cannot be null or empty"));
        }

        // 1.2 Apply defaults.
        if (encoding == null || encoding.isEmpty()) {
            encoding = "mp3";
        }

        // 2. Generate the audio file.
        String audioUrl = callVolcengineTtsApi(endpoint, appId, accessToken, resourceId, voice, text, encoding);

        // 3. Build the response.
        Map<String, String> response = new HashMap<>();
        response.put("audioUrl", audioUrl);
        return ResponseEntity.ok(response);
    } catch (Exception e) {
        // Log with the stack trace; the client only receives the message.
        log.error("Failed to generate audio file", e);
        Map<String, String> errorResponse = new HashMap<>();
        errorResponse.put("error", "生成音频文件失败: " + e.getMessage());
        return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(errorResponse);
    }
}
/**
 * Synthesizes {@code text}, stores the audio as a uniquely named file in the voice directory
 * and returns the URL path under which the file is served.
 *
 * <p>The synthesis itself is delegated to {@code callVolcengineTtsApiByte}, so both endpoints
 * share one implementation of the WebSocket exchange.
 *
 * @param endpoint    WebSocket URI of the TTS service
 * @param appId       application key forwarded to the TTS service
 * @param accessToken access key forwarded to the TTS service
 * @param resourceId  resource id forwarded to the TTS service; may be null or empty
 * @param voice       speaker identifier
 * @param text        text to synthesize
 * @param encoding    audio format; also used as the file extension, so only letters, digits
 *                    and underscores are accepted
 * @return the path of the stored file in the form {@code /voice/<fileName>}
 * @throws IllegalArgumentException if {@code encoding} is null or contains other characters
 * @throws RuntimeException         if synthesis or writing the file fails
 */
private String callVolcengineTtsApi(String endpoint, String appId, String accessToken,
        String resourceId, String voice, String text, String encoding) {
    // encoding comes from the request body and ends up in a file name: reject anything that
    // could introduce path separators or traversal sequences.
    if (encoding == null || !encoding.matches("[A-Za-z0-9_]+")) {
        throw new IllegalArgumentException("Unsupported encoding: " + encoding);
    }

    // Throws when synthesis fails or yields no audio, so the result is never empty here.
    byte[] audio = callVolcengineTtsApiByte(endpoint, appId, accessToken, resourceId, voice, text, encoding);

    // Build a unique file name: timestamp plus a short random suffix.
    String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"));
    String randomId = UUID.randomUUID().toString().substring(0, 8);
    String fileName = "voice_" + timestamp + "_" + randomId + "." + encoding;

    String voiceDirPath = getVoiceDirectoryPath();
    String filePath = voiceDirPath + File.separator + fileName;
    try {
        Files.createDirectories(Paths.get(voiceDirPath));
        Files.write(Paths.get(filePath), audio);
        log.info("Audio saved to file: {}", filePath);
    } catch (Exception e) {
        log.error("Error saving audio file {}: {}", filePath, e.getMessage(), e);
        throw new RuntimeException("Failed to generate voice", e);
    }

    // URL path under which the stored file is exposed.
    return "/voice/" + fileName;
}
/**
 * Picks the default resource id for a voice.
 *
 * @param voice speaker identifier; may be null
 * @return {@code "volc.megatts.default"} when the voice starts with {@code "S_"},
 *         otherwise {@code "volc.service_type.10029"}
 */
private String voiceToResourceId(String voice) {
    boolean hasSPrefix = voice != null && voice.startsWith("S_");
    return hasSPrefix ? "volc.megatts.default" : "volc.service_type.10029";
}
/**
 * Resolves the directory generated voice files are written to, creating it when necessary.
 *
 * <p>Resolution order: the module's {@code src/main/resources/voice} directory below the
 * working directory; if that cannot be created, {@code voice} below the working directory;
 * if anything throws, a relative {@code voice} directory.
 *
 * @return the absolute path of the chosen directory
 */
private String getVoiceDirectoryPath() {
    try {
        String workingDir = System.getProperty("user.dir");
        File moduleVoiceDir = new File(workingDir, "ruoyi-modules/ruoyi-aihuman/src/main/resources/voice");

        if (moduleVoiceDir.exists()) {
            return moduleVoiceDir.getAbsolutePath();
        }
        if (moduleVoiceDir.mkdirs()) {
            logger.info("成功创建目录: {}", moduleVoiceDir.getAbsolutePath());
            return moduleVoiceDir.getAbsolutePath();
        }

        // Could not create the module directory: fall back to <working dir>/voice.
        logger.warn("无法创建目录: {}", moduleVoiceDir.getAbsolutePath());
        return existingOrCreated(new File(workingDir, "voice"));
    } catch (Exception e) {
        logger.error("获取音频目录路径失败", e);
        // Last resort: a relative "voice" directory.
        return existingOrCreated(new File("voice"));
    }
}

/** Creates {@code dir} (best effort) when it does not exist and returns its absolute path. */
private static String existingOrCreated(File dir) {
    if (!dir.exists()) {
        dir.mkdirs();
    }
    return dir.getAbsolutePath();
}
}

View File

@@ -1,4 +1,21 @@
package org.ruoyi.aihuman.domain;
import com.fasterxml.jackson.annotation.JsonProperty;
import lombok.Data;
/**
 * Request parameters for the voice-generation endpoints.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}.
 */
@Data
public class VoiceRequest {
// Bound to the upper-case JSON key "ENDPOINT"; the remaining fields declare no explicit
// JSON name. NOTE(review): confirm the mixed key casing is intended by the API contract.
@JsonProperty("ENDPOINT")
private String endpoint;
// Application id; the controller sends it as the X-Api-App-Key header.
private String appId;
// Access token; the controller sends it as the X-Api-Access-Key header.
private String accessToken;
// Resource id; the controller derives a default from `voice` when this is null or empty.
private String resourceId;
// Speaker/voice identifier.
private String voice;
// Text to synthesize.
private String text;
// Requested audio format.
private String encoding;
}

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.extern.slf4j.Slf4j;
import org.java_websocket.client.WebSocketClient;

View File

@@ -1,4 +1,4 @@
package com.speech.protocol;
package org.ruoyi.aihuman.protocol;
import lombok.Getter;

View File

@@ -1,10 +1,10 @@
package com.speech.volcengine;
package org.ruoyi.aihuman.volcengine;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.speech.protocol.EventType;
import com.speech.protocol.Message;
import com.speech.protocol.MsgType;
import com.speech.protocol.SpeechWebSocketClient;
import org.ruoyi.aihuman.protocol.EventType;
import org.ruoyi.aihuman.protocol.Message;
import org.ruoyi.aihuman.protocol.MsgType;
import org.ruoyi.aihuman.protocol.SpeechWebSocketClient;
import lombok.extern.slf4j.Slf4j;
import java.io.ByteArrayOutputStream;
@@ -40,7 +40,7 @@ public class Bidirection {
String accessToken = System.getProperty("accessToken", "fOHuq4R4dirMYiOruCU3Ek9q75zV0KVW");
String resourceId = System.getProperty("resourceId", "seed-tts-2.0");
String voice = System.getProperty("voice", "zh_female_vv_uranus_bigtts");
String text = System.getProperty("text", "你好呀!如果你有关于老婆相关的问题,比如怎么让她开心、怎么照顾她等,都可以跟我说哦,我会根据【马斯克·陈】提供的关爱老婆百事通里的信息给你分析和建议哒");
String text = System.getProperty("text", "你好呀!我是AI合成的语音很高兴认识你");
String encoding = System.getProperty("encoding", "mp3");
if (appId.isEmpty() || accessToken.isEmpty()) {

View File

@@ -0,0 +1,10 @@
# 代码生成
gen:
# 作者
author: ageerle
# 默认生成包路径 system 需改成自己的模块名称 如 system monitor tool
packageName: org.ruoyi.aihuman
# 自动去除表前缀默认是false
autoRemovePre: false
# 表前缀(生成类名不会包含表前缀,多个用逗号分隔)
tablePrefix: aihuman_