mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-10 18:27:07 +00:00
add:添加火山引擎语音合成
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
package org.ruoyi.aihuman.config;
|
||||
|
||||
/**
 * Empty class in the {@code org.ruoyi.aihuman.config} package; it declares no fields,
 * constructors or methods in this revision.
 *
 * <p>NOTE(review): presumably reserved for web-layer configuration of the aihuman module —
 * confirm the intended content before relying on it.
 */
public class WebConfig {
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package org.ruoyi.aihuman.controller;
|
||||
|
||||
/**
 * Empty class in the {@code org.ruoyi.aihuman.controller} package; it declares no endpoints,
 * fields or methods in this revision.
 *
 * <p>NOTE(review): presumably the future HTTP entry point for the Volcengine speech features —
 * confirm before wiring it into routing.
 */
public class AihumanVolcengineController {
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package org.ruoyi.aihuman.domain;
|
||||
|
||||
/**
 * Empty class in the {@code org.ruoyi.aihuman.domain} package; it declares no fields or
 * methods in this revision.
 *
 * <p>NOTE(review): presumably the request body for voice synthesis calls — the actual
 * properties still have to be defined.
 */
public class VoiceRequest {
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum CompressionBits {
|
||||
None_((byte) 0),
|
||||
Gzip((byte) 0b1),
|
||||
Custom((byte) 0b11),
|
||||
;
|
||||
|
||||
private final byte value;
|
||||
|
||||
CompressionBits(byte b) {
|
||||
this.value = b;
|
||||
}
|
||||
|
||||
public static CompressionBits fromValue(int value) {
|
||||
for (CompressionBits type : CompressionBits.values()) {
|
||||
if (type.value == value) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown CompressionBits value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum EventType {
|
||||
// Default event
|
||||
NONE(0),
|
||||
|
||||
// Upstream Connection events (1-49)
|
||||
START_CONNECTION(1),
|
||||
START_TASK(1),
|
||||
FINISH_CONNECTION(2),
|
||||
FINISH_TASK(2),
|
||||
|
||||
// Downstream Connection events (50-99)
|
||||
CONNECTION_STARTED(50),
|
||||
TASK_STARTED(50),
|
||||
CONNECTION_FAILED(51),
|
||||
TASK_FAILED(51),
|
||||
CONNECTION_FINISHED(52),
|
||||
TASK_FINISHED(52),
|
||||
|
||||
// Upstream Session events (100-149)
|
||||
START_SESSION(100),
|
||||
CANCEL_SESSION(101),
|
||||
FINISH_SESSION(102),
|
||||
|
||||
// Downstream Session events (150-199)
|
||||
SESSION_STARTED(150),
|
||||
SESSION_CANCELED(151),
|
||||
SESSION_FINISHED(152),
|
||||
SESSION_FAILED(153),
|
||||
USAGE_RESPONSE(154),
|
||||
CHARGE_DATA(154),
|
||||
|
||||
// Upstream General events (200-249)
|
||||
TASK_REQUEST(200),
|
||||
UPDATE_CONFIG(201),
|
||||
|
||||
// Downstream General events (250-299)
|
||||
AUDIO_MUTED(250),
|
||||
|
||||
// Upstream TTS events (300-349)
|
||||
SAY_HELLO(300),
|
||||
|
||||
// Downstream TTS events (350-399)
|
||||
TTS_SENTENCE_START(350),
|
||||
TTS_SENTENCE_END(351),
|
||||
TTS_RESPONSE(352),
|
||||
TTS_ENDED(359),
|
||||
PODCAST_ROUND_START(360),
|
||||
PODCAST_ROUND_RESPONSE(361),
|
||||
PODCAST_ROUND_END(362),
|
||||
|
||||
// Downstream ASR events (450-499)
|
||||
ASR_INFO(450),
|
||||
ASR_RESPONSE(451),
|
||||
ASR_ENDED(459),
|
||||
|
||||
// Upstream Chat events (500-549)
|
||||
CHAT_TTS_TEXT(500),
|
||||
|
||||
// Downstream Chat events (550-599)
|
||||
CHAT_RESPONSE(550),
|
||||
CHAT_ENDED(559),
|
||||
|
||||
// Subtitle events (650-699)
|
||||
SOURCE_SUBTITLE_START(650),
|
||||
SOURCE_SUBTITLE_RESPONSE(651),
|
||||
SOURCE_SUBTITLE_END(652),
|
||||
TRANSLATION_SUBTITLE_START(653),
|
||||
TRANSLATION_SUBTITLE_RESPONSE(654),
|
||||
TRANSLATION_SUBTITLE_END(655);
|
||||
|
||||
private final int value;
|
||||
|
||||
EventType(int value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static EventType fromValue(int value) {
|
||||
for (EventType type : EventType.values()) {
|
||||
if (type.value == value) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown EventType value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum HeaderSizeBits {
|
||||
HeaderSize4((byte) 1),
|
||||
HeaderSize8((byte) 2),
|
||||
HeaderSize12((byte) 3),
|
||||
HeaderSize16((byte) 4),
|
||||
;
|
||||
|
||||
private final byte value;
|
||||
|
||||
HeaderSizeBits(byte b) {
|
||||
this.value = b;
|
||||
}
|
||||
|
||||
public static HeaderSizeBits fromValue(int value) {
|
||||
for (HeaderSizeBits type : HeaderSizeBits.values()) {
|
||||
if (type.value == value) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown HeaderSizeBits value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,220 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
@Slf4j
|
||||
@Data
|
||||
public class Message {
|
||||
private byte version = VersionBits.Version1.getValue();
|
||||
private byte headerSize = HeaderSizeBits.HeaderSize4.getValue();
|
||||
private MsgType type;
|
||||
private MsgTypeFlagBits flag;
|
||||
private byte serialization = SerializationBits.JSON.getValue();
|
||||
private byte compression = 0;
|
||||
|
||||
private EventType event;
|
||||
private String sessionId;
|
||||
private String connectId;
|
||||
private int sequence;
|
||||
private int errorCode;
|
||||
|
||||
private byte[] payload;
|
||||
|
||||
public Message(MsgType type, MsgTypeFlagBits flag) {
|
||||
this.type = type;
|
||||
this.flag = flag;
|
||||
}
|
||||
|
||||
public static Message unmarshal(byte[] data) throws Exception {
|
||||
ByteBuffer buffer = ByteBuffer.wrap(data);
|
||||
|
||||
byte type_and_flag = data[1];
|
||||
MsgType type = MsgType.fromValue((type_and_flag >> 4) & 0x0F);
|
||||
MsgTypeFlagBits flag = MsgTypeFlagBits.fromValue(type_and_flag & 0x0F);
|
||||
|
||||
// Read version and header size
|
||||
int versionAndHeaderSize = buffer.get();
|
||||
VersionBits version = VersionBits.fromValue((versionAndHeaderSize >> 4) & 0x0F);
|
||||
HeaderSizeBits headerSize = HeaderSizeBits.fromValue(versionAndHeaderSize & 0x0F);
|
||||
|
||||
// Skip second byte
|
||||
buffer.get();
|
||||
|
||||
// Read serialization and compression method
|
||||
int serializationCompression = buffer.get();
|
||||
SerializationBits serialization = SerializationBits.fromValue((serializationCompression >> 4) & 0x0F);
|
||||
CompressionBits compression = CompressionBits.fromValue(serializationCompression & 0x0F);
|
||||
|
||||
// Skip padding bytes
|
||||
int headerSizeInt = 4 * (int) headerSize.getValue();
|
||||
int paddingSize = headerSizeInt - 3;
|
||||
while (paddingSize > 0) {
|
||||
buffer.get();
|
||||
paddingSize -= 1;
|
||||
}
|
||||
|
||||
Message message = new Message(type, flag);
|
||||
message.setVersion(version.getValue());
|
||||
message.setHeaderSize(headerSize.getValue());
|
||||
message.setSerialization(serialization.getValue());
|
||||
message.setCompression(compression.getValue());
|
||||
|
||||
// Read sequence if present
|
||||
if (flag == MsgTypeFlagBits.POSITIVE_SEQ || flag == MsgTypeFlagBits.NEGATIVE_SEQ) {
|
||||
// Read 4 bytes from ByteBuffer and parse as int (big-endian)
|
||||
byte[] sequeueBytes = new byte[4];
|
||||
if (buffer.remaining() >= 4) {
|
||||
buffer.get(sequeueBytes); // Read 4 bytes into array
|
||||
ByteBuffer wrapper = ByteBuffer.wrap(sequeueBytes);
|
||||
wrapper.order(ByteOrder.BIG_ENDIAN); // Set big-endian order
|
||||
message.setSequence(wrapper.getInt());
|
||||
}
|
||||
}
|
||||
|
||||
// Read event if present
|
||||
if (flag == MsgTypeFlagBits.WITH_EVENT) {
|
||||
// Read 4 bytes from ByteBuffer and parse as int (big-endian)
|
||||
byte[] eventBytes = new byte[4];
|
||||
if (buffer.remaining() >= 4) {
|
||||
buffer.get(eventBytes); // Read 4 bytes into array
|
||||
ByteBuffer wrapper = ByteBuffer.wrap(eventBytes);
|
||||
wrapper.order(ByteOrder.BIG_ENDIAN); // Set big-endian order
|
||||
message.setEvent(EventType.fromValue(wrapper.getInt()));
|
||||
}
|
||||
|
||||
if (type != MsgType.ERROR && !(message.event == EventType.START_CONNECTION
|
||||
|| message.event == EventType.FINISH_CONNECTION ||
|
||||
message.event == EventType.CONNECTION_STARTED
|
||||
|| message.event == EventType.CONNECTION_FAILED ||
|
||||
message.event == EventType.CONNECTION_FINISHED)) {
|
||||
// Read sessionId if present
|
||||
int sessionIdLength = buffer.getInt();
|
||||
if (sessionIdLength > 0) {
|
||||
byte[] sessionIdBytes = new byte[sessionIdLength];
|
||||
buffer.get(sessionIdBytes);
|
||||
message.setSessionId(new String(sessionIdBytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
|
||||
if (message.event == EventType.CONNECTION_STARTED || message.event == EventType.CONNECTION_FAILED
|
||||
|| message.event == EventType.CONNECTION_FINISHED) {
|
||||
// Read connectId if present
|
||||
int connectIdLength = buffer.getInt();
|
||||
if (connectIdLength > 0) {
|
||||
byte[] connectIdBytes = new byte[connectIdLength];
|
||||
buffer.get(connectIdBytes);
|
||||
message.setConnectId(new String(connectIdBytes, StandardCharsets.UTF_8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read errorCode if present
|
||||
if (type == MsgType.ERROR) {
|
||||
// Read 4 bytes from ByteBuffer and parse as int (big-endian)
|
||||
byte[] errorCodeBytes = new byte[4];
|
||||
if (buffer.remaining() >= 4) {
|
||||
buffer.get(errorCodeBytes); // Read 4 bytes into array
|
||||
ByteBuffer wrapper = ByteBuffer.wrap(errorCodeBytes);
|
||||
wrapper.order(ByteOrder.BIG_ENDIAN); // Set big-endian order
|
||||
message.setErrorCode(wrapper.getInt());
|
||||
}
|
||||
}
|
||||
|
||||
// Read remaining bytes as payload
|
||||
if (buffer.remaining() > 0) {
|
||||
// 4 bytes length
|
||||
int payloadLength = buffer.getInt();
|
||||
if (payloadLength > 0) {
|
||||
byte[] payloadBytes = new byte[payloadLength];
|
||||
buffer.get(payloadBytes);
|
||||
message.setPayload(payloadBytes);
|
||||
}
|
||||
}
|
||||
|
||||
return message;
|
||||
}
|
||||
|
||||
public byte[] marshal() throws Exception {
|
||||
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
|
||||
|
||||
// Write header
|
||||
buffer.write((version & 0x0F) << 4 | (headerSize & 0x0F));
|
||||
buffer.write((type.getValue() & 0x0F) << 4 | (flag.getValue() & 0x0F));
|
||||
buffer.write((serialization & 0x0F) << 4 | (compression & 0x0F));
|
||||
|
||||
int headerSizeInt = 4 * (int) headerSize;
|
||||
int padding = headerSizeInt - buffer.size();
|
||||
while (padding > 0) {
|
||||
buffer.write(0);
|
||||
padding -= 1;
|
||||
}
|
||||
|
||||
// Write event if present
|
||||
if (event != null) {
|
||||
byte[] eventBytes = ByteBuffer.allocate(4).putInt(event.getValue()).array();
|
||||
buffer.write(eventBytes);
|
||||
}
|
||||
|
||||
// Write sessionId if present
|
||||
if (sessionId != null) {
|
||||
byte[] sessionIdBytes = sessionId.getBytes(StandardCharsets.UTF_8);
|
||||
buffer.write(ByteBuffer.allocate(4).putInt(sessionIdBytes.length).array());
|
||||
buffer.write(sessionIdBytes);
|
||||
}
|
||||
|
||||
// Write connectId if present
|
||||
if (connectId != null) {
|
||||
byte[] connectIdBytes = connectId.getBytes(StandardCharsets.UTF_8);
|
||||
buffer.write(ByteBuffer.allocate(4).putInt(connectIdBytes.length).array());
|
||||
buffer.write(connectIdBytes);
|
||||
}
|
||||
|
||||
// Write sequence if present
|
||||
if (sequence != 0) {
|
||||
buffer.write(ByteBuffer.allocate(4).putInt(sequence).array());
|
||||
}
|
||||
|
||||
// Write errorCode if present
|
||||
if (errorCode != 0) {
|
||||
buffer.write(ByteBuffer.allocate(4).putInt(errorCode).array());
|
||||
}
|
||||
|
||||
// Write payload if present
|
||||
if (payload != null && payload.length > 0) {
|
||||
buffer.write(ByteBuffer.allocate(4).putInt(payload.length).array());
|
||||
buffer.write(payload);
|
||||
}
|
||||
return buffer.toByteArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
switch (this.type) {
|
||||
case AUDIO_ONLY_SERVER:
|
||||
case AUDIO_ONLY_CLIENT:
|
||||
if (this.flag == MsgTypeFlagBits.POSITIVE_SEQ || this.flag == MsgTypeFlagBits.NEGATIVE_SEQ) {
|
||||
return String.format("MsgType: %s, EventType: %s, Sequence: %d, PayloadSize: %d", this.type, this.event, this.sequence,
|
||||
this.payload != null ? this.payload.length : 0);
|
||||
}
|
||||
return String.format("MsgType: %s, EventType: %s, PayloadSize: %d", this.type, this.event,
|
||||
this.payload != null ? this.payload.length : 0);
|
||||
case ERROR:
|
||||
return String.format("MsgType: %s, EventType: %s, ErrorCode: %d, Payload: %s", this.type, this.event, this.errorCode,
|
||||
this.payload != null ? new String(this.payload) : "null");
|
||||
default:
|
||||
if (this.flag == MsgTypeFlagBits.POSITIVE_SEQ || this.flag == MsgTypeFlagBits.NEGATIVE_SEQ) {
|
||||
return String.format("MsgType: %s, EventType: %s, Sequence: %d, Payload: %s",
|
||||
this.type, this.event, this.sequence,
|
||||
this.payload != null ? new String(this.payload) : "null");
|
||||
}
|
||||
return String.format("MsgType: %s, EventType: %s, Payload: %s", this.type, this.event,
|
||||
this.payload != null ? new String(this.payload) : "null");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum MsgType {
|
||||
INVALID((byte) 0),
|
||||
FULL_CLIENT_REQUEST((byte) 0b1),
|
||||
AUDIO_ONLY_CLIENT((byte) 0b10),
|
||||
FULL_SERVER_RESPONSE((byte) 0b1001),
|
||||
AUDIO_ONLY_SERVER((byte) 0b1011),
|
||||
FRONT_END_RESULT_SERVER((byte) 0b1100),
|
||||
ERROR((byte) 0b1111);
|
||||
|
||||
private final byte value;
|
||||
|
||||
MsgType(byte value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static MsgType fromValue(int value) {
|
||||
for (MsgType type : MsgType.values()) {
|
||||
if (type.value == value) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown MsgType value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum MsgTypeFlagBits {
|
||||
NO_SEQ((byte) 0), // Non-terminating packet without sequence number
|
||||
POSITIVE_SEQ((byte) 0b1), // Non-terminating packet with positive sequence number
|
||||
LAST_NO_SEQ((byte) 0b10), // Terminating packet without sequence number
|
||||
NEGATIVE_SEQ((byte) 0b11), // Terminating packet with negative sequence number
|
||||
WITH_EVENT((byte) 0b100); // Packet containing event number
|
||||
|
||||
private final byte value;
|
||||
|
||||
MsgTypeFlagBits(byte value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public static MsgTypeFlagBits fromValue(int value) {
|
||||
for (MsgTypeFlagBits flag : MsgTypeFlagBits.values()) {
|
||||
if (flag.value == value) {
|
||||
return flag;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown MsgTypeFlagBits value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum SerializationBits {
|
||||
Raw((byte) 0),
|
||||
JSON((byte) 0b1),
|
||||
Thrift((byte) 0b11),
|
||||
Custom((byte) 0b1111),
|
||||
;
|
||||
|
||||
private final byte value;
|
||||
|
||||
SerializationBits(byte b) {
|
||||
this.value = b;
|
||||
}
|
||||
|
||||
public static SerializationBits fromValue(int value) {
|
||||
for (SerializationBits type : SerializationBits.values()) {
|
||||
if (type.value == value) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown SerializationBits value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,115 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.java_websocket.client.WebSocketClient;
|
||||
import org.java_websocket.handshake.ServerHandshake;
|
||||
|
||||
import java.net.URI;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
@Slf4j
|
||||
public class SpeechWebSocketClient extends WebSocketClient {
|
||||
private final BlockingQueue<Message> messageQueue = new LinkedBlockingQueue<>();
|
||||
|
||||
public SpeechWebSocketClient(URI serverUri, Map<String, String> headers) {
|
||||
super(serverUri, headers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onOpen(ServerHandshake handshakedata) {
|
||||
log.info("WebSocket connection established, Logid: {}", handshakedata.getFieldValue("x-tt-logid"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onMessage(String message) {
|
||||
log.warn("Received unexpected text message: {}", message);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onMessage(ByteBuffer bytes) {
|
||||
try {
|
||||
Message message = Message.unmarshal(bytes.array());
|
||||
messageQueue.put(message);
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to parse message", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onClose(int code, String reason, boolean remote) {
|
||||
log.info("WebSocket connection closed: code={}, reason={}, remote={}", code, reason, remote);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onError(Exception ex) {
|
||||
log.error("WebSocket error", ex);
|
||||
}
|
||||
|
||||
public void sendStartConnection() throws Exception {
|
||||
Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
|
||||
message.setEvent(EventType.START_CONNECTION);
|
||||
message.setPayload("{}".getBytes());
|
||||
sendMessage(message);
|
||||
}
|
||||
|
||||
public void sendFinishConnection() throws Exception {
|
||||
Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
|
||||
message.setEvent(EventType.FINISH_CONNECTION);
|
||||
sendMessage(message);
|
||||
}
|
||||
|
||||
public void sendStartSession(byte[] payload, String sessionId) throws Exception {
|
||||
Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
|
||||
message.setEvent(EventType.START_SESSION);
|
||||
message.setSessionId(sessionId);
|
||||
message.setPayload(payload);
|
||||
sendMessage(message);
|
||||
}
|
||||
|
||||
public void sendFinishSession(String sessionId) throws Exception {
|
||||
Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
|
||||
message.setEvent(EventType.FINISH_SESSION);
|
||||
message.setSessionId(sessionId);
|
||||
message.setPayload("{}".getBytes());
|
||||
sendMessage(message);
|
||||
}
|
||||
|
||||
public void sendTaskRequest(byte[] payload, String sessionId) throws Exception {
|
||||
Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
|
||||
message.setEvent(EventType.TASK_REQUEST);
|
||||
message.setSessionId(sessionId);
|
||||
message.setPayload(payload);
|
||||
sendMessage(message);
|
||||
}
|
||||
|
||||
public void sendFullClientMessage(byte[] payload) throws Exception {
|
||||
Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.NO_SEQ);
|
||||
message.setPayload(payload);
|
||||
sendMessage(message);
|
||||
}
|
||||
|
||||
public void sendMessage(Message message) throws Exception {
|
||||
log.info("Send: {}", message);
|
||||
send(message.marshal());
|
||||
}
|
||||
|
||||
public Message receiveMessage() throws InterruptedException {
|
||||
Message message = messageQueue.take();
|
||||
log.info("Receive: {}", message);
|
||||
return message;
|
||||
}
|
||||
|
||||
public Message waitForMessage(MsgType type, EventType event) throws InterruptedException {
|
||||
while (true) {
|
||||
Message message = receiveMessage();
|
||||
if (message.getType() == type && message.getEvent() == event) {
|
||||
return message;
|
||||
} else {
|
||||
throw new RuntimeException("Unexpected message: " + message);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package com.speech.protocol;
|
||||
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
public enum VersionBits {
|
||||
Version1((byte) 1),
|
||||
Version2((byte) 2),
|
||||
Version3((byte) 3),
|
||||
Version4((byte) 4),
|
||||
;
|
||||
|
||||
private final byte value;
|
||||
|
||||
VersionBits(byte b) {
|
||||
this.value = b;
|
||||
}
|
||||
|
||||
public static VersionBits fromValue(int value) {
|
||||
for (VersionBits type : VersionBits.values()) {
|
||||
if (type.value == value) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
throw new IllegalArgumentException("Unknown VersionBits value: " + value);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package org.ruoyi.aihuman.service;
|
||||
|
||||
/**
 * Empty interface in the {@code org.ruoyi.aihuman.service} package; it declares no methods
 * in this revision.
 *
 * <p>NOTE(review): presumably the service contract for the Volcengine speech features — the
 * operations still have to be defined.
 */
public interface AihumanVolcengineService {
|
||||
}
|
||||
@@ -0,0 +1,4 @@
|
||||
package org.ruoyi.aihuman.service.impl;
|
||||
|
||||
/**
 * Empty class in the {@code org.ruoyi.aihuman.service.impl} package; it declares no fields
 * or methods and does not implement any interface in this revision.
 *
 * <p>NOTE(review): the name suggests it should implement {@code AihumanVolcengineService} —
 * confirm and add the {@code implements} clause once the interface has methods.
 */
public class AihumanVolcengineServiceImpl {
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package com.speech.volcengine;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.speech.protocol.EventType;
|
||||
import com.speech.protocol.Message;
|
||||
import com.speech.protocol.MsgType;
|
||||
import com.speech.protocol.SpeechWebSocketClient;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
@Slf4j
|
||||
public class Bidirection {
|
||||
private static final String ENDPOINT = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
|
||||
private static final ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
/**
|
||||
* Get resource ID based on voice type
|
||||
*
|
||||
* @param voice Voice type string
|
||||
* @return Corresponding resource ID
|
||||
*/
|
||||
public static String voiceToResourceId(String voice) {
|
||||
// Map different voice types to resource IDs based on actual needs
|
||||
if (voice.startsWith("S_")) {
|
||||
return "volc.megatts.default";
|
||||
}
|
||||
return "volc.service_type.10029";
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
// Configure parameters
|
||||
String appId = System.getProperty("appId", "1055299334");
|
||||
String accessToken = System.getProperty("accessToken", "fOHuq4R4dirMYiOruCU3Ek9q75zV0KVW");
|
||||
String resourceId = System.getProperty("resourceId", "seed-tts-2.0");
|
||||
String voice = System.getProperty("voice", "zh_female_vv_uranus_bigtts");
|
||||
String text = System.getProperty("text", "你好呀!如果你有关于老婆相关的问题,比如怎么让她开心、怎么照顾她等,都可以跟我说哦,我会根据【马斯克·陈】提供的关爱老婆百事通里的信息给你分析和建议哒。");
|
||||
String encoding = System.getProperty("encoding", "mp3");
|
||||
|
||||
if (appId.isEmpty() || accessToken.isEmpty()) {
|
||||
throw new IllegalArgumentException("Please set appId and accessToken system properties");
|
||||
}
|
||||
|
||||
// Set request headers
|
||||
Map<String, String> headers = Map.of(
|
||||
"X-Api-App-Key", appId,
|
||||
"X-Api-Access-Key", accessToken,
|
||||
"X-Api-Resource-Id", resourceId.isEmpty() ? voiceToResourceId(voice) : resourceId,
|
||||
"X-Api-Connect-Id", UUID.randomUUID().toString());
|
||||
|
||||
// Create WebSocket client
|
||||
SpeechWebSocketClient client = new SpeechWebSocketClient(new URI(ENDPOINT), headers);
|
||||
try {
|
||||
client.connectBlocking();
|
||||
Map<String, Object> request = Map.of(
|
||||
"user", Map.of("uid", UUID.randomUUID().toString()),
|
||||
"namespace", "BidirectionalTTS",
|
||||
"req_params", Map.of(
|
||||
"speaker", voice,
|
||||
"audio_params", Map.of(
|
||||
"format", encoding,
|
||||
"sample_rate", 24000,
|
||||
"enable_timestamp", true),
|
||||
// additions requires a JSON string
|
||||
"additions", objectMapper.writeValueAsString(Map.of(
|
||||
"disable_markdown_filter", false))));
|
||||
|
||||
// Start connection
|
||||
client.sendStartConnection();
|
||||
// Wait for connection started
|
||||
client.waitForMessage(MsgType.FULL_SERVER_RESPONSE, EventType.CONNECTION_STARTED);
|
||||
|
||||
// Process each sentence
|
||||
String[] sentences = text.split("。");
|
||||
boolean audioReceived = false;
|
||||
for (int i = 0; i < sentences.length; i++) {
|
||||
if (sentences[i].trim().isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String sessionId = UUID.randomUUID().toString();
|
||||
ByteArrayOutputStream audioStream = new ByteArrayOutputStream();
|
||||
|
||||
// Start session
|
||||
Map<String, Object> startReq = Map.of(
|
||||
"user", request.get("user"),
|
||||
"namespace", request.get("namespace"),
|
||||
"req_params", request.get("req_params"),
|
||||
"event", EventType.START_SESSION.getValue());
|
||||
client.sendStartSession(objectMapper.writeValueAsBytes(startReq), sessionId);
|
||||
// Wait for session started
|
||||
client.waitForMessage(MsgType.FULL_SERVER_RESPONSE, EventType.SESSION_STARTED);
|
||||
|
||||
// Send text
|
||||
for (char c : sentences[i].toCharArray()) {
|
||||
// Create new req_params with text
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> currentReqParams = new HashMap<>(
|
||||
(Map<String, Object>) request.get("req_params"));
|
||||
currentReqParams.put("text", String.valueOf(c));
|
||||
|
||||
// Create current request
|
||||
Map<String, Object> currentRequest = Map.of(
|
||||
"user", request.get("user"),
|
||||
"namespace", request.get("namespace"),
|
||||
"req_params", currentReqParams,
|
||||
"event", EventType.TASK_REQUEST.getValue());
|
||||
|
||||
client.sendTaskRequest(objectMapper.writeValueAsBytes(currentRequest), sessionId);
|
||||
}
|
||||
|
||||
// End session
|
||||
client.sendFinishSession(sessionId);
|
||||
|
||||
// Receive response
|
||||
while (true) {
|
||||
Message msg = client.receiveMessage();
|
||||
switch (msg.getType()) {
|
||||
case FULL_SERVER_RESPONSE:
|
||||
break;
|
||||
case AUDIO_ONLY_SERVER:
|
||||
if (!audioReceived && audioStream.size() > 0) {
|
||||
audioReceived = true;
|
||||
}
|
||||
if (msg.getPayload() != null) {
|
||||
audioStream.write(msg.getPayload());
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("Unexpected message: " + msg);
|
||||
}
|
||||
if (msg.getEvent() == EventType.SESSION_FINISHED) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (audioStream.size() > 0) {
|
||||
String fileName = String.format("%s_session_%d.%s", voice, i, encoding);
|
||||
Files.write(new File(fileName).toPath(), audioStream.toByteArray());
|
||||
log.info("Audio saved to file: {}", fileName);
|
||||
}
|
||||
}
|
||||
|
||||
if (!audioReceived) {
|
||||
throw new RuntimeException("No audio data received");
|
||||
}
|
||||
|
||||
// End connection
|
||||
client.sendFinishConnection();
|
||||
} finally {
|
||||
client.closeBlocking();
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user