From e4023306922d8b20c4b140b4a9f4279445365259 Mon Sep 17 00:00:00 2001
From: Maxchen <1174344944@qq.com>
Date: Thu, 6 Nov 2025 19:43:52 +0800
Subject: [PATCH] =?UTF-8?q?add:=E6=B7=BB=E5=8A=A0=E7=81=AB=E5=B1=B1?=
 =?UTF-8?q?=E5=BC=95=E6=93=8E=E8=AF=AD=E9=9F=B3=E5=90=88=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../org/ruoyi/aihuman/config/WebConfig.java   |   4 +
 .../AihumanVolcengineController.java          |   4 +
 .../ruoyi/aihuman/domain/VoiceRequest.java    |   4 +
 .../aihuman/protocol/CompressionBits.java     |  28 +++
 .../org/ruoyi/aihuman/protocol/EventType.java |  97 +++++++
 .../aihuman/protocol/HeaderSizeBits.java      |  29 +++
 .../org/ruoyi/aihuman/protocol/Message.java   | 237 ++++++++++++++++++
 .../org/ruoyi/aihuman/protocol/MsgType.java   |  31 +++
 .../aihuman/protocol/MsgTypeFlagBits.java     |  29 +++
 .../aihuman/protocol/SerializationBits.java   |  29 +++
 .../protocol/SpeechWebSocketClient.java       | 127 ++++++++++
 .../ruoyi/aihuman/protocol/VersionBits.java   |  29 +++
 .../service/AihumanVolcengineService.java     |   4 +
 .../impl/AihumanVolcengineServiceImpl.java    |   4 +
 .../ruoyi/aihuman/volcengine/Bidirection.java | 170 +++++++++++++
 15 files changed, 826 insertions(+)
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/config/WebConfig.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/controller/AihumanVolcengineController.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/domain/VoiceRequest.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/CompressionBits.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/EventType.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/HeaderSizeBits.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/Message.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgType.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgTypeFlagBits.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SerializationBits.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SpeechWebSocketClient.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/VersionBits.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/AihumanVolcengineService.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/impl/AihumanVolcengineServiceImpl.java
 create mode 100644 ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/volcengine/Bidirection.java

diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/config/WebConfig.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/config/WebConfig.java
new file mode 100644
index 00000000..5efe553b
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/config/WebConfig.java
@@ -0,0 +1,4 @@
+package org.ruoyi.aihuman.config;
+
+public class WebConfig {
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/controller/AihumanVolcengineController.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/controller/AihumanVolcengineController.java
new file mode 100644
index 00000000..601e8a6e
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/controller/AihumanVolcengineController.java
@@ -0,0 +1,4 @@
+package org.ruoyi.aihuman.controller;
+
+public class AihumanVolcengineController {
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/domain/VoiceRequest.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/domain/VoiceRequest.java
new file mode 100644
index 00000000..4be027e6
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/domain/VoiceRequest.java
@@ -0,0 +1,4 @@
+package org.ruoyi.aihuman.domain;
+
+public class VoiceRequest {
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/CompressionBits.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/CompressionBits.java
new file mode 100644
index 00000000..55442fef
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/CompressionBits.java
@@ -0,0 +1,28 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/** Compression code carried in the low 4 bits of the third header byte. */
+@Getter
+public enum CompressionBits {
+    None_((byte) 0),
+    Gzip((byte) 0b1),
+    Custom((byte) 0b11),
+    ;
+
+    private final byte value;
+
+    CompressionBits(byte b) {
+        this.value = b;
+    }
+
+    /** Returns the constant with the given code, or throws IllegalArgumentException if unknown. */
+    public static CompressionBits fromValue(int value) {
+        for (CompressionBits type : CompressionBits.values()) {
+            if (type.value == value) {
+                return type;
+            }
+        }
+        throw new IllegalArgumentException("Unknown CompressionBits value: " + value);
+    }
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/EventType.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/EventType.java
new file mode 100644
index 00000000..5d043dde
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/EventType.java
@@ -0,0 +1,97 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/**
+ * Event codes used in protocol messages.
+ *
+ * <p>Several constants share a code (for example START_CONNECTION and START_TASK);
+ * {@link #fromValue(int)} returns the first constant declared for a code.
+ */
+@Getter
+public enum EventType {
+    // Default event
+    NONE(0),
+
+    // Upstream Connection events (1-49)
+    START_CONNECTION(1),
+    START_TASK(1),
+    FINISH_CONNECTION(2),
+    FINISH_TASK(2),
+
+    // Downstream Connection events (50-99)
+    CONNECTION_STARTED(50),
+    TASK_STARTED(50),
+    CONNECTION_FAILED(51),
+    TASK_FAILED(51),
+    CONNECTION_FINISHED(52),
+    TASK_FINISHED(52),
+
+    // Upstream Session events (100-149)
+    START_SESSION(100),
+    CANCEL_SESSION(101),
+    FINISH_SESSION(102),
+
+    // Downstream Session events (150-199)
+    SESSION_STARTED(150),
+    SESSION_CANCELED(151),
+    SESSION_FINISHED(152),
+    SESSION_FAILED(153),
+    USAGE_RESPONSE(154),
+    CHARGE_DATA(154),
+
+    // Upstream General events (200-249)
+    TASK_REQUEST(200),
+    UPDATE_CONFIG(201),
+
+    // Downstream General events (250-299)
+    AUDIO_MUTED(250),
+
+    // Upstream TTS events (300-349)
+    SAY_HELLO(300),
+
+    // Downstream TTS events (350-399)
+    TTS_SENTENCE_START(350),
+    TTS_SENTENCE_END(351),
+    TTS_RESPONSE(352),
+    TTS_ENDED(359),
+    PODCAST_ROUND_START(360),
+    PODCAST_ROUND_RESPONSE(361),
+    PODCAST_ROUND_END(362),
+
+    // Downstream ASR events (450-499)
+    ASR_INFO(450),
+    ASR_RESPONSE(451),
+    ASR_ENDED(459),
+
+    // Upstream Chat events (500-549)
+    CHAT_TTS_TEXT(500),
+
+    // Downstream Chat events (550-599)
+    CHAT_RESPONSE(550),
+    CHAT_ENDED(559),
+
+    // Subtitle events (650-699)
+    SOURCE_SUBTITLE_START(650),
+    SOURCE_SUBTITLE_RESPONSE(651),
+    SOURCE_SUBTITLE_END(652),
+    TRANSLATION_SUBTITLE_START(653),
+    TRANSLATION_SUBTITLE_RESPONSE(654),
+    TRANSLATION_SUBTITLE_END(655);
+
+    private final int value;
+
+    EventType(int value) {
+        this.value = value;
+    }
+
+    /** Returns the first constant declared with the given code, or throws IllegalArgumentException. */
+    public static EventType fromValue(int value) {
+        for (EventType type : EventType.values()) {
+            if (type.value == value) {
+                return type;
+            }
+        }
+        throw new IllegalArgumentException("Unknown EventType value: " + value);
+    }
+}
\ No newline at end of file
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/HeaderSizeBits.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/HeaderSizeBits.java
new file mode 100644
index 00000000..86daf375
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/HeaderSizeBits.java
@@ -0,0 +1,29 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/** Header size code (low 4 bits of the first header byte); the header is 4 * value bytes long. */
+@Getter
+public enum HeaderSizeBits {
+    HeaderSize4((byte) 1),
+    HeaderSize8((byte) 2),
+    HeaderSize12((byte) 3),
+    HeaderSize16((byte) 4),
+    ;
+
+    private final byte value;
+
+    HeaderSizeBits(byte b) {
+        this.value = b;
+    }
+
+    /** Returns the constant with the given code, or throws IllegalArgumentException if unknown. */
+    public static HeaderSizeBits fromValue(int value) {
+        for (HeaderSizeBits type : HeaderSizeBits.values()) {
+            if (type.value == value) {
+                return type;
+            }
+        }
+        throw new IllegalArgumentException("Unknown HeaderSizeBits value: " + value);
+    }
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/Message.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/Message.java
new file mode 100644
index 00000000..bc44fc73
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/Message.java
@@ -0,0 +1,237 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Data;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+
+/**
+ * One binary protocol frame: a header of 4 * headerSize bytes followed by optional
+ * big-endian fields (event, ids, sequence, error code) and a length-prefixed payload.
+ */
+@Slf4j
+@Data
+public class Message {
+    private byte version = VersionBits.Version1.getValue();
+    private byte headerSize = HeaderSizeBits.HeaderSize4.getValue();
+    private MsgType type;
+    private MsgTypeFlagBits flag;
+    private byte serialization = SerializationBits.JSON.getValue();
+    private byte compression = 0;
+
+    private EventType event;
+    private String sessionId;
+    private String connectId;
+    private int sequence;
+    private int errorCode;
+
+    private byte[] payload;
+
+    public Message(MsgType type, MsgTypeFlagBits flag) {
+        this.type = type;
+        this.flag = flag;
+    }
+
+    /**
+     * Parses a binary frame into a message.
+     *
+     * @param data the raw frame bytes, starting at the first header byte
+     * @return the decoded message
+     * @throws Exception if a header code is unknown or the data is truncated
+     */
+    public static Message unmarshal(byte[] data) throws Exception {
+        ByteBuffer buffer = ByteBuffer.wrap(data);
+
+        byte typeAndFlag = data[1];
+        MsgType type = MsgType.fromValue((typeAndFlag >> 4) & 0x0F);
+        MsgTypeFlagBits flag = MsgTypeFlagBits.fromValue(typeAndFlag & 0x0F);
+
+        // Read version and header size
+        int versionAndHeaderSize = buffer.get();
+        VersionBits version = VersionBits.fromValue((versionAndHeaderSize >> 4) & 0x0F);
+        HeaderSizeBits headerSize = HeaderSizeBits.fromValue(versionAndHeaderSize & 0x0F);
+
+        // Skip second byte
+        buffer.get();
+
+        // Read serialization and compression method
+        int serializationCompression = buffer.get();
+        SerializationBits serialization = SerializationBits.fromValue((serializationCompression >> 4) & 0x0F);
+        CompressionBits compression = CompressionBits.fromValue(serializationCompression & 0x0F);
+
+        // Skip padding bytes
+        int headerSizeInt = 4 * (int) headerSize.getValue();
+        int paddingSize = headerSizeInt - 3;
+        while (paddingSize > 0) {
+            buffer.get();
+            paddingSize -= 1;
+        }
+
+        Message message = new Message(type, flag);
+        message.setVersion(version.getValue());
+        message.setHeaderSize(headerSize.getValue());
+        message.setSerialization(serialization.getValue());
+        message.setCompression(compression.getValue());
+
+        // Read sequence if present
+        if (flag == MsgTypeFlagBits.POSITIVE_SEQ || flag == MsgTypeFlagBits.NEGATIVE_SEQ) {
+            // Read 4 bytes from ByteBuffer and parse as int (big-endian)
+            byte[] sequenceBytes = new byte[4];
+            if (buffer.remaining() >= 4) {
+                buffer.get(sequenceBytes); // Read 4 bytes into array
+                ByteBuffer wrapper = ByteBuffer.wrap(sequenceBytes);
+                wrapper.order(ByteOrder.BIG_ENDIAN); // Set big-endian order
+                message.setSequence(wrapper.getInt());
+            }
+        }
+
+        // Read event if present
+        if (flag == MsgTypeFlagBits.WITH_EVENT) {
+            // Read 4 bytes from ByteBuffer and parse as int (big-endian)
+            byte[] eventBytes = new byte[4];
+            if (buffer.remaining() >= 4) {
+                buffer.get(eventBytes); // Read 4 bytes into array
+                ByteBuffer wrapper = ByteBuffer.wrap(eventBytes);
+                wrapper.order(ByteOrder.BIG_ENDIAN); // Set big-endian order
+                message.setEvent(EventType.fromValue(wrapper.getInt()));
+            }
+
+            if (type != MsgType.ERROR && !(message.event == EventType.START_CONNECTION
+                    || message.event == EventType.FINISH_CONNECTION ||
+                    message.event == EventType.CONNECTION_STARTED
+                    || message.event == EventType.CONNECTION_FAILED ||
+                    message.event == EventType.CONNECTION_FINISHED)) {
+                // Read sessionId if present
+                int sessionIdLength = buffer.getInt();
+                if (sessionIdLength > 0) {
+                    byte[] sessionIdBytes = new byte[sessionIdLength];
+                    buffer.get(sessionIdBytes);
+                    message.setSessionId(new String(sessionIdBytes, StandardCharsets.UTF_8));
+                }
+            }
+
+            if (message.event == EventType.CONNECTION_STARTED || message.event == EventType.CONNECTION_FAILED
+                    || message.event == EventType.CONNECTION_FINISHED) {
+                // Read connectId if present
+                int connectIdLength = buffer.getInt();
+                if (connectIdLength > 0) {
+                    byte[] connectIdBytes = new byte[connectIdLength];
+                    buffer.get(connectIdBytes);
+                    message.setConnectId(new String(connectIdBytes, StandardCharsets.UTF_8));
+                }
+            }
+        }
+
+        // Read errorCode if present
+        if (type == MsgType.ERROR) {
+            // Read 4 bytes from ByteBuffer and parse as int (big-endian)
+            byte[] errorCodeBytes = new byte[4];
+            if (buffer.remaining() >= 4) {
+                buffer.get(errorCodeBytes); // Read 4 bytes into array
+                ByteBuffer wrapper = ByteBuffer.wrap(errorCodeBytes);
+                wrapper.order(ByteOrder.BIG_ENDIAN); // Set big-endian order
+                message.setErrorCode(wrapper.getInt());
+            }
+        }
+
+        // Read remaining bytes as payload
+        if (buffer.remaining() > 0) {
+            // 4 bytes length
+            int payloadLength = buffer.getInt();
+            if (payloadLength > 0) {
+                byte[] payloadBytes = new byte[payloadLength];
+                buffer.get(payloadBytes);
+                message.setPayload(payloadBytes);
+            }
+        }
+
+        return message;
+    }
+
+    /**
+     * Serializes this message: header, then event, sessionId, connectId, sequence, errorCode
+     * and payload, each written only when set (non-null / non-zero / non-empty).
+     *
+     * @return the encoded frame
+     */
+    public byte[] marshal() throws Exception {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+        // Write header
+        buffer.write((version & 0x0F) << 4 | (headerSize & 0x0F));
+        buffer.write((type.getValue() & 0x0F) << 4 | (flag.getValue() & 0x0F));
+        buffer.write((serialization & 0x0F) << 4 | (compression & 0x0F));
+
+        int headerSizeInt = 4 * (int) headerSize;
+        int padding = headerSizeInt - buffer.size();
+        while (padding > 0) {
+            buffer.write(0);
+            padding -= 1;
+        }
+
+        // Write event if present
+        if (event != null) {
+            byte[] eventBytes = ByteBuffer.allocate(4).putInt(event.getValue()).array();
+            buffer.write(eventBytes);
+        }
+
+        // Write sessionId if present
+        if (sessionId != null) {
+            byte[] sessionIdBytes = sessionId.getBytes(StandardCharsets.UTF_8);
+            buffer.write(ByteBuffer.allocate(4).putInt(sessionIdBytes.length).array());
+            buffer.write(sessionIdBytes);
+        }
+
+        // Write connectId if present
+        if (connectId != null) {
+            byte[] connectIdBytes = connectId.getBytes(StandardCharsets.UTF_8);
+            buffer.write(ByteBuffer.allocate(4).putInt(connectIdBytes.length).array());
+            buffer.write(connectIdBytes);
+        }
+
+        // Write sequence if present
+        if (sequence != 0) {
+            buffer.write(ByteBuffer.allocate(4).putInt(sequence).array());
+        }
+
+        // Write errorCode if present
+        if (errorCode != 0) {
+            buffer.write(ByteBuffer.allocate(4).putInt(errorCode).array());
+        }
+
+        // Write payload if present
+        if (payload != null && payload.length > 0) {
+            buffer.write(ByteBuffer.allocate(4).putInt(payload.length).array());
+            buffer.write(payload);
+        }
+        return buffer.toByteArray();
+    }
+
+    @Override
+    public String toString() {
+        switch (this.type) {
+            case AUDIO_ONLY_SERVER:
+            case AUDIO_ONLY_CLIENT:
+                if (this.flag == MsgTypeFlagBits.POSITIVE_SEQ || this.flag == MsgTypeFlagBits.NEGATIVE_SEQ) {
+                    return String.format("MsgType: %s, EventType: %s, Sequence: %d, PayloadSize: %d", this.type, this.event, this.sequence,
+                            this.payload != null ? this.payload.length : 0);
+                }
+                return String.format("MsgType: %s, EventType: %s, PayloadSize: %d", this.type, this.event,
+                        this.payload != null ? this.payload.length : 0);
+            case ERROR:
+                return String.format("MsgType: %s, EventType: %s, ErrorCode: %d, Payload: %s", this.type, this.event, this.errorCode,
+                        this.payload != null ? new String(this.payload, StandardCharsets.UTF_8) : "null");
+            default:
+                if (this.flag == MsgTypeFlagBits.POSITIVE_SEQ || this.flag == MsgTypeFlagBits.NEGATIVE_SEQ) {
+                    return String.format("MsgType: %s, EventType: %s, Sequence: %d, Payload: %s",
+                            this.type, this.event, this.sequence,
+                            this.payload != null ? new String(this.payload, StandardCharsets.UTF_8) : "null");
+                }
+                return String.format("MsgType: %s, EventType: %s, Payload: %s", this.type, this.event,
+                        this.payload != null ? new String(this.payload, StandardCharsets.UTF_8) : "null");
+        }
+    }
+}
\ No newline at end of file
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgType.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgType.java
new file mode 100644
index 00000000..9d782b36
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgType.java
@@ -0,0 +1,31 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/** Message type code carried in the high 4 bits of the second header byte. */
+@Getter
+public enum MsgType {
+    INVALID((byte) 0),
+    FULL_CLIENT_REQUEST((byte) 0b1),
+    AUDIO_ONLY_CLIENT((byte) 0b10),
+    FULL_SERVER_RESPONSE((byte) 0b1001),
+    AUDIO_ONLY_SERVER((byte) 0b1011),
+    FRONT_END_RESULT_SERVER((byte) 0b1100),
+    ERROR((byte) 0b1111);
+
+    private final byte value;
+
+    MsgType(byte value) {
+        this.value = value;
+    }
+
+    /** Returns the constant with the given code, or throws IllegalArgumentException if unknown. */
+    public static MsgType fromValue(int value) {
+        for (MsgType type : MsgType.values()) {
+            if (type.value == value) {
+                return type;
+            }
+        }
+        throw new IllegalArgumentException("Unknown MsgType value: " + value);
+    }
+}
\ No newline at end of file
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgTypeFlagBits.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgTypeFlagBits.java
new file mode 100644
index 00000000..d155a562
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/MsgTypeFlagBits.java
@@ -0,0 +1,29 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/** Message flag code carried in the low 4 bits of the second header byte. */
+@Getter
+public enum MsgTypeFlagBits {
+    NO_SEQ((byte) 0), // Non-terminating packet without sequence number
+    POSITIVE_SEQ((byte) 0b1), // Non-terminating packet with positive sequence number
+    LAST_NO_SEQ((byte) 0b10), // Terminating packet without sequence number
+    NEGATIVE_SEQ((byte) 0b11), // Terminating packet with negative sequence number
+    WITH_EVENT((byte) 0b100); // Packet containing event number
+
+    private final byte value;
+
+    MsgTypeFlagBits(byte value) {
+        this.value = value;
+    }
+
+    /** Returns the constant with the given code, or throws IllegalArgumentException if unknown. */
+    public static MsgTypeFlagBits fromValue(int value) {
+        for (MsgTypeFlagBits flag : MsgTypeFlagBits.values()) {
+            if (flag.value == value) {
+                return flag;
+            }
+        }
+        throw new IllegalArgumentException("Unknown MsgTypeFlagBits value: " + value);
+    }
+}
\ No newline at end of file
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SerializationBits.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SerializationBits.java
new file mode 100644
index 00000000..c822ecc8
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SerializationBits.java
@@ -0,0 +1,29 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/** Serialization code carried in the high 4 bits of the third header byte. */
+@Getter
+public enum SerializationBits {
+    Raw((byte) 0),
+    JSON((byte) 0b1),
+    Thrift((byte) 0b11),
+    Custom((byte) 0b1111),
+    ;
+
+    private final byte value;
+
+    SerializationBits(byte b) {
+        this.value = b;
+    }
+
+    /** Returns the constant with the given code, or throws IllegalArgumentException if unknown. */
+    public static SerializationBits fromValue(int value) {
+        for (SerializationBits type : SerializationBits.values()) {
+            if (type.value == value) {
+                return type;
+            }
+        }
+        throw new IllegalArgumentException("Unknown SerializationBits value: " + value);
+    }
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SpeechWebSocketClient.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SpeechWebSocketClient.java
new file mode 100644
index 00000000..63c20366
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/SpeechWebSocketClient.java
@@ -0,0 +1,127 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.extern.slf4j.Slf4j;
+import org.java_websocket.client.WebSocketClient;
+import org.java_websocket.handshake.ServerHandshake;
+
+import java.net.URI;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
+/**
+ * WebSocket client that decodes incoming binary frames into {@link Message} objects and
+ * queues them for {@link #receiveMessage()}.
+ */
+@Slf4j
+public class SpeechWebSocketClient extends WebSocketClient {
+    private final BlockingQueue<Message> messageQueue = new LinkedBlockingQueue<>();
+
+    public SpeechWebSocketClient(URI serverUri, Map<String, String> headers) {
+        super(serverUri, headers);
+    }
+
+    @Override
+    public void onOpen(ServerHandshake handshakedata) {
+        log.info("WebSocket connection established, Logid: {}", handshakedata.getFieldValue("x-tt-logid"));
+    }
+
+    @Override
+    public void onMessage(String message) {
+        log.warn("Received unexpected text message: {}", message);
+    }
+
+    @Override
+    public void onMessage(ByteBuffer bytes) {
+        try {
+            Message message = Message.unmarshal(bytes.array());
+            messageQueue.put(message);
+        } catch (Exception e) {
+            log.error("Failed to parse message", e);
+        }
+    }
+
+    @Override
+    public void onClose(int code, String reason, boolean remote) {
+        log.info("WebSocket connection closed: code={}, reason={}, remote={}", code, reason, remote);
+    }
+
+    @Override
+    public void onError(Exception ex) {
+        log.error("WebSocket error", ex);
+    }
+
+    public void sendStartConnection() throws Exception {
+        Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
+        message.setEvent(EventType.START_CONNECTION);
+        message.setPayload("{}".getBytes(StandardCharsets.UTF_8));
+        sendMessage(message);
+    }
+
+    public void sendFinishConnection() throws Exception {
+        Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
+        message.setEvent(EventType.FINISH_CONNECTION);
+        sendMessage(message);
+    }
+
+    public void sendStartSession(byte[] payload, String sessionId) throws Exception {
+        Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
+        message.setEvent(EventType.START_SESSION);
+        message.setSessionId(sessionId);
+        message.setPayload(payload);
+        sendMessage(message);
+    }
+
+    public void sendFinishSession(String sessionId) throws Exception {
+        Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
+        message.setEvent(EventType.FINISH_SESSION);
+        message.setSessionId(sessionId);
+        message.setPayload("{}".getBytes(StandardCharsets.UTF_8));
+        sendMessage(message);
+    }
+
+    public void sendTaskRequest(byte[] payload, String sessionId) throws Exception {
+        Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.WITH_EVENT);
+        message.setEvent(EventType.TASK_REQUEST);
+        message.setSessionId(sessionId);
+        message.setPayload(payload);
+        sendMessage(message);
+    }
+
+    public void sendFullClientMessage(byte[] payload) throws Exception {
+        Message message = new Message(MsgType.FULL_CLIENT_REQUEST, MsgTypeFlagBits.NO_SEQ);
+        message.setPayload(payload);
+        sendMessage(message);
+    }
+
+    /** Logs the message and sends its marshalled bytes. */
+    public void sendMessage(Message message) throws Exception {
+        log.info("Send: {}", message);
+        send(message.marshal());
+    }
+
+    /** Blocks until a decoded message is available, logs it and returns it. */
+    public Message receiveMessage() throws InterruptedException {
+        Message message = messageQueue.take();
+        log.info("Receive: {}", message);
+        return message;
+    }
+
+    /**
+     * Takes the next queued message and returns it if it has the expected type and event.
+     *
+     * @throws RuntimeException if the next message does not match
+     */
+    public Message waitForMessage(MsgType type, EventType event) throws InterruptedException {
+        while (true) {
+            Message message = receiveMessage();
+            if (message.getType() == type && message.getEvent() == event) {
+                return message;
+            } else {
+                throw new RuntimeException("Unexpected message: " + message);
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/VersionBits.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/VersionBits.java
new file mode 100644
index 00000000..1e3542fc
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/protocol/VersionBits.java
@@ -0,0 +1,29 @@
+package org.ruoyi.aihuman.protocol;
+
+import lombok.Getter;
+
+/** Protocol version code carried in the high 4 bits of the first header byte. */
+@Getter
+public enum VersionBits {
+    Version1((byte) 1),
+    Version2((byte) 2),
+    Version3((byte) 3),
+    Version4((byte) 4),
+    ;
+
+    private final byte value;
+
+    VersionBits(byte b) {
+        this.value = b;
+    }
+
+    /** Returns the constant with the given code, or throws IllegalArgumentException if unknown. */
+    public static VersionBits fromValue(int value) {
+        for (VersionBits type : VersionBits.values()) {
+            if (type.value == value) {
+                return type;
+            }
+        }
+        throw new IllegalArgumentException("Unknown VersionBits value: " + value);
+    }
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/AihumanVolcengineService.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/AihumanVolcengineService.java
new file mode 100644
index 00000000..6c99f013
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/AihumanVolcengineService.java
@@ -0,0 +1,4 @@
+package org.ruoyi.aihuman.service;
+
+public interface AihumanVolcengineService {
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/impl/AihumanVolcengineServiceImpl.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/impl/AihumanVolcengineServiceImpl.java
new file mode 100644
index 00000000..aa11aabd
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/service/impl/AihumanVolcengineServiceImpl.java
@@ -0,0 +1,4 @@
+package org.ruoyi.aihuman.service.impl;
+
+public class AihumanVolcengineServiceImpl {
+}
diff --git a/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/volcengine/Bidirection.java b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/volcengine/Bidirection.java
new file mode 100644
index 00000000..749b5bf7
--- /dev/null
+++ b/ruoyi-modules/ruoyi-aihuman/src/main/java/org/ruoyi/aihuman/volcengine/Bidirection.java
@@ -0,0 +1,170 @@
+package org.ruoyi.aihuman.volcengine;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.extern.slf4j.Slf4j;
+import org.ruoyi.aihuman.protocol.EventType;
+import org.ruoyi.aihuman.protocol.Message;
+import org.ruoyi.aihuman.protocol.MsgType;
+import org.ruoyi.aihuman.protocol.SpeechWebSocketClient;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.net.URI;
+import java.nio.file.Files;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+/**
+ * Command-line demo: synthesizes each sentence of a text over the bidirectional TTS
+ * WebSocket endpoint and writes the audio of each session to a file.
+ */
+@Slf4j
+public class Bidirection {
+    private static final String ENDPOINT = "wss://openspeech.bytedance.com/api/v3/tts/bidirection";
+    private static final ObjectMapper objectMapper = new ObjectMapper();
+
+    /**
+     * Get resource ID based on voice type
+     *
+     * @param voice Voice type string
+     * @return Corresponding resource ID
+     */
+    public static String voiceToResourceId(String voice) {
+        // Map different voice types to resource IDs based on actual needs
+        if (voice.startsWith("S_")) {
+            return "volc.megatts.default";
+        }
+        return "volc.service_type.10029";
+    }
+
+    /**
+     * Runs the demo. Settings come from the system properties appId, accessToken,
+     * resourceId, voice, text and encoding; appId and accessToken are required.
+     */
+    public static void main(String[] args) throws Exception {
+        // Configure parameters
+        // Credentials have no built-in defaults; pass -DappId=... -DaccessToken=...
+        String appId = System.getProperty("appId", "");
+        String accessToken = System.getProperty("accessToken", "");
+        String resourceId = System.getProperty("resourceId", "seed-tts-2.0");
+        String voice = System.getProperty("voice", "zh_female_vv_uranus_bigtts");
+        String text = System.getProperty("text", "你好呀!如果你有关于老婆相关的问题,比如怎么让她开心、怎么照顾她等,都可以跟我说哦,我会根据【马斯克·陈】提供的关爱老婆百事通里的信息给你分析和建议哒。");
+        String encoding = System.getProperty("encoding", "mp3");
+
+        if (appId.isEmpty() || accessToken.isEmpty()) {
+            throw new IllegalArgumentException("Please set appId and accessToken system properties");
+        }
+
+        // Set request headers
+        Map<String, String> headers = Map.of(
+                "X-Api-App-Key", appId,
+                "X-Api-Access-Key", accessToken,
+                "X-Api-Resource-Id", resourceId.isEmpty() ? voiceToResourceId(voice) : resourceId,
+                "X-Api-Connect-Id", UUID.randomUUID().toString());
+
+        // Create WebSocket client
+        SpeechWebSocketClient client = new SpeechWebSocketClient(new URI(ENDPOINT), headers);
+        try {
+            client.connectBlocking();
+            Map<String, Object> request = Map.of(
+                    "user", Map.of("uid", UUID.randomUUID().toString()),
+                    "namespace", "BidirectionalTTS",
+                    "req_params", Map.of(
+                            "speaker", voice,
+                            "audio_params", Map.of(
+                                    "format", encoding,
+                                    "sample_rate", 24000,
+                                    "enable_timestamp", true),
+                            // additions requires a JSON string
+                            "additions", objectMapper.writeValueAsString(Map.of(
+                                    "disable_markdown_filter", false))));
+
+            // Start connection
+            client.sendStartConnection();
+            // Wait for connection started
+            client.waitForMessage(MsgType.FULL_SERVER_RESPONSE, EventType.CONNECTION_STARTED);
+
+            // Process each sentence
+            String[] sentences = text.split("。");
+            boolean audioReceived = false;
+            for (int i = 0; i < sentences.length; i++) {
+                if (sentences[i].trim().isEmpty()) {
+                    continue;
+                }
+
+                String sessionId = UUID.randomUUID().toString();
+                ByteArrayOutputStream audioStream = new ByteArrayOutputStream();
+
+                // Start session
+                Map<String, Object> startReq = Map.of(
+                        "user", request.get("user"),
+                        "namespace", request.get("namespace"),
+                        "req_params", request.get("req_params"),
+                        "event", EventType.START_SESSION.getValue());
+                client.sendStartSession(objectMapper.writeValueAsBytes(startReq), sessionId);
+                // Wait for session started
+                client.waitForMessage(MsgType.FULL_SERVER_RESPONSE, EventType.SESSION_STARTED);
+
+                // Send text
+                for (char c : sentences[i].toCharArray()) {
+                    // Create new req_params with text
+                    @SuppressWarnings("unchecked")
+                    Map<String, Object> currentReqParams = new HashMap<>(
+                            (Map<String, Object>) request.get("req_params"));
+                    currentReqParams.put("text", String.valueOf(c));
+
+                    // Create current request
+                    Map<String, Object> currentRequest = Map.of(
+                            "user", request.get("user"),
+                            "namespace", request.get("namespace"),
+                            "req_params", currentReqParams,
+                            "event", EventType.TASK_REQUEST.getValue());
+
+                    client.sendTaskRequest(objectMapper.writeValueAsBytes(currentRequest), sessionId);
+                }
+
+                // End session
+                client.sendFinishSession(sessionId);
+
+                // Receive response
+                while (true) {
+                    Message msg = client.receiveMessage();
+                    switch (msg.getType()) {
+                        case FULL_SERVER_RESPONSE:
+                            break;
+                        case AUDIO_ONLY_SERVER:
+                            // Append first, then flag, so a single audio frame also counts as received
+                            if (msg.getPayload() != null) {
+                                audioStream.write(msg.getPayload());
+                            }
+                            if (!audioReceived && audioStream.size() > 0) {
+                                audioReceived = true;
+                            }
+                            break;
+                        default:
+                            throw new RuntimeException("Unexpected message: " + msg);
+                    }
+                    if (msg.getEvent() == EventType.SESSION_FINISHED) {
+                        break;
+                    }
+                }
+
+                if (audioStream.size() > 0) {
+                    String fileName = String.format("%s_session_%d.%s", voice, i, encoding);
+                    Files.write(new File(fileName).toPath(), audioStream.toByteArray());
+                    log.info("Audio saved to file: {}", fileName);
+                }
+            }
+
+            if (!audioReceived) {
+                throw new RuntimeException("No audio data received");
+            }
+
+            // End connection
+            client.sendFinishConnection();
+        } finally {
+            client.closeBlocking();
+        }
+    }
+}
\ No newline at end of file