diff --git a/agentscope-core/src/main/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoice.java b/agentscope-core/src/main/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoice.java
new file mode 100644
index 000000000..f5f7aac54
--- /dev/null
+++ b/agentscope-core/src/main/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoice.java
@@ -0,0 +1,238 @@
+/*
+ * Copyright 2024-2026 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.agentscope.core.model.tts;
+
+import java.util.Locale;
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Predefined voices for Qwen3 TTS Flash / Realtime models.
+ *
+ * <p>The {@code voiceId} values correspond to the {@code voice} parameter
+ * accepted by qwen3-tts-flash and qwen3-tts-flash-realtime.
+ */
+public enum Qwen3TTSFlashVoice {
+
+    /**
+     * 芊悦 (Cherry) - A sunny, positive, friendly, and natural young woman.
+     */
+    CHERRY("Cherry", "芊悦", Gender.FEMALE, "A sunny, positive, friendly, and natural young woman"),
+
+    /**
+     * 晨煦 (Ethan) - A bright, warm, energetic, and vibrant male voice with a standard Mandarin pronunciation and a slight northern accent.
+     */
+    ETHAN(
+            "Ethan",
+            "晨煦",
+            Gender.MALE,
+            "A bright, warm, energetic, and vibrant male voice with a standard Mandarin"
+                    + " pronunciation and a slight northern accent"),
+
+    /**
+     * 不吃鱼 (Nofish) - A male designer who cannot pronounce retroflex sounds.
+     */
+    NOFISH("Nofish", "不吃鱼", Gender.MALE, "A male designer who cannot pronounce retroflex sounds"),
+
+    /**
+     * 詹妮弗 (Jennifer) - A premium, cinematic American English female voice.
+     */
+    JENNIFER(
+            "Jennifer", "詹妮弗", Gender.FEMALE, "A premium, cinematic American English female voice"),
+
+    /**
+     * 甜茶 (Ryan) - A rhythmic and dramatic voice with a sense of realism and tension.
+     */
+    RYAN(
+            "Ryan",
+            "甜茶",
+            Gender.MALE,
+            "A rhythmic and dramatic voice with a sense of realism and tension"),
+
+    /**
+     * 卡捷琳娜 (Katerina) - A mature female voice with a rich rhythm and lingering resonance.
+     */
+    KATERINA(
+            "Katerina",
+            "卡捷琳娜",
+            Gender.FEMALE,
+            "A mature female voice with a rich rhythm and lingering resonance"),
+
+    /**
+     * 墨讲师 (Elias) - A voice that maintains academic rigor while using storytelling techniques to transform complex knowledge into digestible cognitive modules.
+     */
+    ELIAS(
+            "Elias",
+            "墨讲师",
+            Gender.MALE,
+            "A voice that maintains academic rigor while using storytelling techniques to transform"
+                    + " complex knowledge into digestible cognitive modules"),
+
+    /**
+     * 上海-阿珍 (Jada) - An energetic woman from Shanghai.
+     */
+    JADA("Jada", "上海-阿珍", Gender.FEMALE, "An energetic woman from Shanghai"),
+
+    /**
+     * 北京-晓东 (Dylan) - A teenage boy who grew up in the hutongs of Beijing.
+     */
+    DYLAN("Dylan", "北京-晓东", Gender.MALE, "A teenage boy who grew up in the hutongs of Beijing"),
+
+    /**
+     * 四川-晴儿 (Sunny) - The voice of a Sichuan girl whose sweetness melts your heart.
+     */
+    SUNNY(
+            "Sunny",
+            "四川-晴儿",
+            Gender.FEMALE,
+            "The voice of a Sichuan girl whose sweetness melts your heart"),
+
+    /**
+     * 南京-老李 (li) - Patient male yoga instructor. NOTE(review): the only lowercase id here; confirm the service does not expect "Li".
+     */
+    LI("li", "南京-老李", Gender.MALE, "Patient male yoga instructor"),
+
+    /**
+     * 陕西-秦川 (Marcus) - A voice that is broad-faced and brief-spoken, sincere-hearted and deep-voiced—the authentic flavor of Shaanxi.
+     */
+    MARCUS(
+            "Marcus",
+            "陕西-秦川",
+            Gender.MALE,
+            "A voice that is broad-faced and brief-spoken, sincere-hearted and deep-voiced—the"
+                    + " authentic flavor of Shaanxi"),
+
+    /**
+     * 闽南-阿杰 (Roy) - The voice of a humorous, straightforward, and lively young Taiwanese man.
+     */
+    ROY(
+            "Roy",
+            "闽南-阿杰",
+            Gender.MALE,
+            "The voice of a humorous, straightforward, and lively young Taiwanese man"),
+
+    /**
+     * 天津-李彼得 (Peter) - The voice of a professional straight man in Tianjin crosstalk.
+     */
+    PETER(
+            "Peter",
+            "天津-李彼得",
+            Gender.MALE,
+            "The voice of a professional straight man in Tianjin crosstalk"),
+
+    /**
+     * 粤语-阿强 (Rocky) - The voice of the humorous and witty Rocky, here for online chatting.
+     */
+    ROCKY(
+            "Rocky",
+            "粤语-阿强",
+            Gender.MALE,
+            "The voice of the humorous and witty Rocky, here for online chatting"),
+
+    /**
+     * 粤语-阿清 (Kiki) - A sweet female companion from Hong Kong.
+     */
+    KIKI("Kiki", "粤语-阿清", Gender.FEMALE, "A sweet female companion from Hong Kong"),
+
+    /**
+     * 四川-程川 (Eric) - An unconventional man from Chengdu, Sichuan.
+     */
+    ERIC("Eric", "四川-程川", Gender.MALE, "An unconventional man from Chengdu, Sichuan");
+
+    /** Single shared copy of {@link #values()}, which otherwise clones the array on every call. */
+    private static final Qwen3TTSFlashVoice[] VALUES = values();
+    private final String voiceId;
+    private final String displayName;
+    private final Gender gender;
+    private final String description;
+
+    Qwen3TTSFlashVoice(String voiceId, String displayName, Gender gender, String description) {
+        this.voiceId = voiceId;
+        this.displayName = displayName;
+        this.gender = gender;
+        this.description = description;
+    }
+
+    /**
+     * Voice id to use as the {@code voice} parameter in DashScope TTS requests.
+     */
+    public String getVoiceId() {
+        return voiceId;
+    }
+
+    /**
+     * Human friendly display name (typically Chinese).
+     */
+    public String getDisplayName() {
+        return displayName;
+    }
+
+    /**
+     * Gender of this voice (for informational / filtering purposes).
+     */
+    public Gender getGender() {
+        return gender;
+    }
+
+    /**
+     * Short description of the voice characteristics.
+     */
+    public String getDescription() {
+        return description;
+    }
+
+    /**
+     * Find a voice enum by its voiceId (case-insensitive, compared after {@link Locale#ROOT} lower-casing).
+     *
+     * @param voiceId the voice id string, e.g. "Cherry"
+     * @return matching enum value, or {@code null} if voiceId is null, empty, or unknown
+     */
+    public static Qwen3TTSFlashVoice fromVoiceId(String voiceId) {
+        if (voiceId == null || voiceId.isEmpty()) {
+            return null;
+        }
+        String normalized = voiceId.toLowerCase(Locale.ROOT);
+        for (Qwen3TTSFlashVoice v : VALUES) {
+            if (v.voiceId.toLowerCase(Locale.ROOT).equals(normalized)) {
+                return v;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Pick a random voice using {@link ThreadLocalRandom}.
+     */
+    public static Qwen3TTSFlashVoice random() {
+        return random(ThreadLocalRandom.current());
+    }
+
+    /**
+     * Pick a random voice using the provided {@link Random} instance.
+     *
+     * @param random source of randomness; must not be {@code null}
+     * @return one of the declared voices, each equally likely
+     */
+    public static Qwen3TTSFlashVoice random(Random random) {
+        return VALUES[random.nextInt(VALUES.length)];
+    }
+
+    /** Simple gender enum for voices. */
+    public enum Gender {
+        MALE,
+        FEMALE
+    }
+}
diff --git a/agentscope-core/src/test/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoiceTest.java b/agentscope-core/src/test/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoiceTest.java
new file mode 100644
index 000000000..8a52530ca
--- /dev/null
+++ b/agentscope-core/src/test/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoiceTest.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2024-2026 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.agentscope.core.model.tts;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for Qwen3TTSFlashVoice enum.
+ */
+class Qwen3TTSFlashVoiceTest {
+
+    @Test
+    @DisplayName("should have 17 voice constants defined")
+    void shouldHave17Voices() {
+        assertEquals(17, Qwen3TTSFlashVoice.values().length);
+    }
+
+    @Test
+    @DisplayName("should have correct voiceId for CHERRY")
+    void shouldHaveCorrectVoiceIdForCherry() {
+        assertEquals("Cherry", Qwen3TTSFlashVoice.CHERRY.getVoiceId());
+        assertEquals("芊悦", Qwen3TTSFlashVoice.CHERRY.getDisplayName());
+        assertEquals(Qwen3TTSFlashVoice.Gender.FEMALE, Qwen3TTSFlashVoice.CHERRY.getGender());
+        assertNotNull(Qwen3TTSFlashVoice.CHERRY.getDescription());
+    }
+
+    @Test
+    @DisplayName("should have correct voiceId for ETHAN")
+    void shouldHaveCorrectVoiceIdForEthan() {
+        assertEquals("Ethan", Qwen3TTSFlashVoice.ETHAN.getVoiceId());
+        assertEquals("晨煦", Qwen3TTSFlashVoice.ETHAN.getDisplayName());
+        assertEquals(Qwen3TTSFlashVoice.Gender.MALE, Qwen3TTSFlashVoice.ETHAN.getGender());
+        assertNotNull(Qwen3TTSFlashVoice.ETHAN.getDescription());
+    }
+
+    @Test
+    @DisplayName("should have correct gender for ELIAS")
+    void shouldHaveCorrectGenderForElias() {
+        assertEquals("Elias", Qwen3TTSFlashVoice.ELIAS.getVoiceId());
+        assertEquals(Qwen3TTSFlashVoice.Gender.MALE, Qwen3TTSFlashVoice.ELIAS.getGender());
+    }
+
+    @Test
+    @DisplayName("should find voice by voiceId case-insensitively")
+    void shouldFindVoiceByVoiceId() {
+        assertEquals(Qwen3TTSFlashVoice.CHERRY, Qwen3TTSFlashVoice.fromVoiceId("Cherry"));
+        assertEquals(Qwen3TTSFlashVoice.CHERRY, Qwen3TTSFlashVoice.fromVoiceId("cherry"));
+        assertEquals(Qwen3TTSFlashVoice.CHERRY, Qwen3TTSFlashVoice.fromVoiceId("CHERRY"));
+
+        assertEquals(Qwen3TTSFlashVoice.ETHAN, Qwen3TTSFlashVoice.fromVoiceId("Ethan"));
+        assertEquals(Qwen3TTSFlashVoice.LI, Qwen3TTSFlashVoice.fromVoiceId("li"));
+        assertEquals(Qwen3TTSFlashVoice.KIKI, Qwen3TTSFlashVoice.fromVoiceId("Kiki"));
+    }
+
+    @Test
+    @DisplayName("should return null for non-existent voiceId")
+    void shouldReturnNullForNonExistentVoiceId() {
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId("NonExistent"));
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId("Unknown"));
+    }
+
+    @Test
+    @DisplayName("should return null for null or empty voiceId")
+    void shouldReturnNullForNullOrEmptyVoiceId() {
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId(null));
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId(""));
+    }
+
+    @Test
+    @DisplayName("should return random voice using ThreadLocalRandom")
+    void shouldReturnRandomVoice() {
+        Qwen3TTSFlashVoice voice1 = Qwen3TTSFlashVoice.random();
+        assertNotNull(voice1);
+
+        // Draw repeatedly: 50 uniform picks from 17 voices all landing on one voice has
+        // probability 17^-49, so a false failure is practically impossible
+        Set<Qwen3TTSFlashVoice> voices = new HashSet<>();
+        for (int i = 0; i < 50; i++) {
+            voices.add(Qwen3TTSFlashVoice.random());
+        }
+        // With 17 voices and 50 calls, we should get at least 2 different voices
+        assertTrue(voices.size() >= 2);
+    }
+
+    @Test
+    @DisplayName("should return random voice using provided Random instance")
+    void shouldReturnRandomVoiceWithProvidedRandom() {
+        Random random = new Random(12345); // Fixed seed for reproducibility
+        Qwen3TTSFlashVoice voice1 = Qwen3TTSFlashVoice.random(random);
+        assertNotNull(voice1);
+
+        // Reset random with same seed to get same result
+        random = new Random(12345);
+        Qwen3TTSFlashVoice voice2 = Qwen3TTSFlashVoice.random(random);
+        assertEquals(voice1, voice2);
+    }
+
+    @Test
+    @DisplayName("should have all voices with non-null properties")
+    void shouldHaveAllVoicesWithNonNullProperties() {
+        for (Qwen3TTSFlashVoice voice : Qwen3TTSFlashVoice.values()) {
+            assertNotNull(voice.getVoiceId(), "voiceId should not be null for " + voice);
+            assertNotNull(voice.getDisplayName(), "displayName should not be null for " + voice);
+            assertNotNull(voice.getGender(), "gender should not be null for " + voice);
+            assertNotNull(voice.getDescription(), "description should not be null for " + voice);
+        }
+    }
+
+    @Test
+    @DisplayName("should have unique voiceIds for all voices")
+    void shouldHaveUniqueVoiceIds() {
+        // fromVoiceId matches case-insensitively, so ids must stay distinct after case folding
+        Set<String> voiceIds = new HashSet<>();
+        for (Qwen3TTSFlashVoice voice : Qwen3TTSFlashVoice.values()) {
+            assertTrue(
+                    voiceIds.add(voice.getVoiceId().toLowerCase(java.util.Locale.ROOT)),
+                    "Duplicate voiceId found: " + voice.getVoiceId());
+        }
+    }
+
+    @Test
+    @DisplayName("Gender enum should have MALE and FEMALE")
+    void genderEnumShouldHaveMaleAndFemale() {
+        assertEquals(2, Qwen3TTSFlashVoice.Gender.values().length);
+        assertEquals(Qwen3TTSFlashVoice.Gender.MALE, Qwen3TTSFlashVoice.Gender.valueOf("MALE"));
+        assertEquals(Qwen3TTSFlashVoice.Gender.FEMALE, Qwen3TTSFlashVoice.Gender.valueOf("FEMALE"));
+    }
+
+    @Test
+    @DisplayName("should have correct distribution of male and female voices")
+    void shouldHaveCorrectGenderDistribution() {
+        int total = Qwen3TTSFlashVoice.values().length;
+        int maleCount = 0;
+        int femaleCount = 0;
+        for (Qwen3TTSFlashVoice voice : Qwen3TTSFlashVoice.values()) {
+            if (voice.getGender() == Qwen3TTSFlashVoice.Gender.MALE) {
+                maleCount++;
+            } else if (voice.getGender() == Qwen3TTSFlashVoice.Gender.FEMALE) {
+                femaleCount++;
+            }
+        }
+        assertEquals(total, maleCount + femaleCount, "Total male + female should equal total voices");
+        assertTrue(maleCount > 0 && femaleCount > 0, "Should have at least one male and one female voice");
+    }
+}
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java
index 7a0a771be..e785ce523 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java
@@ -166,4 +166,16 @@ public static GameEvent userInputReceived(String inputType, String content) {
GameEventType.USER_INPUT_RECEIVED,
Map.of("inputType", inputType, "content", content));
}
+
+ /**
+ * Create an audio chunk event for TTS: type {@code AUDIO_CHUNK}, payload keys "player" and "audio".
+ * NOTE(review): assuming Map is java.util.Map, Map.of rejects null values, so neither argument may be null.
+ *
+ * @param playerName The name of the player speaking
+ * @param audioBase64 Base64 encoded audio data
+ * @return The event
+ */
+ public static GameEvent audioChunk(String playerName, String audioBase64) {
+ return new GameEvent(
+ GameEventType.AUDIO_CHUNK, Map.of("player", playerName, "audio", audioBase64));
+ }
}
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java
index c38c35236..9fc7d455c 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java
@@ -363,6 +363,19 @@ public void emitUserInputReceived(String inputType, String content) {
playerSink.tryEmitNext(event);
}
+ /**
+ * Emit an audio chunk for TTS: the event is appended to godViewHistory and pushed to playerSink.
+ * Audio is always public (everyone can hear day discussion).
+ * NOTE(review): every chunk stays in godViewHistory and the tryEmitNext result is ignored; confirm both are intended.
+ * @param playerName The name of the player speaking
+ * @param audioBase64 Base64 encoded audio data
+ */
+ public void emitAudioChunk(String playerName, String audioBase64) {
+ GameEvent event = GameEvent.audioChunk(playerName, audioBase64);
+ godViewHistory.add(event);
+ playerSink.tryEmitNext(event);
+ }
+
/**
* Get the player event stream as a Flux.
* This stream contains events visible to the human player based on their role.
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java
index 758bd3fb9..02305009f 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java
@@ -59,5 +59,8 @@ public enum GameEventType {
WAIT_USER_INPUT,
/** User input received confirmation. */
- USER_INPUT_RECEIVED
+ USER_INPUT_RECEIVED,
+
+ /** Audio chunk for TTS (text-to-speech). */
+ AUDIO_CHUNK
}
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java
index d883d52a4..4eaac57eb 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java
@@ -23,6 +23,7 @@
import io.agentscope.core.agent.user.UserAgent;
import io.agentscope.core.formatter.dashscope.DashScopeMultiAgentFormatter;
import io.agentscope.core.memory.InMemoryMemory;
+import io.agentscope.core.message.Base64Source;
import io.agentscope.core.message.MessageMetadataKeys;
import io.agentscope.core.message.Msg;
import io.agentscope.core.message.MsgRole;
@@ -30,6 +31,8 @@
import io.agentscope.core.model.DashScopeChatModel;
import io.agentscope.core.model.GenerateOptions;
import io.agentscope.core.model.StructuredOutputReminder;
+import io.agentscope.core.model.tts.DashScopeRealtimeTTSModel;
+import io.agentscope.core.model.tts.Qwen3TTSFlashVoice;
import io.agentscope.core.pipeline.MsgHub;
import io.agentscope.core.tool.Toolkit;
import io.agentscope.examples.werewolf.GameConfiguration;
@@ -79,6 +82,8 @@ public class WerewolfWebGame {
private DashScopeChatModel model;
private GameState gameState;
private Player humanPlayer;
+ // Mapping from player name to assigned TTS voice (randomized per game)
+ private Map<String, Qwen3TTSFlashVoice> playerVoices;
public WerewolfWebGame(GameEventEmitter emitter, LocalizationBundle bundle) {
this(emitter, bundle, null, null, new GameConfiguration());
@@ -165,6 +170,9 @@ public void start() throws Exception {
}
private GameState initializeGame() {
+ // Initialize per-game TTS voice mapping
+ playerVoices = new HashMap<>();
+
List roles = new ArrayList<>();
for (int i = 0; i < gameConfig.getVillagerCount(); i++) roles.add(Role.VILLAGER);
for (int i = 0; i < gameConfig.getWerewolfCount(); i++) roles.add(Role.WEREWOLF);
@@ -305,6 +313,15 @@ private GameState initializeGame() {
teammates);
}
+ // Assign random TTS voice to each player (independent of roles); voices repeat only when players outnumber them
+ List<Qwen3TTSFlashVoice> voices = new ArrayList<>(List.of(Qwen3TTSFlashVoice.values()));
+ Collections.shuffle(voices);
+ for (int i = 0; i < players.size(); i++) {
+     Player player = players.get(i);
+     Qwen3TTSFlashVoice voice = voices.get(i % voices.size());
+     playerVoices.put(player.getName(), voice);
+ }
+
return new GameState(players);
}
@@ -455,7 +472,7 @@ private Player werewolvesKill() {
try {
VoteModel voteData = vote.getStructuredData(VoteModel.class);
emitter.emitPlayerVote(
- vote.getName(),
+ werewolf.getName(),
voteData.targetPlayer,
voteData.reason,
EventVisibility.WEREWOLF_ONLY);
@@ -872,6 +889,9 @@ private void discussionPhase() {
Msg response = player.getAgent().call().block();
String content = utils.extractTextContent(response);
emitter.emitPlayerSpeak(player.getName(), content, "day_discussion");
+
+ // Generate TTS for AI speech (only during day discussion)
+ generateTTSForSpeech(player.getName(), content);
}
}
}
@@ -946,7 +966,7 @@ private Player votingPhase() {
try {
VoteModel voteData = vote.getStructuredData(VoteModel.class);
emitter.emitPlayerVote(
- vote.getName(),
+ player.getName(),
voteData.targetPlayer,
voteData.reason,
EventVisibility.PUBLIC);
@@ -1135,4 +1155,73 @@ private void emitStatsUpdate() {
gameState.getAliveWerewolves().size(),
gameState.getAliveVillagers().size());
}
+
+ /**
+ * Generate TTS audio for a player's speech and emit audio chunks to frontend.
+ * Only called during day discussion phase to avoid generating TTS for votes/actions.
+ *
+ * @param playerName The name of the speaking player
+ * @param text The text content to convert to speech
+ */
+ private void generateTTSForSpeech(String playerName, String text) {
+ if (text == null || text.trim().isEmpty()) {
+ return;
+ }
+
+ String apiKey = System.getenv("DASHSCOPE_API_KEY");
+ if (apiKey == null || apiKey.isEmpty()) {
+ // Skip TTS if no API key
+ return;
+ }
+
+ // Resolve voice for this player (fallback to a default if not assigned)
+ Qwen3TTSFlashVoice voice = playerVoices != null ? playerVoices.get(playerName) : null;
+ if (voice == null) {
+ voice = Qwen3TTSFlashVoice.CHERRY;
+ }
+
+ // Create TTS model for this specific speech
+ DashScopeRealtimeTTSModel ttsModel = null;
+ try {
+ ttsModel =
+ DashScopeRealtimeTTSModel.builder()
+ .apiKey(apiKey)
+ .modelName("qwen3-tts-flash-realtime")
+ .voice(voice.getVoiceId())
+ .sampleRate(24000)
+ .format("pcm")
+ .build();
+
+ // Start session
+ ttsModel.startSession();
+
+ // Subscribe to audio stream and emit chunks
+ ttsModel.getAudioStream()
+ .doOnNext(
+ audio -> {
+ if (audio.getSource() instanceof Base64Source src) {
+ emitter.emitAudioChunk(playerName, src.getData());
+ }
+ })
+ .subscribe();
+
+ // Push text to TTS
+ ttsModel.push(text);
+
+ // Finish and wait for all audio
+ ttsModel.finish().blockLast();
+ } catch (Exception e) {
+ // Log error but don't fail the game
+ System.err.println("TTS generation error for " + playerName + ": " + e.getMessage());
+ } finally {
+ // Clean up TTS resources
+ if (ttsModel != null) {
+ try {
+ ttsModel.close();
+ } catch (Exception e) {
+ // Ignore cleanup errors
+ }
+ }
+ }
+ }
}
diff --git a/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js b/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js
index 7b49ad93b..959e7fabd 100644
--- a/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js
+++ b/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js
@@ -24,6 +24,13 @@ let currentInputType = null;
let selectedRole = 'RANDOM';
let isSpectatorMode = false;
+// Audio state (audioContext stays null until initAudio creates it)
+let audioContext = null;
+const playerAudioPlayers = new Map(); // playerName -> audio player object built by createAudioPlayerForPlayer
+// Global audio playback coordination (single speaker at a time)
+let currentSpeakingPlayer = null;
+const pendingSpeakingPlayers = []; // Queue of player names waiting to speak
+
// Role icons mapping
const roleIcons = {
'VILLAGER': '👤',
@@ -342,6 +349,9 @@ function handleEvent(event) {
case 'USER_INPUT_RECEIVED':
handleUserInputReceived(data.inputType, data.content);
break;
+ case 'AUDIO_CHUNK':
+ handleAudioChunk(data.player, data.audio);
+ break;
}
}
@@ -786,3 +796,220 @@ document.addEventListener('DOMContentLoaded', () => {
}));
renderPlayers();
});
+
+// ==================== Audio Functions ====================
+/**
+ * Initialize audio context on first user interaction.
+ */
+function initAudio() {
+ if (!audioContext) {
+ audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 });
+ }
+}
+
+/**
+ * Handle audio chunk event from backend.
+ *
+ * @param {string} playerName - The name of the speaking player
+ * @param {string} audioBase64 - Base64 encoded audio data
+ */
+function handleAudioChunk(playerName, audioBase64) {
+ if (!audioBase64) return;
+
+ // Initialize audio context
+ initAudio();
+
+ // Get or create audio player for this player
+ let audioPlayer = playerAudioPlayers.get(playerName);
+ if (!audioPlayer) {
+ audioPlayer = createAudioPlayerForPlayer(playerName);
+ playerAudioPlayers.set(playerName, audioPlayer);
+ }
+
+ // Decode and add to playback queue
+ const audioData = base64ToArrayBuffer(audioBase64);
+ addAudioChunk(audioPlayer, audioData);
+
+ // Global coordination: only one player speaks at a time.
+ if (!currentSpeakingPlayer) {
+ // No one is speaking, start this player immediately
+ currentSpeakingPlayer = playerName;
+ if (!audioPlayer.isPlaying) {
+ playAudio(audioPlayer, playerName);
+ }
+ } else if (currentSpeakingPlayer === playerName) {
+ // Same player is already speaking, its queue will continue in playAudio
+ } else {
+ // Another player is speaking, enqueue this player if not already queued
+ if (!pendingSpeakingPlayers.includes(playerName)) {
+ pendingSpeakingPlayers.push(playerName);
+ }
+ }
+}
+
+/**
+ * Create an audio player for a specific player.
+ *
+ * @param {string} playerName - Player name
+ * @returns {object} Audio player object
+ */
+function createAudioPlayerForPlayer(playerName) {
+ return {
+ chunks: [], // Queue of audio chunks
+ sources: [], // Active audio sources
+ isPlaying: false,
+ currentIndex: 0 // Current playback position
+ };
+}
+
+/**
+ * Add audio chunk to player's queue.
+ *
+ * @param {object} audioPlayer - Audio player object
+ * @param {ArrayBuffer} audioData - Audio data
+ */
+function addAudioChunk(audioPlayer, audioData) {
+ audioPlayer.chunks.push(audioData);
+}
+
+/**
+ * Play audio from queue.
+ *
+ * @param {object} audioPlayer - Audio player object
+ * @param {string} playerName - Player name for visual feedback
+ */
+async function playAudio(audioPlayer, playerName) {
+ if (audioPlayer.isPlaying || audioPlayer.chunks.length === 0) {
+ return;
+ }
+
+ audioPlayer.isPlaying = true;
+ highlightPlayer(playerName);
+
+ // Play chunks from current index to end
+ while (audioPlayer.currentIndex < audioPlayer.chunks.length && audioPlayer.isPlaying) {
+ const chunk = audioPlayer.chunks[audioPlayer.currentIndex];
+ audioPlayer.currentIndex++;
+ await playAudioChunk(chunk, audioPlayer);
+
+ if (!audioPlayer.isPlaying) {
+ break;
+ }
+ }
+
+ // Playback completed
+ audioPlayer.isPlaying = false;
+ audioPlayer.currentIndex = 0; // Reset index
+ audioPlayer.chunks = []; // Clear processed chunks
+ unhighlightPlayer(playerName);
+
+ // Mark current speaker finished
+ if (currentSpeakingPlayer === playerName) {
+ currentSpeakingPlayer = null;
+ }
+
+ // Start next waiting player if any
+ while (pendingSpeakingPlayers.length > 0) {
+ const nextPlayerName = pendingSpeakingPlayers.shift();
+ const nextAudioPlayer = playerAudioPlayers.get(nextPlayerName);
+ if (nextAudioPlayer && nextAudioPlayer.chunks.length > 0) {
+ currentSpeakingPlayer = nextPlayerName;
+ if (!nextAudioPlayer.isPlaying) {
+ // Fire-and-forget, chaining will continue when this playback finishes
+ playAudio(nextAudioPlayer, nextPlayerName);
+ }
+ break;
+ }
+ }
+}
+
+/**
+ * Play a single audio chunk.
+ *
+ * @param {ArrayBuffer} audioData - Audio data
+ * @param {object} audioPlayer - Audio player object
+ * @returns {Promise} Promise that resolves when chunk finishes playing
+ */
+async function playAudioChunk(audioData, audioPlayer) {
+ return new Promise((resolve, reject) => {
+ if (!audioPlayer.isPlaying) {
+ resolve();
+ return;
+ }
+
+ try {
+ // Try to decode as PCM
+ playRawPCM(audioData, audioPlayer).then(resolve).catch(reject);
+ } catch (e) {
+ reject(e);
+ }
+ });
+}
+
+/**
+ * Play raw PCM audio data.
+ *
+ * @param {ArrayBuffer} data - PCM audio data
+ * @param {object} audioPlayer - Audio player object
+ * @returns {Promise} Promise that resolves when playback finishes
+ */
+async function playRawPCM(data, audioPlayer) {
+ return new Promise((resolve, reject) => {
+ if (!audioPlayer.isPlaying) {
+ resolve();
+ return;
+ }
+
+ try {
+ const pcmData = new Int16Array(data);
+ const floatData = new Float32Array(pcmData.length);
+ for (let i = 0; i < pcmData.length; i++) {
+ floatData[i] = pcmData[i] / 32768.0;
+ }
+
+ const audioBuffer = audioContext.createBuffer(1, floatData.length, 24000);
+ audioBuffer.getChannelData(0).set(floatData);
+
+ if (!audioPlayer.isPlaying) {
+ resolve();
+ return;
+ }
+
+ const source = audioContext.createBufferSource();
+ source.buffer = audioBuffer;
+ source.connect(audioContext.destination);
+ audioPlayer.sources.push(source);
+
+ source.onended = () => {
+ const index = audioPlayer.sources.indexOf(source);
+ if (index > -1) {
+ audioPlayer.sources.splice(index, 1);
+ }
+ resolve();
+ };
+
+ if (audioPlayer.isPlaying) {
+ source.start();
+ } else {
+ resolve();
+ }
+ } catch (e) {
+ reject(e);
+ }
+ });
+}
+
+/**
+ * Convert base64 string to ArrayBuffer.
+ *
+ * @param {string} base64 - Base64 encoded string
+ * @returns {ArrayBuffer} Decoded array buffer
+ */
+function base64ToArrayBuffer(base64) {
+ const binaryString = atob(base64);
+ const bytes = new Uint8Array(binaryString.length);
+ for (let i = 0; i < binaryString.length; i++) {
+ bytes[i] = binaryString.charCodeAt(i);
+ }
+ return bytes.buffer;
+}