diff --git a/agentscope-core/src/main/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoice.java b/agentscope-core/src/main/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoice.java
new file mode 100644
index 000000000..f5f7aac54
--- /dev/null
+++ b/agentscope-core/src/main/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoice.java
@@ -0,0 +1,238 @@
+/*
+ * Copyright 2024-2026 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.agentscope.core.model.tts;
+
+import java.util.Locale;
+import java.util.Random;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Predefined voices for Qwen3 TTS Flash / Realtime models.
+ *
+ * <p>The {@code voiceId} values correspond to the {@code voice} parameter
+ * accepted by qwen3-tts-flash and qwen3-tts-flash-realtime.
+ */
+public enum Qwen3TTSFlashVoice {
+
+    /**
+     * 芊悦 (Cherry) - A sunny, positive, friendly, and natural young woman.
+     */
+    CHERRY("Cherry", "芊悦", Gender.FEMALE, "A sunny, positive, friendly, and natural young woman"),
+
+    /**
+     * 晨煦 (Ethan) - A bright, warm, energetic, and vibrant male voice with a standard Mandarin pronunciation and a slight northern accent.
+     */
+    ETHAN(
+            "Ethan",
+            "晨煦",
+            Gender.MALE,
+            "A bright, warm, energetic, and vibrant male voice with a standard Mandarin"
+                    + " pronunciation and a slight northern accent"),
+
+    /**
+     * 不吃鱼 (Nofish) - A male designer who cannot pronounce retroflex sounds.
+     */
+    NOFISH("Nofish", "不吃鱼", Gender.MALE, "A male designer who cannot pronounce retroflex sounds"),
+
+    /**
+     * 詹妮弗 (Jennifer) - A premium, cinematic American English female voice.
+     */
+    JENNIFER(
+            "Jennifer", "詹妮弗", Gender.FEMALE, "A premium, cinematic American English female voice"),
+
+    /**
+     * 甜茶 (Ryan) - A rhythmic and dramatic voice with a sense of realism and tension.
+     */
+    RYAN(
+            "Ryan",
+            "甜茶",
+            Gender.MALE,
+            "A rhythmic and dramatic voice with a sense of realism and tension"),
+
+    /**
+     * 卡捷琳娜 (Katerina) - A mature female voice with a rich rhythm and lingering resonance.
+     */
+    KATERINA(
+            "Katerina",
+            "卡捷琳娜",
+            Gender.FEMALE,
+            "A mature female voice with a rich rhythm and lingering resonance"),
+
+    /**
+     * 墨讲师 (Elias) - A voice that maintains academic rigor while using storytelling techniques to transform complex knowledge into digestible cognitive modules.
+     */
+    ELIAS(
+            "Elias",
+            "墨讲师",
+            Gender.MALE,
+            "A voice that maintains academic rigor while using storytelling techniques to transform"
+                    + " complex knowledge into digestible cognitive modules"),
+
+    /**
+     * 上海-阿珍 (Jada) - An energetic woman from Shanghai.
+     */
+    JADA("Jada", "上海-阿珍", Gender.FEMALE, "An energetic woman from Shanghai"),
+
+    /**
+     * 北京-晓东 (Dylan) - A teenage boy who grew up in the hutongs of Beijing.
+     */
+    DYLAN("Dylan", "北京-晓东", Gender.MALE, "A teenage boy who grew up in the hutongs of Beijing"),
+
+    /**
+     * 四川-晴儿 (Sunny) - The voice of a Sichuan girl whose sweetness melts your heart.
+     */
+    SUNNY(
+            "Sunny",
+            "四川-晴儿",
+            Gender.FEMALE,
+            "The voice of a Sichuan girl whose sweetness melts your heart"),
+
+    /**
+     * 南京-老李 (li) - Patient male yoga instructor.
+     */
+    LI("li", "南京-老李", Gender.MALE, "Patient male yoga instructor"),
+
+    /**
+     * 陕西-秦川 (Marcus) - A voice that is broad-faced and brief-spoken, sincere-hearted and deep-voiced—the authentic flavor of Shaanxi.
+     */
+    MARCUS(
+            "Marcus",
+            "陕西-秦川",
+            Gender.MALE,
+            "A voice that is broad-faced and brief-spoken, sincere-hearted and deep-voiced—the"
+                    + " authentic flavor of Shaanxi"),
+
+    /**
+     * 闽南-阿杰 (Roy) - The voice of a humorous, straightforward, and lively young Taiwanese man.
+     */
+    ROY(
+            "Roy",
+            "闽南-阿杰",
+            Gender.MALE,
+            "The voice of a humorous, straightforward, and lively young Taiwanese man"),
+
+    /**
+     * 天津-李彼得 (Peter) - The voice of a professional straight man in Tianjin crosstalk.
+     */
+    PETER(
+            "Peter",
+            "天津-李彼得",
+            Gender.MALE,
+            "The voice of a professional straight man in Tianjin crosstalk"),
+
+    /**
+     * 粤语-阿强 (Rocky) - The voice of the humorous and witty Rocky, here for online chatting.
+     */
+    ROCKY(
+            "Rocky",
+            "粤语-阿强",
+            Gender.MALE,
+            "The voice of the humorous and witty Rocky, here for online chatting"),
+
+    /**
+     * 粤语-阿清 (Kiki) - A sweet female companion from Hong Kong.
+     */
+    KIKI("Kiki", "粤语-阿清", Gender.FEMALE, "A sweet female companion from Hong Kong"),
+
+    /**
+     * 四川-程川 (Eric) - An unconventional man from Chengdu, Sichuan.
+     */
+    ERIC("Eric", "四川-程川", Gender.MALE, "An unconventional man from Chengdu, Sichuan");
+
+    private final String voiceId;
+    private final String displayName;
+    private final Gender gender;
+    private final String description;
+
+    Qwen3TTSFlashVoice(String voiceId, String displayName, Gender gender, String description) {
+        this.voiceId = voiceId;
+        this.displayName = displayName;
+        this.gender = gender;
+        this.description = description;
+    }
+
+    /**
+     * Returns the id to send as the {@code voice} parameter in DashScope TTS requests.
+     */
+    public String getVoiceId() {
+        return this.voiceId;
+    }
+
+    /**
+     * Returns the human friendly display name (typically Chinese).
+     */
+    public String getDisplayName() {
+        return this.displayName;
+    }
+
+    /**
+     * Returns the gender of this voice (for informational / filtering purposes).
+     */
+    public Gender getGender() {
+        return this.gender;
+    }
+
+    /**
+     * Returns a short description of the voice characteristics.
+     */
+    public String getDescription() {
+        return this.description;
+    }
+
+    /**
+     * Look up a voice by its id, ignoring case (compared after {@link Locale#ROOT} lower-casing).
+     *
+     * @param voiceId id to look up, e.g. "Cherry"; may be {@code null} or empty
+     * @return the constant whose voice id matches, or {@code null} when nothing matches
+     */
+    public static Qwen3TTSFlashVoice fromVoiceId(String voiceId) {
+        Qwen3TTSFlashVoice match = null;
+        if (voiceId != null && !voiceId.isEmpty()) {
+            final String wanted = voiceId.toLowerCase(Locale.ROOT);
+            final Qwen3TTSFlashVoice[] candidates = values();
+            for (int i = 0; match == null && i < candidates.length; i++) {
+                String candidateId = candidates[i].voiceId.toLowerCase(Locale.ROOT);
+                match = candidateId.equals(wanted) ? candidates[i] : null;
+            }
+        }
+        return match;
+    }
+
+    /**
+     * Pick a random voice using {@link ThreadLocalRandom}.
+     */
+    public static Qwen3TTSFlashVoice random() {
+        return random(ThreadLocalRandom.current());
+    }
+
+    /**
+     * Pick a random voice using the provided {@link Random} instance.
+     *
+     * @param random source of randomness; must not be {@code null}
+     * @return one of the declared voices, chosen with {@link Random#nextInt(int)}
+     * @throws NullPointerException if {@code random} is {@code null}
+     */
+    public static Qwen3TTSFlashVoice random(Random random) {
+        Qwen3TTSFlashVoice[] all = values(); // never empty: the enum declares its constants
+        return all[random.nextInt(all.length)];
+    }
+
+    /** Gender tag carried by each voice; see {@link Qwen3TTSFlashVoice#getGender()}. */
+    public enum Gender {
+        MALE,
+        FEMALE
+    }
+}
diff --git a/agentscope-core/src/test/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoiceTest.java b/agentscope-core/src/test/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoiceTest.java
new file mode 100644
index 000000000..8a52530ca
--- /dev/null
+++ b/agentscope-core/src/test/java/io/agentscope/core/model/tts/Qwen3TTSFlashVoiceTest.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2024-2026 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.agentscope.core.model.tts;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+import org.junit.jupiter.api.DisplayName;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for Qwen3TTSFlashVoice enum.
+ */
+class Qwen3TTSFlashVoiceTest {
+
+    @Test
+    @DisplayName("should have 17 voice constants defined")
+    void shouldHave17Voices() {
+        assertEquals(17, Qwen3TTSFlashVoice.values().length); // fails if a voice is added/removed
+    }
+
+    @Test
+    @DisplayName("should expose the expected properties for CHERRY")
+    void shouldHaveCorrectVoiceIdForCherry() {
+        assertEquals("Cherry", Qwen3TTSFlashVoice.CHERRY.getVoiceId());
+        assertEquals("芊悦", Qwen3TTSFlashVoice.CHERRY.getDisplayName());
+        assertEquals(Qwen3TTSFlashVoice.Gender.FEMALE, Qwen3TTSFlashVoice.CHERRY.getGender());
+        assertNotNull(Qwen3TTSFlashVoice.CHERRY.getDescription()); // wording is not pinned
+    }
+
+    @Test
+    @DisplayName("should expose the expected properties for ETHAN")
+    void shouldHaveCorrectVoiceIdForEthan() {
+        assertEquals("Ethan", Qwen3TTSFlashVoice.ETHAN.getVoiceId());
+        assertEquals("晨煦", Qwen3TTSFlashVoice.ETHAN.getDisplayName());
+        assertEquals(Qwen3TTSFlashVoice.Gender.MALE, Qwen3TTSFlashVoice.ETHAN.getGender());
+        assertNotNull(Qwen3TTSFlashVoice.ETHAN.getDescription()); // wording is not pinned
+    }
+
+    @Test
+    @DisplayName("should have correct voiceId and gender for ELIAS")
+    void shouldHaveCorrectGenderForElias() {
+        assertEquals("Elias", Qwen3TTSFlashVoice.ELIAS.getVoiceId()); // the id is pinned too
+        assertEquals(Qwen3TTSFlashVoice.Gender.MALE, Qwen3TTSFlashVoice.ELIAS.getGender());
+    }
+
+    @Test
+    @DisplayName("should find voice by voiceId case-insensitively")
+    void shouldFindVoiceByVoiceId() {
+        java.util.Locale root = java.util.Locale.ROOT;
+        for (Qwen3TTSFlashVoice voice : Qwen3TTSFlashVoice.values()) { // every constant
+            String id = voice.getVoiceId();
+            for (String form : new String[] {id, id.toLowerCase(root), id.toUpperCase(root)}) {
+                assertEquals(voice, Qwen3TTSFlashVoice.fromVoiceId(form), "id: " + form);
+            }
+        }
+    }
+
+    @Test
+    @DisplayName("should return null for non-existent voiceId")
+    void shouldReturnNullForNonExistentVoiceId() {
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId("NonExistent")); // no constant uses this id
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId("Unknown")); // nor this one
+    }
+
+    @Test
+    @DisplayName("should return null for null or empty voiceId")
+    void shouldReturnNullForNullOrEmptyVoiceId() {
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId(null)); // null yields null, not an exception
+        assertNull(Qwen3TTSFlashVoice.fromVoiceId("")); // the empty string is handled the same
+    }
+
+    @Test
+    @DisplayName("should return random voice using ThreadLocalRandom")
+    void shouldReturnRandomVoice() {
+        assertNotNull(Qwen3TTSFlashVoice.random());
+
+        // Sample repeatedly; draws are random, so only a weak spread can be asserted.
+        Set<Qwen3TTSFlashVoice> seen = new HashSet<>();
+        int remainingDraws = 50;
+        while (remainingDraws-- > 0) {
+            seen.add(Qwen3TTSFlashVoice.random());
+        }
+        // 50 draws over 17 voices all landing on one voice is vanishingly unlikely.
+        boolean sawVariety = seen.size() >= 2;
+        assertTrue(sawVariety);
+    }
+
+    @Test
+    @DisplayName("should return random voice using provided Random instance")
+    void shouldReturnRandomVoiceWithProvidedRandom() {
+        Random random = new Random(12345); // Fixed seed for reproducibility
+        Qwen3TTSFlashVoice voice1 = Qwen3TTSFlashVoice.random(random);
+        assertNotNull(voice1);
+
+        // Reset random with same seed to get same result
+        random = new Random(12345);
+        Qwen3TTSFlashVoice voice2 = Qwen3TTSFlashVoice.random(random);
+        assertEquals(voice1, voice2);
+    }
+
+    @Test
+    @DisplayName("should have all voices with non-null properties")
+    void shouldHaveAllVoicesWithNonNullProperties() {
+        for (Qwen3TTSFlashVoice v : Qwen3TTSFlashVoice.values()) { // messages are built lazily
+            assertNotNull(v.getVoiceId(), () -> "voiceId should not be null for " + v);
+            assertNotNull(v.getDisplayName(), () -> "displayName should not be null for " + v);
+            assertNotNull(v.getGender(), () -> "gender should not be null for " + v);
+            assertNotNull(v.getDescription(), () -> "description should not be null for " + v);
+        }
+    }
+
+    @Test
+    @DisplayName("should have unique voiceIds for all voices")
+    void shouldHaveUniqueVoiceIds() {
+        // fromVoiceId ignores case, so ids that differ only by case would be ambiguous too.
+        Set<String> seenIds = new HashSet<>();
+        for (Qwen3TTSFlashVoice voice : Qwen3TTSFlashVoice.values()) {
+            String folded = voice.getVoiceId().toLowerCase(java.util.Locale.ROOT);
+            assertTrue(seenIds.add(folded), "Duplicate voiceId found: " + voice.getVoiceId());
+        }
+        assertEquals(Qwen3TTSFlashVoice.values().length, seenIds.size());
+    }
+
+    @Test
+    @DisplayName("Gender enum should have MALE and FEMALE")
+    void genderEnumShouldHaveMaleAndFemale() {
+        assertEquals(2, Qwen3TTSFlashVoice.Gender.values().length); // exactly two constants
+        assertEquals(Qwen3TTSFlashVoice.Gender.MALE, Qwen3TTSFlashVoice.Gender.valueOf("MALE"));
+        assertEquals(Qwen3TTSFlashVoice.Gender.FEMALE, Qwen3TTSFlashVoice.Gender.valueOf("FEMALE"));
+    }
+
+    @Test
+    @DisplayName("should have correct distribution of male and female voices")
+    void shouldHaveCorrectGenderDistribution() {
+        Qwen3TTSFlashVoice[] all = Qwen3TTSFlashVoice.values();
+        int maleCount = 0;
+        int femaleCount = 0;
+        for (Qwen3TTSFlashVoice voice : all) {
+            maleCount += voice.getGender() == Qwen3TTSFlashVoice.Gender.MALE ? 1 : 0;
+            femaleCount += voice.getGender() == Qwen3TTSFlashVoice.Gender.FEMALE ? 1 : 0;
+        }
+        // Use the live constant count instead of repeating the hard-coded 17.
+        assertEquals(
+                all.length, maleCount + femaleCount, "Total male + female should equal total voices");
+        assertTrue(maleCount > 0, "Should have at least one male voice");
+        assertTrue(femaleCount > 0, "Should have at least one female voice");
+    }
+}
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java
index 7a0a771be..e785ce523 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEvent.java
@@ -166,4 +166,16 @@ public static GameEvent userInputReceived(String inputType, String content) {
                 GameEventType.USER_INPUT_RECEIVED,
                 Map.of("inputType", inputType, "content", content));
     }
+
+    /**
+     * Create an audio chunk event for TTS, typed {@link GameEventType#AUDIO_CHUNK}.
+     *
+     * @param playerName name of the speaking player, stored under key {@code "player"}
+     * @param audioBase64 Base64 encoded audio data, stored under key {@code "audio"}
+     * @return the new event (if {@code Map} is java.util.Map, null arguments raise an NPE)
+     */
+    public static GameEvent audioChunk(String playerName, String audioBase64) {
+        return new GameEvent(
+                GameEventType.AUDIO_CHUNK, Map.of("player", playerName, "audio", audioBase64));
+    }
 }
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java
index c38c35236..9fc7d455c 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventEmitter.java
@@ -363,6 +363,19 @@ public void emitUserInputReceived(String inputType, String content) {
         playerSink.tryEmitNext(event);
     }
 
+    /**
+     * Publish one TTS audio chunk: record it in the god-view history, then push it to the
+     * player sink. Audio is always public (everyone can hear day discussion).
+     *
+     * @param playerName The name of the player speaking
+     * @param audioBase64 Base64 encoded audio data
+     */
+    public void emitAudioChunk(String playerName, String audioBase64) {
+        final GameEvent chunkEvent = GameEvent.audioChunk(playerName, audioBase64);
+        godViewHistory.add(chunkEvent);
+        playerSink.tryEmitNext(chunkEvent);
+    }
+
     /**
      * Get the player event stream as a Flux.
      * This stream contains events visible to the human player based on their role.
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java
index 758bd3fb9..02305009f 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/GameEventType.java
@@ -59,5 +59,8 @@ public enum GameEventType {
     WAIT_USER_INPUT,
 
     /** User input received confirmation. */
-    USER_INPUT_RECEIVED
+    USER_INPUT_RECEIVED,
+
+    /** Audio chunk for TTS (text-to-speech). */
+    AUDIO_CHUNK
 }
diff --git a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java
index d883d52a4..4eaac57eb 100644
--- a/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java
+++ b/agentscope-examples/werewolf-hitl/src/main/java/io/agentscope/examples/werewolf/web/WerewolfWebGame.java
@@ -23,6 +23,7 @@
 import io.agentscope.core.agent.user.UserAgent;
 import io.agentscope.core.formatter.dashscope.DashScopeMultiAgentFormatter;
 import io.agentscope.core.memory.InMemoryMemory;
+import io.agentscope.core.message.Base64Source;
 import io.agentscope.core.message.MessageMetadataKeys;
 import io.agentscope.core.message.Msg;
 import io.agentscope.core.message.MsgRole;
@@ -30,6 +31,8 @@
 import io.agentscope.core.model.DashScopeChatModel;
 import io.agentscope.core.model.GenerateOptions;
 import io.agentscope.core.model.StructuredOutputReminder;
+import io.agentscope.core.model.tts.DashScopeRealtimeTTSModel;
+import io.agentscope.core.model.tts.Qwen3TTSFlashVoice;
 import io.agentscope.core.pipeline.MsgHub;
 import io.agentscope.core.tool.Toolkit;
 import io.agentscope.examples.werewolf.GameConfiguration;
@@ -79,6 +82,8 @@ public class WerewolfWebGame {
     private DashScopeChatModel model;
     private GameState gameState;
     private Player humanPlayer;
+    // Mapping from player name to assigned TTS voice (randomized per game)
+    private Map<String, Qwen3TTSFlashVoice> playerVoices;
 
     public WerewolfWebGame(GameEventEmitter emitter, LocalizationBundle bundle) {
         this(emitter, bundle, null, null, new GameConfiguration());
@@ -165,6 +170,9 @@ public void start() throws Exception {
     }
 
     private GameState initializeGame() {
+        // Initialize per-game TTS voice mapping
+        playerVoices = new HashMap<>();
+
         List<Role> roles = new ArrayList<>();
         for (int i = 0; i < gameConfig.getVillagerCount(); i++) roles.add(Role.VILLAGER);
         for (int i = 0; i < gameConfig.getWerewolfCount(); i++) roles.add(Role.WEREWOLF);
@@ -305,6 +313,15 @@ private GameState initializeGame() {
                     teammates);
         }
 
+        // Assign random TTS voice to each player (independent of roles)
+        List<Qwen3TTSFlashVoice> voices = new ArrayList<>(List.of(Qwen3TTSFlashVoice.values()));
+        Collections.shuffle(voices);
+        for (int i = 0; i < players.size(); i++) {
+            Player player = players.get(i);
+            Qwen3TTSFlashVoice voice = voices.get(i % voices.size());
+            playerVoices.put(player.getName(), voice);
+        }
+
         return new GameState(players);
     }
 
@@ -455,7 +472,7 @@ private Player werewolvesKill() {
                     try {
                         VoteModel voteData = vote.getStructuredData(VoteModel.class);
                         emitter.emitPlayerVote(
-                                vote.getName(),
+                                werewolf.getName(),
                                 voteData.targetPlayer,
                                 voteData.reason,
                                 EventVisibility.WEREWOLF_ONLY);
@@ -872,6 +889,9 @@ private void discussionPhase() {
                         Msg response = player.getAgent().call().block();
                         String content = utils.extractTextContent(response);
                         emitter.emitPlayerSpeak(player.getName(), content, "day_discussion");
+
+                        // Generate TTS for AI speech (only during day discussion)
+                        generateTTSForSpeech(player.getName(), content);
                     }
                 }
             }
@@ -946,7 +966,7 @@ private Player votingPhase() {
                     try {
                         VoteModel voteData = vote.getStructuredData(VoteModel.class);
                         emitter.emitPlayerVote(
-                                vote.getName(),
+                                player.getName(),
                                 voteData.targetPlayer,
                                 voteData.reason,
                                 EventVisibility.PUBLIC);
@@ -1135,4 +1155,73 @@ private void emitStatsUpdate() {
                 gameState.getAliveWerewolves().size(),
                 gameState.getAliveVillagers().size());
     }
+
+    /**
+     * Generate TTS audio for a player's speech and emit audio chunks to frontend.
+     * Only called during day discussion phase to avoid generating TTS for votes/actions.
+     *
+     * @param playerName The name of the speaking player
+     * @param text The text content to convert to speech
+     */
+    private void generateTTSForSpeech(String playerName, String text) {
+        if (text == null || text.trim().isEmpty()) {
+            return;
+        }
+
+        String apiKey = System.getenv("DASHSCOPE_API_KEY");
+        if (apiKey == null || apiKey.isEmpty()) {
+            // Skip TTS if no API key
+            return;
+        }
+
+        // Resolve voice for this player (fallback to a default if not assigned)
+        Qwen3TTSFlashVoice voice = playerVoices != null ? playerVoices.get(playerName) : null;
+        if (voice == null) {
+            voice = Qwen3TTSFlashVoice.CHERRY;
+        }
+
+        // Create TTS model for this specific speech
+        DashScopeRealtimeTTSModel ttsModel = null;
+        try {
+            ttsModel =
+                    DashScopeRealtimeTTSModel.builder()
+                            .apiKey(apiKey)
+                            .modelName("qwen3-tts-flash-realtime")
+                            .voice(voice.getVoiceId())
+                            .sampleRate(24000)
+                            .format("pcm")
+                            .build();
+
+            // Start session
+            ttsModel.startSession();
+
+            // Subscribe to audio stream and emit chunks
+            ttsModel.getAudioStream()
+                    .doOnNext(
+                            audio -> {
+                                if (audio.getSource() instanceof Base64Source src) {
+                                    emitter.emitAudioChunk(playerName, src.getData());
+                                }
+                            })
+                    .subscribe();
+
+            // Push text to TTS
+            ttsModel.push(text);
+
+            // Finish and wait for all audio
+            ttsModel.finish().blockLast();
+        } catch (Exception e) {
+            // Log error but don't fail the game
+            System.err.println("TTS generation error for " + playerName + ": " + e.getMessage());
+        } finally {
+            // Clean up TTS resources
+            if (ttsModel != null) {
+                try {
+                    ttsModel.close();
+                } catch (Exception e) {
+                    // Ignore cleanup errors
+                }
+            }
+        }
+    }
 }
diff --git a/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js b/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js
index 7b49ad93b..959e7fabd 100644
--- a/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js
+++ b/agentscope-examples/werewolf-hitl/src/main/resources/static/js/app.js
@@ -24,6 +24,13 @@ let currentInputType = null;
 let selectedRole = 'RANDOM';
 let isSpectatorMode = false;
 
+// Audio state
+let audioContext = null;
+const playerAudioPlayers = new Map(); // Map<playerName, audioPlayer>
+// Global audio playback coordination (single speaker at a time)
+let currentSpeakingPlayer = null;
+const pendingSpeakingPlayers = []; // Queue of player names waiting to speak
+
 // Role icons mapping
 const roleIcons = {
     'VILLAGER': '👤',
@@ -342,6 +349,9 @@ function handleEvent(event) {
         case 'USER_INPUT_RECEIVED':
             handleUserInputReceived(data.inputType, data.content);
             break;
+        case 'AUDIO_CHUNK':
+            handleAudioChunk(data.player, data.audio);
+            break;
     }
 }
 
@@ -786,3 +796,220 @@ document.addEventListener('DOMContentLoaded', () => {
     }));
     renderPlayers();
 });
+
+// ==================== Audio Functions ====================
+/**
+ * Initialize audio context on first user interaction.
+ */
+function initAudio() {
+    if (!audioContext) {
+        audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 });
+    }
+}
+
+/**
+ * Handle audio chunk event from backend.
+ *  
+ * @param {string} playerName - The name of the speaking player
+ * @param {string} audioBase64 - Base64 encoded audio data
+ */
+function handleAudioChunk(playerName, audioBase64) {
+    if (!audioBase64) return;
+
+    // Initialize audio context
+    initAudio();
+
+    // Get or create audio player for this player
+    let audioPlayer = playerAudioPlayers.get(playerName);
+    if (!audioPlayer) {
+        audioPlayer = createAudioPlayerForPlayer(playerName);
+        playerAudioPlayers.set(playerName, audioPlayer);
+    }
+
+    // Decode and add to playback queue
+    const audioData = base64ToArrayBuffer(audioBase64);
+    addAudioChunk(audioPlayer, audioData);
+
+    // Global coordination: only one player speaks at a time.
+    if (!currentSpeakingPlayer) {
+        // No one is speaking, start this player immediately
+        currentSpeakingPlayer = playerName;
+        if (!audioPlayer.isPlaying) {
+            playAudio(audioPlayer, playerName);
+        }
+    } else if (currentSpeakingPlayer === playerName) {
+        // Same player is already speaking, its queue will continue in playAudio
+    } else {
+        // Another player is speaking, enqueue this player if not already queued
+        if (!pendingSpeakingPlayers.includes(playerName)) {
+            pendingSpeakingPlayers.push(playerName);
+        }
+    }
+}
+
+/**
+ * Create an audio player for a specific player.
+ *
+ * @param {string} playerName - Player name
+ * @returns {object} Audio player object
+ */
+function createAudioPlayerForPlayer(playerName) {
+    return {
+        chunks: [],      // Queue of audio chunks
+        sources: [],     // Active audio sources
+        isPlaying: false,
+        currentIndex: 0  // Current playback position
+    };
+}
+
+/**
+ * Add audio chunk to player's queue.
+ *
+ * @param {object} audioPlayer - Audio player object
+ * @param {ArrayBuffer} audioData - Audio data
+ */
+function addAudioChunk(audioPlayer, audioData) {
+    audioPlayer.chunks.push(audioData);
+}
+
+/**
+ * Play audio from queue.
+ *
+ * @param {object} audioPlayer - Audio player object  
+ * @param {string} playerName - Player name for visual feedback
+ */
+async function playAudio(audioPlayer, playerName) {
+    if (audioPlayer.isPlaying || audioPlayer.chunks.length === 0) {
+        return;
+    }
+
+    audioPlayer.isPlaying = true;
+    highlightPlayer(playerName);
+
+    // Play chunks from current index to end
+    while (audioPlayer.currentIndex < audioPlayer.chunks.length && audioPlayer.isPlaying) {
+        const chunk = audioPlayer.chunks[audioPlayer.currentIndex];
+        audioPlayer.currentIndex++;
+        await playAudioChunk(chunk, audioPlayer);
+
+        if (!audioPlayer.isPlaying) {
+            break;
+        }
+    }
+
+    // Playback completed
+    audioPlayer.isPlaying = false;
+    audioPlayer.currentIndex = 0; // Reset index
+    audioPlayer.chunks = []; // Clear processed chunks
+    unhighlightPlayer(playerName);
+
+    // Mark current speaker finished
+    if (currentSpeakingPlayer === playerName) {
+        currentSpeakingPlayer = null;
+    }
+
+    // Start next waiting player if any
+    while (pendingSpeakingPlayers.length > 0) {
+        const nextPlayerName = pendingSpeakingPlayers.shift();
+        const nextAudioPlayer = playerAudioPlayers.get(nextPlayerName);
+        if (nextAudioPlayer && nextAudioPlayer.chunks.length > 0) {
+            currentSpeakingPlayer = nextPlayerName;
+            if (!nextAudioPlayer.isPlaying) {
+                // Fire-and-forget, chaining will continue when this playback finishes
+                playAudio(nextAudioPlayer, nextPlayerName);
+            }
+            break;
+        }
+    }
+}
+
+/**
+ * Play a single audio chunk.
+ *
+ * @param {ArrayBuffer} audioData - Audio data
+ * @param {object} audioPlayer - Audio player object
+ * @returns {Promise} Promise that resolves when chunk finishes playing
+ */
+async function playAudioChunk(audioData, audioPlayer) {
+    return new Promise((resolve, reject) => {
+        if (!audioPlayer.isPlaying) {
+            resolve();
+            return;
+        }
+
+        try {
+            // Try to decode as PCM
+            playRawPCM(audioData, audioPlayer).then(resolve).catch(reject);
+        } catch (e) {
+            reject(e);
+        }
+    });
+}
+
+/**
+ * Play raw PCM audio data.
+ *
+ * @param {ArrayBuffer} data - PCM audio data
+ * @param {object} audioPlayer - Audio player object
+ * @returns {Promise} Promise that resolves when playback finishes
+ */
+async function playRawPCM(data, audioPlayer) {
+    return new Promise((resolve, reject) => {
+        if (!audioPlayer.isPlaying) {
+            resolve();
+            return;
+        }
+
+        try {
+            const pcmData = new Int16Array(data);
+            const floatData = new Float32Array(pcmData.length);
+            for (let i = 0; i < pcmData.length; i++) {
+                floatData[i] = pcmData[i] / 32768.0;
+            }
+
+            const audioBuffer = audioContext.createBuffer(1, floatData.length, 24000);
+            audioBuffer.getChannelData(0).set(floatData);
+
+            if (!audioPlayer.isPlaying) {
+                resolve();
+                return;
+            }
+
+            const source = audioContext.createBufferSource();
+            source.buffer = audioBuffer;
+            source.connect(audioContext.destination);
+            audioPlayer.sources.push(source);
+
+            source.onended = () => {
+                const index = audioPlayer.sources.indexOf(source);
+                if (index > -1) {
+                    audioPlayer.sources.splice(index, 1);
+                }
+                resolve();
+            };
+
+            if (audioPlayer.isPlaying) {
+                source.start();
+            } else {
+                resolve();
+            }
+        } catch (e) {
+            reject(e);
+        }
+    });
+}
+
+/**
+ * Convert base64 string to ArrayBuffer.
+ *
+ * @param {string} base64 - Base64 encoded string
+ * @returns {ArrayBuffer} Decoded array buffer
+ */
+function base64ToArrayBuffer(base64) {
+    const binaryString = atob(base64);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+        bytes[i] = binaryString.charCodeAt(i);
+    }
+    return bytes.buffer;
+}