From 20a42bd683ad7cd180b49c4df8b9940632181581 Mon Sep 17 00:00:00 2001 From: Paul Shippy Date: Mon, 16 Jun 2025 14:59:40 -0700 Subject: [PATCH 1/5] Working example --- .../console-ruby/audio_service.rb | 544 ++++++++++++++++++ .../sample-codes/console-ruby/nova_sonic.rb | 117 ++++ .../sample-codes/console-ruby/portaudio.rb | 251 ++++++++ 3 files changed, 912 insertions(+) create mode 100644 speech-to-speech/sample-codes/console-ruby/audio_service.rb create mode 100755 speech-to-speech/sample-codes/console-ruby/nova_sonic.rb create mode 100644 speech-to-speech/sample-codes/console-ruby/portaudio.rb diff --git a/speech-to-speech/sample-codes/console-ruby/audio_service.rb b/speech-to-speech/sample-codes/console-ruby/audio_service.rb new file mode 100644 index 00000000..cb5378f1 --- /dev/null +++ b/speech-to-speech/sample-codes/console-ruby/audio_service.rb @@ -0,0 +1,544 @@ +# frozen_string_literal: true + +require 'aws-sdk-bedrockruntime' +require 'securerandom' +require 'json' +require 'base64' +require 'concurrent' + +module NovaSonic + class AudioService + attr_reader :session_id, :logger + + class << self + def active_sessions + @active_sessions ||= {} + end + + def session_cleanup_in_progress + @session_cleanup_in_progress ||= Set.new + end + + def cleanup_in_progress?(session_id) + session_cleanup_in_progress.include?(session_id) + end + + def session_creation_mutexes + @session_creation_mutexes ||= {} + end + end + + def initialize(session_id, logger = Rails.logger) + @session_id = session_id + @logger = logger + + @inference_config = { + maxTokens: 10_000, + topP: 0.95, + temperature: 0.9 + } + + initialize_aws_client + end + + def setup_session + create_stream_session + initiate_session + @logger.info "Session setup completed successfully for #{session_id}" + self + rescue StandardError => e + @logger.error "Error in setup_session: #{e.message}" + @logger.error "Backtrace: #{e.backtrace.first(3).join('; ')}" + raise e + end + + def stop_audio_streaming + session_data = self.class.active_sessions[session_id] + return unless session_data + + begin + send_content_end if session_data[:input_stream] + + session_data[:is_audio_content_start_sent] = false + session_data[:audio_content_id] = SecureRandom.uuid + + @logger.info "Audio streaming stopped for session #{session_id}, session remains active" + rescue StandardError => e + @logger.error "Error stopping audio streaming: #{e.message}" + end + end + + def close_session + session_data = self.class.active_sessions[session_id] + return unless session_data + + self.class.session_cleanup_in_progress << session_id + + begin + send_content_end if session_data[:input_stream] + send_prompt_end if session_data[:input_stream] + send_session_end if session_data[:input_stream] + rescue StandardError => e + @logger.error "Error closing AWS streams: #{e.message}" + end + + self.class.active_sessions.delete(session_id) + self.class.session_creation_mutexes.delete(session_id) + self.class.session_cleanup_in_progress.delete(session_id) + + @logger.info "Session #{session_id} closed and cleaned up" + end + + def stream_audio(audio_data) + decoded_audio = Base64.decode64(audio_data) + stream_audio_data(decoded_audio) + end + + def stream_audio_data(audio_data) + return if audio_data.nil? || audio_data.empty? + + session_data = self.class.active_sessions[session_id] + unless session_data&.[](:is_active) + @logger.error "Session #{session_id} not found or inactive" + return + end + + @logger.debug "Streaming audio data: #{audio_data.length} bytes" + + if !session_data[:is_audio_content_start_sent] && session_data[:input_stream].present? + @logger.debug "Sending audio content start" + send_audio_content_start + session_data[:is_audio_content_start_sent] = true + end + + stream_audio_chunk(audio_data) + end + + def setup_system_prompt(content) + return false if content.blank? + + @custom_system_prompt = content + @logger.info "Custom system prompt stored for session #{session_id}" + true + end + + def on_event(event_type, &handler) + @event_handlers ||= {} + @event_handlers[event_type] = handler + self + end + + private + + def initialize_aws_client + client_options = { + region: 'us-east-1', + http_wire_trace: false, + enable_alpn: true + } + + @bedrock_runtime_client = Aws::BedrockRuntime::AsyncClient.new(client_options) + end + + def create_stream_session + session_mutex = self.class.session_creation_mutexes[session_id] ||= Mutex.new + + session_mutex.synchronize do + return if self.class.active_sessions[session_id] + + session_data = { + queue_mutex: Mutex.new, + prompt_name: SecureRandom.uuid, + inference_config: @inference_config, + is_active: true, + is_prompt_start_sent: false, + is_audio_content_start_sent: false, + audio_content_id: SecureRandom.uuid + } + + self.class.active_sessions[session_id] = session_data + end + end + + def initiate_session + session_data = self.class.active_sessions[session_id] + unless session_data + @logger.error "Stream session #{session_id} not found in active_sessions" + raise "Stream session #{session_id} not found" + end + + session_data[:queue_mutex].synchronize do + return if session_data[:initializing] + return if session_data[:is_active] && session_data[:input_stream] + + session_data[:initializing] = true + end + + begin + initialize_session_core(session_data) + @logger.info "Session #{session_id} initialization completed successfully" + rescue StandardError => e + @logger.error "Session #{session_id} initialization failed: #{e.class} - #{e.message}" + handle_initialization_error(e) + + raise e + ensure + session_data[:initializing] = false + end + end + + def initialize_session_core(session_data) + input_stream = Aws::BedrockRuntime::EventStreams::InvokeModelWithBidirectionalStreamInput.new + output_stream = Aws::BedrockRuntime::EventStreams::InvokeModelWithBidirectionalStreamOutput.new + + setup_output_stream_handlers(output_stream) + + prompt_id = SecureRandom.uuid + text_content_id = SecureRandom.uuid + audio_content_id = SecureRandom.uuid + + session_data[:prompt_name] = prompt_id + session_data[:audio_content_id] = audio_content_id + + async_resp = @bedrock_runtime_client.invoke_model_with_bidirectional_stream( + model_id: 'amazon.nova-sonic-v1:0', + input_event_stream_handler: input_stream, + output_event_stream_handler: output_stream + ) + + session_data[:input_stream] = input_stream + session_data[:async_resp] = async_resp + + send_initial_events(input_stream, prompt_id, text_content_id, session_data[:audio_content_id]) + + session_data[:is_active] = true + session_data[:is_prompt_start_sent] = true + session_data[:is_audio_content_start_sent] = true + end + + def setup_output_stream_handlers(output_stream) + output_stream.on_event do |event| + handle_aws_event(event) + rescue StandardError => e + @logger.error "Error handling AWS event: #{e.message}" + end + end + + def handle_aws_event(event) + if event.is_a?(Hash) && event[:event_type] == :chunk && event[:bytes] + begin + response_data = JSON.parse(event[:bytes]) + + if response_data['event'] + event_type = response_data['event'].keys.first + event_data = response_data['event'][event_type] + + dispatch_event(event_type, event_data) + end + rescue JSON::ParserError => e + @logger.error "Failed to parse JSON response: #{e.message}" + end + elsif event.respond_to?(:event_type) && event.event_type == :chunk && event.respond_to?(:bytes) + begin + response_data = JSON.parse(event.bytes) + + if response_data['event'] + event_type = response_data['event'].keys.first + event_data = response_data['event'][event_type] + + dispatch_event(event_type, event_data) + end + rescue JSON::ParserError => e + @logger.error "Failed to parse JSON response: #{e.message}" + end + end + end + + def dispatch_event(event_type, data) + @logger.debug "Dispatching event: #{event_type}" + @event_handlers[event_type]&.call(data) + + return unless @event_handlers&.[]('any') + + @event_handlers['any'].call({ type: event_type, data: }) + end + + def send_initial_events(input_stream, prompt_id, text_content_id, audio_content_id) + events = build_initial_events(prompt_id, text_content_id, audio_content_id) + send_events_to_stream(input_stream, events) + end + + def build_initial_events(prompt_id, text_content_id, audio_content_id) + system_prompt = @custom_system_prompt || default_system_prompt + + [ + build_session_start_event, + build_prompt_start_event(prompt_id), + build_text_content_start_event(prompt_id, text_content_id), + build_system_prompt_event(prompt_id, text_content_id, system_prompt), + build_text_content_end_event(prompt_id, text_content_id), + build_audio_content_start_event(prompt_id, audio_content_id) + ] + end + + def send_events_to_stream(input_stream, events) + events.each_with_index do |event, index| + input_stream.signal_chunk_event(bytes: event) + rescue StandardError => e + @logger.error "Error sending initial event #{index + 1}: #{e.message}" + raise "Failed to send initial event: #{e.message}" + end + end + + def build_session_start_event + { + event: { + sessionStart: { + inferenceConfiguration: @inference_config + } + } + }.to_json + end + + def build_prompt_start_event(prompt_id) + { + event: { + promptStart: { + promptName: prompt_id, + textOutputConfiguration: { + mediaType: 'text/plain' + }, + audioOutputConfiguration: { + mediaType: 'audio/lpcm', + sampleRateHertz: 16_000, + sampleSizeBits: 16, + channelCount: 1, + voiceId: 'en_us_tiffany', + encoding: 'base64', + audioType: 'SPEECH' + }, + toolUseOutputConfiguration: { + mediaType: 'application/json' + }, + toolConfiguration: { + tools: [] + } + } + } + }.to_json + end + + def build_text_content_start_event(prompt_id, text_content_id) + { + event: { + contentStart: { + promptName: prompt_id, + contentName: text_content_id, + type: 'TEXT', + interactive: true, + textInputConfiguration: { + mediaType: 'text/plain' + } + } + } + }.to_json + end + + def build_system_prompt_event(prompt_id, text_content_id, system_prompt) + { + event: { + textInput: { + promptName: prompt_id, + contentName: text_content_id, + content: system_prompt, + role: 'SYSTEM' + } + } + }.to_json + end + + def build_text_content_end_event(prompt_id, text_content_id) + { + event: { + contentEnd: { + promptName: prompt_id, + contentName: text_content_id + } + } + }.to_json + end + + def build_audio_content_start_event(prompt_id, audio_content_id) + { + event: { + contentStart: { + promptName: prompt_id, + contentName: audio_content_id, + type: 'AUDIO', + role: 'USER', + interactive: true, + audioInputConfiguration: { + mediaType: 'audio/lpcm', + sampleRateHertz: 16_000, + sampleSizeBits: 16, + channelCount: 1, + audioType: 'SPEECH', + encoding: 'base64' + } + } + } + }.to_json + end + + def stream_audio_chunk(audio_data) + session_data = self.class.active_sessions[session_id] + + unless session_data&.[](:is_active) + @logger.error "Session #{session_id} not found or inactive" + @logger.error "Available sessions: #{self.class.active_sessions.keys}" + raise "Session not active or not found: #{session_id}" + end + + unless session_data[:input_stream] + @logger.error "No input stream available for session #{session_id}" + raise "No input stream available for session: #{session_id}" + end + + if audio_data.nil? || audio_data.empty? + @logger.error "Empty audio data received for session #{session_id}" + raise 'Empty audio data received' + end + + base64_data = Base64.strict_encode64(audio_data) + + audio_event = { + event: { + audioInput: { + promptName: session_data[:prompt_name], + contentName: session_data[:audio_content_id], + content: base64_data, + role: 'USER' + } + } + }.to_json + + begin + session_data[:input_stream].signal_chunk_event(bytes: audio_event) + rescue StandardError => e + @logger.error "Failed to send audio chunk: #{e.message}" + session_data[:is_active] = false + session_data[:input_stream] = nil + raise e + end + end + + def default_system_prompt + 'You are a friendly assistant. The user and you will engage in a spoken dialog ' \ + 'exchanging the transcripts of a natural real-time conversation.' \ + 'Keep your responses short, generally two or three sentences for chatty scenarios.' + end + + def handle_initialization_error(error) + @logger.error "Initialization error for #{session_id}: #{error.message}" + + session_data = self.class.active_sessions[session_id] + session_data[:is_active] = false if session_data + + begin + dispatch_event('error', { + source: 'initialization', + error: error.message + }) + rescue StandardError => e + @logger.error "Error dispatching initialization error event: #{e.message}" + end + end + + def send_audio_content_start + session_data = self.class.active_sessions[session_id] + return unless session_data&.[](:input_stream) + + event = { + event: { + contentStart: { + promptName: session_data[:prompt_name], + contentName: session_data[:audio_content_id], + type: 'AUDIO', + interactive: true, + audioInputConfiguration: { + mediaType: 'audio/lpcm', + sampleRateHertz: 16_000, + sampleSizeBits: 16, + channelCount: 1, + audioType: 'SPEECH', + encoding: 'base64' + } + } + } + }.to_json + + begin + session_data[:input_stream].signal_chunk_event(bytes: event) + rescue StandardError => e + @logger.error "Error sending audio content start: #{e.message}" + end + end + + def send_content_end + session_data = self.class.active_sessions[session_id] + return unless session_data&.[](:input_stream) + + event = { + event: { + contentEnd: { + promptName: session_data[:prompt_name], + contentName: session_data[:audio_content_id] + } + } + }.to_json + + begin + session_data[:input_stream].signal_chunk_event(bytes: event) + rescue StandardError => e + @logger.error "Error sending content end: #{e.message}" + end + end + + def send_prompt_end + session_data = self.class.active_sessions[session_id] + return unless session_data&.[](:input_stream) + + event = { + event: { + promptEnd: { + promptName: session_data[:prompt_name] + } + } + }.to_json + + begin + session_data[:input_stream].signal_chunk_event(bytes: event) + rescue StandardError => e + @logger.error "Error sending prompt end: #{e.message}" + end + end + + def send_session_end + session_data = self.class.active_sessions[session_id] + return unless session_data&.[](:input_stream) + + event = { + event: { + sessionEnd: {} + } + }.to_json + + begin + session_data[:input_stream].signal_chunk_event(bytes: event) + session_data[:input_stream].signal_end_stream + rescue StandardError => e + @logger.error "Error sending session end: #{e.message}" + end + end + end +end diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb new file mode 100755 index 00000000..e139a48d --- /dev/null +++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb @@ -0,0 +1,117 @@ +#!/usr/bin/env ruby +require_relative 'audio_service' +require_relative 'portaudio' +require 'base64' +require 'securerandom' +require 'logger' + +module NovaSonic + class SimpleExample + def initialize + @session_id = SecureRandom.uuid + @logger = Logger.new($stdout) + @logger.level = Logger::INFO + @audio_service = AudioService.new(@session_id, @logger) + @audio_handler = PortAudioHandler.new(@logger) + @running = false + end + + def start + setup_event_handlers + setup_audio_handlers + setup_session + start_audio_streams + end + + def stop + @running = false + @audio_handler.stop + @audio_service.close_session + end + + private + + def setup_event_handlers + @audio_service.on_event('textOutput') do |data| + @logger.info data.inspect + end + + @audio_service.on_event('audioOutput') do |data| + audio_data = Base64.decode64(data['content']) + @audio_handler.queue_audio_output(audio_data) + end + + @audio_service.on_event('error') do |data| + @logger.error "āŒ Error: #{data['error']}" + end + end + + def setup_audio_handlers + @audio_handler.on_audio_input do |raw_pcm| + unless @audio_handler.echo_cancellation_active? + @audio_service.stream_audio(Base64.strict_encode64(raw_pcm)) + end + end + end + + def setup_session + @audio_service.setup_session + end + + def start_audio_streams + @running = true + @audio_handler.start + + @logger.info "šŸŽ¤ Audio streaming started" + @logger.info "šŸ”Š Simple time-based echo cancellation" + end + end +end + +# Example usage: +if __FILE__ == $PROGRAM_NAME + example = NovaSonic::SimpleExample.new + shutdown_requested = false + + Signal.trap("INT") do + puts "\nšŸ›‘ IMMEDIATE SHUTDOWN REQUESTED" + shutdown_requested = true + # Give it 1 second to cleanup, then force exit + Thread.new do + sleep 1 + puts "🚨 FORCING EXIT NOW!" + exit!(0) + end + end + + Signal.trap("TERM") do + puts "\nšŸ›‘ TERMINATE SIGNAL - IMMEDIATE EXIT" + exit!(0) + end + + begin + puts "šŸš€ Starting Nova Sonic (Simple Echo Cancellation)..." + puts "šŸŽ¤ Should work for multiple conversations!" + puts "šŸ”Š 1-second echo cancellation after AI speech" + puts "Press Ctrl+C to stop" + + example.start + + until shutdown_requested + sleep 0.05 # Faster response + end + + rescue StandardError => e + puts "āŒ Error: #{e.message}" + ensure + puts "šŸ›‘ Attempting quick cleanup..." + begin + # Try cleanup but don't wait long + example.stop + sleep 0.2 # Give it 200ms max + rescue + # Ignore cleanup errors + end + puts "šŸ‘‹ Goodbye!" + end +end \ No newline at end of file diff --git a/speech-to-speech/sample-codes/console-ruby/portaudio.rb b/speech-to-speech/sample-codes/console-ruby/portaudio.rb new file mode 100644 index 00000000..8a00c357 --- /dev/null +++ b/speech-to-speech/sample-codes/console-ruby/portaudio.rb @@ -0,0 +1,251 @@ +#!/usr/bin/env ruby +require 'ffi-portaudio' +require 'base64' + +class PortAudioHandler + include FFI::PortAudio + + INPUT_SAMPLE_RATE = 16000 + OUTPUT_SAMPLE_RATE = 16000 + CHANNELS = 1 + FRAMES_PER_BUFFER = 2048 + + def initialize(logger) + @logger = logger + @running = false + @input_stream = nil + @output_stream = nil + @input_callback = nil + + # Much simpler buffering + @sample_buffer = [] + @buffer_mutex = Mutex.new + @playing_audio = false + @last_output_time = Time.now + + initialize_portaudio + end + + def on_audio_input(&block) + @input_callback = block + end + + def queue_audio_output(audio_data) + @buffer_mutex.synchronize do + # Convert raw PCM to samples and add to buffer + new_samples = audio_data.unpack('s*') + @sample_buffer.concat(new_samples) + @playing_audio = true + @last_output_time = Time.now + + @logger.debug "Queued #{new_samples.length} samples, buffer now #{@sample_buffer.length} samples" + end + end + + def start + return if @running + + @running = true + start_input_stream + start_output_stream + @logger.info "šŸŽµ Fixed PortAudio streams started" + end + + def stop + return unless @running + + @running = false + + # Force exit in background if this takes too long + timeout_thread = Thread.new do + sleep 0.5 + puts "🚨 AUDIO SHUTDOWN TIMEOUT - FORCING EXIT!" + exit!(0) + end + + begin + stop_streams + cleanup_portaudio + @logger.info "šŸ”‡ PortAudio streams stopped" + + # Cancel timeout + timeout_thread.kill + rescue + # If cleanup fails, just exit + puts "āš ļø Audio cleanup failed, forcing exit" + exit!(0) + end + end + + def echo_cancellation_active? + # Simple time-based approach: block input for 1 second after last audio output + @playing_audio && (Time.now - @last_output_time) < 1.0 + end + + private + + def initialize_portaudio + result = API.Pa_Initialize + if result == :paNoError + @logger.info "šŸŽ¤ PortAudio initialized" + else + @logger.error "āŒ Failed to initialize PortAudio: #{result}" + end + end + + def start_input_stream + @input_stream = InputStreamHandler.new(@logger, @input_callback, self) + + input_device = API.Pa_GetDefaultInputDevice + if input_device < 0 + @logger.error "āŒ No default input device found" + return + end + + input_params = API::PaStreamParameters.new + input_params[:device] = input_device + input_params[:channelCount] = CHANNELS + input_params[:sampleFormat] = API::Int16 + input_params[:suggestedLatency] = 0.05 + input_params[:hostApiSpecificStreamInfo] = nil + + @input_stream.open(input_params, nil, INPUT_SAMPLE_RATE, FRAMES_PER_BUFFER) + @input_stream.start + @logger.info "šŸŽ¤ Input stream started" + end + + def start_output_stream + @output_stream = OutputStreamHandler.new(@logger, @sample_buffer, @buffer_mutex, self) + + output_device = API.Pa_GetDefaultOutputDevice + if output_device < 0 + @logger.error "āŒ No default output device found" + return + end + + output_params = API::PaStreamParameters.new + output_params[:device] = output_device + output_params[:channelCount] = CHANNELS + output_params[:sampleFormat] = API::Int16 + output_params[:suggestedLatency] = 0.1 + output_params[:hostApiSpecificStreamInfo] = nil + + @output_stream.open(nil, output_params, OUTPUT_SAMPLE_RATE, FRAMES_PER_BUFFER) + @output_stream.start + @logger.info "šŸ”Š Output stream started" + end + + def stop_streams + @running = false + + begin + if @input_stream + @input_stream.stop rescue nil + @input_stream.close rescue nil + @input_stream = nil + end + rescue => e + @logger.error "Error stopping input stream: #{e.message}" + end + + begin + if @output_stream + @output_stream.stop rescue nil + @output_stream.close rescue nil + @output_stream = nil + end + rescue => e + @logger.error "Error stopping output stream: #{e.message}" + end + end + + def cleanup_portaudio + API.Pa_Terminate + end + + def mark_output_finished + @buffer_mutex.synchronize do + if @sample_buffer.empty? + @playing_audio = false + @logger.debug "Output finished - microphone re-enabled" + end + end + end + + # Input stream handler class + class InputStreamHandler < FFI::PortAudio::Stream + def initialize(logger, callback, parent_handler) + @logger = logger + @callback = callback + @parent_handler = parent_handler + @input_count = 0 + end + + def process(input, output, frameCount, timeInfo, statusFlags, userData) + @input_count += 1 + + # Log periodically + if @input_count % 1000 == 0 + echo_active = @parent_handler.echo_cancellation_active? + @logger.debug "Input callback ##{@input_count}, echo_cancellation=#{echo_active}" + end + + unless @parent_handler.echo_cancellation_active? + if input && frameCount > 0 + audio_data = input.read_array_of_int16(frameCount * CHANNELS) + raw_pcm = audio_data.pack('s*') + @callback.call(raw_pcm) if @callback + end + end + + :paContinue + end + end + + # Fixed output stream handler class + class OutputStreamHandler < FFI::PortAudio::Stream + def initialize(logger, sample_buffer, mutex, parent_handler) + @logger = logger + @sample_buffer = sample_buffer + @mutex = mutex + @parent_handler = parent_handler + @output_count = 0 + end + + def process(input, output, frameCount, timeInfo, statusFlags, userData) + @output_count += 1 + required_samples = frameCount * CHANNELS + + @mutex.synchronize do + if @sample_buffer.length >= required_samples + # We have enough samples + samples = @sample_buffer.shift(required_samples) + output.write_array_of_int16(samples) + + if @output_count % 100 == 0 + @logger.debug "Output ##{@output_count}: played #{required_samples} samples, #{@sample_buffer.length} remaining" + end + + elsif @sample_buffer.length > 0 + # Use what we have and pad with silence + available_samples = @sample_buffer.shift(@sample_buffer.length) + padding_needed = required_samples - available_samples.length + complete_samples = available_samples + [0] * padding_needed + output.write_array_of_int16(complete_samples) + + @logger.debug "Output ##{@output_count}: used #{available_samples.length} samples, padded #{padding_needed}" + + else + # Output silence + silence = [0] * required_samples + output.write_array_of_int16(silence) + + # Mark output as finished when buffer is empty + @parent_handler.mark_output_finished + end + end + + :paContinue + end + end +end \ No newline at end of file From 410c31ff2c137600d2bc5daebc2ecdd969816849 Mon Sep 17 00:00:00 2001 From: Paul Shippy Date: Mon, 16 Jun 2025 15:00:26 -0700 Subject: [PATCH 2/5] Dependencies --- .../sample-codes/console-ruby/Gemfile | 2 ++ .../sample-codes/console-ruby/Gemfile.lock | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 speech-to-speech/sample-codes/console-ruby/Gemfile create mode 100644 speech-to-speech/sample-codes/console-ruby/Gemfile.lock diff --git a/speech-to-speech/sample-codes/console-ruby/Gemfile b/speech-to-speech/sample-codes/console-ruby/Gemfile new file mode 100644 index 00000000..6cdf9c6a --- /dev/null +++ b/speech-to-speech/sample-codes/console-ruby/Gemfile @@ -0,0 +1,2 @@ +gem 'aws-sdk-bedrockruntime' +gem 'ffi-portaudio' \ No newline at end of file diff --git a/speech-to-speech/sample-codes/console-ruby/Gemfile.lock b/speech-to-speech/sample-codes/console-ruby/Gemfile.lock new file mode 100644 index 00000000..398fb223 --- /dev/null +++ b/speech-to-speech/sample-codes/console-ruby/Gemfile.lock @@ -0,0 +1,32 @@ +GEM + specs: + aws-eventstream (1.4.0) + aws-partitions (1.1114.0) + aws-sdk-bedrockruntime (1.49.0) + aws-sdk-core (~> 3, >= 3.225.0) + aws-sigv4 (~> 1.5) + aws-sdk-core (3.225.1) + aws-eventstream (~> 1, >= 1.3.0) + aws-partitions (~> 1, >= 1.992.0) + aws-sigv4 (~> 1.9) + base64 + jmespath (~> 1, >= 1.6.1) + logger + aws-sigv4 (1.12.0) + aws-eventstream (~> 1, >= 1.0.2) + base64 (0.3.0) + ffi (1.17.2-arm64-darwin) + ffi-portaudio (0.1.3) + ffi + jmespath (1.6.2) + logger (1.7.0) + +PLATFORMS + arm64-darwin-24 + +DEPENDENCIES + aws-sdk-bedrockruntime + ffi-portaudio + +BUNDLED WITH + 2.6.9 From 4efe7bf6a86f034a1c217ca69252a7653d22ee40 Mon Sep 17 00:00:00 2001 From: Paul Shippy Date: Mon, 16 Jun 2025 15:06:36 -0700 Subject: [PATCH 3/5] Show role --- speech-to-speech/sample-codes/console-ruby/nova_sonic.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb index e139a48d..476bf5f8 100755 --- a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb +++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb @@ -33,7 +33,7 @@ def stop def setup_event_handlers @audio_service.on_event('textOutput') do |data| - @logger.info data.inspect + @logger.info "#{data['role']}: #{data['content']}" end @audio_service.on_event('audioOutput') do |data| From a82a9d11d96f5275ee3f85d4bd535b0047d5994b Mon Sep 17 00:00:00 2001 From: Paul Shippy Date: Mon, 16 Jun 2025 15:37:52 -0700 Subject: [PATCH 4/5] Only show speculative stage assistant messages --- .../sample-codes/console-ruby/nova_sonic.rb | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb index 476bf5f8..8a0a4025 100755 --- a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb +++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb @@ -14,6 +14,7 @@ def initialize @audio_service = AudioService.new(@session_id, @logger) @audio_handler = PortAudioHandler.new(@logger) @running = false + @current_stage = nil end def start @@ -32,8 +33,22 @@ def stop private def setup_event_handlers + @audio_service.on_event('contentStart') do |data| + additionalModelFields = data['additionalModelFields'] + additionalModelFields = JSON.parse(additionalModelFields) if additionalModelFields.is_a?(String) + + if additionalModelFields != nil + stage = additionalModelFields['generationStage'] + @current_stage = stage if stage + end + end + @audio_service.on_event('textOutput') do |data| - @logger.info "#{data['role']}: #{data['content']}" + if @current_stage == 'SPECULATIVE' && data['role'] == 'ASSISTANT' + @logger.info "šŸŽ¤ #{data['role']}: #{data['content']}" + elsif data['role'] == 'USER' + @logger.info "šŸ‘¤ #{data['role']}: #{data['content']}" + end end @audio_service.on_event('audioOutput') do |data| From 13c8d362426be676c79ad4e8903e71e075e4b584 Mon Sep 17 00:00:00 2001 From: Paul Shippy Date: Mon, 23 Jun 2025 15:37:01 -0700 Subject: [PATCH 5/5] Fix issue stopping --- speech-to-speech/sample-codes/console-ruby/nova_sonic.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb index 8a0a4025..0251b6ef 100755 --- a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb +++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb @@ -26,8 +26,8 @@ def start def stop @running = false - @audio_handler.stop @audio_service.close_session + @audio_handler.stop end private