From 20a42bd683ad7cd180b49c4df8b9940632181581 Mon Sep 17 00:00:00 2001
From: Paul Shippy <paul.shippy@strongmind.com>
Date: Mon, 16 Jun 2025 14:59:40 -0700
Subject: [PATCH 1/5] Working example

---
 .../console-ruby/audio_service.rb             | 544 ++++++++++++++++++
 .../sample-codes/console-ruby/nova_sonic.rb   | 117 ++++
 .../sample-codes/console-ruby/portaudio.rb    | 251 ++++++++
 3 files changed, 912 insertions(+)
 create mode 100644 speech-to-speech/sample-codes/console-ruby/audio_service.rb
 create mode 100755 speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
 create mode 100644 speech-to-speech/sample-codes/console-ruby/portaudio.rb

diff --git a/speech-to-speech/sample-codes/console-ruby/audio_service.rb b/speech-to-speech/sample-codes/console-ruby/audio_service.rb
new file mode 100644
index 00000000..cb5378f1
--- /dev/null
+++ b/speech-to-speech/sample-codes/console-ruby/audio_service.rb
@@ -0,0 +1,544 @@
+# frozen_string_literal: true
+
+require 'aws-sdk-bedrockruntime'
+require 'securerandom'
+require 'json'
+require 'base64'
+require 'concurrent'
+
+module NovaSonic
+  class AudioService
+    attr_reader :session_id, :logger
+
+    class << self
+      def active_sessions
+        @active_sessions ||= {}
+      end
+
+      def session_cleanup_in_progress
+        @session_cleanup_in_progress ||= Set.new
+      end
+
+      def cleanup_in_progress?(session_id)
+        session_cleanup_in_progress.include?(session_id)
+      end
+
+      def session_creation_mutexes
+        @session_creation_mutexes ||= {}
+      end
+    end
+
+    def initialize(session_id, logger = Rails.logger)
+      @session_id = session_id
+      @logger = logger
+
+      @inference_config = {
+        maxTokens: 10_000,
+        topP: 0.95,
+        temperature: 0.9
+      }
+
+      initialize_aws_client
+    end
+
+    def setup_session
+      create_stream_session
+      initiate_session
+      @logger.info "Session setup completed successfully for #{session_id}"
+      self
+    rescue StandardError => e
+      @logger.error "Error in setup_session: #{e.message}"
+      @logger.error "Backtrace: #{e.backtrace.first(3).join('; ')}"
+      raise e
+    end
+
+    def stop_audio_streaming
+      session_data = self.class.active_sessions[session_id]
+      return unless session_data
+
+      begin
+        send_content_end if session_data[:input_stream]
+
+        session_data[:is_audio_content_start_sent] = false
+        session_data[:audio_content_id] = SecureRandom.uuid
+
+        @logger.info "Audio streaming stopped for session #{session_id}, session remains active"
+      rescue StandardError => e
+        @logger.error "Error stopping audio streaming: #{e.message}"
+      end
+    end
+
+    def close_session
+      session_data = self.class.active_sessions[session_id]
+      return unless session_data
+
+      self.class.session_cleanup_in_progress << session_id
+
+      begin
+        send_content_end if session_data[:input_stream]
+        send_prompt_end if session_data[:input_stream]
+        send_session_end if session_data[:input_stream]
+      rescue StandardError => e
+        @logger.error "Error closing AWS streams: #{e.message}"
+      end
+
+      self.class.active_sessions.delete(session_id)
+      self.class.session_creation_mutexes.delete(session_id)
+      self.class.session_cleanup_in_progress.delete(session_id)
+
+      @logger.info "Session #{session_id} closed and cleaned up"
+    end
+
+    def stream_audio(audio_data)
+      decoded_audio = Base64.decode64(audio_data)
+      stream_audio_data(decoded_audio)
+    end
+
+    # Sends one chunk of raw PCM audio, (re)opening the audio content block first if needed.
+    def stream_audio_data(audio_data)
+      return if audio_data.nil? || audio_data.empty?
+
+      session_data = self.class.active_sessions[session_id]
+      unless session_data&.[](:is_active)
+        @logger.error "Session #{session_id} not found or inactive"
+        return
+      end
+      @logger.debug "Streaming audio data: #{audio_data.length} bytes"
+
+      # Plain truthiness check: ActiveSupport's #present? is not loaded in this console sample.
+      if !session_data[:is_audio_content_start_sent] && session_data[:input_stream]
+        @logger.debug "Sending audio content start"
+        send_audio_content_start
+        session_data[:is_audio_content_start_sent] = true
+      end
+      stream_audio_chunk(audio_data)
+    end
+
+    # Stores a custom system prompt; returns false when content is nil or whitespace-only.
+    def setup_system_prompt(content)
+      return false if content.to_s.strip.empty? # stdlib stand-in for ActiveSupport's #blank?
+      @custom_system_prompt = content
+      @logger.info "Custom system prompt stored for session #{session_id}"
+      true
+    end
+
+    def on_event(event_type, &handler)
+      @event_handlers ||= {}
+      @event_handlers[event_type] = handler
+      self
+    end
+
+    private
+
+    def initialize_aws_client
+      client_options = {
+        region: 'us-east-1',
+        http_wire_trace: false,
+        enable_alpn: true
+      }
+
+      @bedrock_runtime_client = Aws::BedrockRuntime::AsyncClient.new(client_options)
+    end
+
+    def create_stream_session
+      session_mutex = self.class.session_creation_mutexes[session_id] ||= Mutex.new
+
+      session_mutex.synchronize do
+        return if self.class.active_sessions[session_id]
+
+        session_data = {
+          queue_mutex: Mutex.new,
+          prompt_name: SecureRandom.uuid,
+          inference_config: @inference_config,
+          is_active: true,
+          is_prompt_start_sent: false,
+          is_audio_content_start_sent: false,
+          audio_content_id: SecureRandom.uuid
+        }
+
+        self.class.active_sessions[session_id] = session_data
+      end
+    end
+
+    def initiate_session
+      session_data = self.class.active_sessions[session_id]
+      unless session_data
+        @logger.error "Stream session #{session_id} not found in active_sessions"
+        raise "Stream session #{session_id} not found"
+      end
+
+      session_data[:queue_mutex].synchronize do
+        return if session_data[:initializing]
+        return if session_data[:is_active] && session_data[:input_stream]
+
+        session_data[:initializing] = true
+      end
+
+      begin
+        initialize_session_core(session_data)
+        @logger.info "Session #{session_id} initialization completed successfully"
+      rescue StandardError => e
+        @logger.error "Session #{session_id} initialization failed: #{e.class} - #{e.message}"
+        handle_initialization_error(e)
+
+        raise e
+      ensure
+        session_data[:initializing] = false
+      end
+    end
+
+    def initialize_session_core(session_data)
+      input_stream = Aws::BedrockRuntime::EventStreams::InvokeModelWithBidirectionalStreamInput.new
+      output_stream = Aws::BedrockRuntime::EventStreams::InvokeModelWithBidirectionalStreamOutput.new
+
+      setup_output_stream_handlers(output_stream)
+
+      prompt_id = SecureRandom.uuid
+      text_content_id = SecureRandom.uuid
+      audio_content_id = SecureRandom.uuid
+
+      session_data[:prompt_name] = prompt_id
+      session_data[:audio_content_id] = audio_content_id
+
+      async_resp = @bedrock_runtime_client.invoke_model_with_bidirectional_stream(
+        model_id: 'amazon.nova-sonic-v1:0',
+        input_event_stream_handler: input_stream,
+        output_event_stream_handler: output_stream
+      )
+
+      session_data[:input_stream] = input_stream
+      session_data[:async_resp] = async_resp
+
+      send_initial_events(input_stream, prompt_id, text_content_id, session_data[:audio_content_id])
+
+      session_data[:is_active] = true
+      session_data[:is_prompt_start_sent] = true
+      session_data[:is_audio_content_start_sent] = true
+    end
+
+    def setup_output_stream_handlers(output_stream)
+      output_stream.on_event do |event|
+        handle_aws_event(event)
+      rescue StandardError => e
+        @logger.error "Error handling AWS event: #{e.message}"
+      end
+    end
+
+    # Decodes a :chunk event from the output stream and dispatches the event it carries.
+    #
+    # event may be a Hash with :event_type/:bytes keys or an object exposing event_type
+    # and bytes readers; any other shape, or a non-chunk event, is ignored.
+    # JSON parse failures are logged rather than raised; other errors propagate
+    # to the caller.
+    def handle_aws_event(event)
+      payload = chunk_payload(event)
+      return unless payload
+
+      response_data = JSON.parse(payload)
+      return unless response_data['event']
+
+      # Only the first key under 'event' is used; it names the event type.
+      event_type, event_data = response_data['event'].first
+      dispatch_event(event_type, event_data)
+    rescue JSON::ParserError => e
+      @logger.error "Failed to parse JSON response: #{e.message}"
+    end
+
+    # Returns the raw JSON string of a :chunk event, or nil when there is nothing to parse.
+    # Shared by both supported event shapes so the decoding logic exists only once.
+    def chunk_payload(event)
+      if event.is_a?(Hash)
+        event[:bytes] if event[:event_type] == :chunk
+      elsif event.respond_to?(:event_type) && event.respond_to?(:bytes)
+        event.bytes if event.event_type == :chunk
+      end
+    end
+
+    # Routes an event to the handler registered for its type, then to the 'any' handler.
+    def dispatch_event(event_type, data)
+      @logger.debug "Dispatching event: #{event_type}"
+      # on_event may never have been called, leaving @event_handlers nil; treat as empty.
+      handlers = @event_handlers || {}
+      handlers[event_type]&.call(data)
+      handlers['any']&.call({ type: event_type, data: })
+    end
+
+    def send_initial_events(input_stream, prompt_id, text_content_id, audio_content_id)
+      events = build_initial_events(prompt_id, text_content_id, audio_content_id)
+      send_events_to_stream(input_stream, events)
+    end
+
+    def build_initial_events(prompt_id, text_content_id, audio_content_id)
+      system_prompt = @custom_system_prompt || default_system_prompt
+
+      [
+        build_session_start_event,
+        build_prompt_start_event(prompt_id),
+        build_text_content_start_event(prompt_id, text_content_id),
+        build_system_prompt_event(prompt_id, text_content_id, system_prompt),
+        build_text_content_end_event(prompt_id, text_content_id),
+        build_audio_content_start_event(prompt_id, audio_content_id)
+      ]
+    end
+
+    def send_events_to_stream(input_stream, events)
+      events.each_with_index do |event, index|
+        input_stream.signal_chunk_event(bytes: event)
+      rescue StandardError => e
+        @logger.error "Error sending initial event #{index + 1}: #{e.message}"
+        raise "Failed to send initial event: #{e.message}"
+      end
+    end
+
+    def build_session_start_event
+      {
+        event: {
+          sessionStart: {
+            inferenceConfiguration: @inference_config
+          }
+        }
+      }.to_json
+    end
+
+    def build_prompt_start_event(prompt_id)
+      {
+        event: {
+          promptStart: {
+            promptName: prompt_id,
+            textOutputConfiguration: {
+              mediaType: 'text/plain'
+            },
+            audioOutputConfiguration: {
+              mediaType: 'audio/lpcm',
+              sampleRateHertz: 16_000,
+              sampleSizeBits: 16,
+              channelCount: 1,
+              voiceId: 'en_us_tiffany',
+              encoding: 'base64',
+              audioType: 'SPEECH'
+            },
+            toolUseOutputConfiguration: {
+              mediaType: 'application/json'
+            },
+            toolConfiguration: {
+              tools: []
+            }
+          }
+        }
+      }.to_json
+    end
+
+    def build_text_content_start_event(prompt_id, text_content_id)
+      {
+        event: {
+          contentStart: {
+            promptName: prompt_id,
+            contentName: text_content_id,
+            type: 'TEXT',
+            interactive: true,
+            textInputConfiguration: {
+              mediaType: 'text/plain'
+            }
+          }
+        }
+      }.to_json
+    end
+
+    def build_system_prompt_event(prompt_id, text_content_id, system_prompt)
+      {
+        event: {
+          textInput: {
+            promptName: prompt_id,
+            contentName: text_content_id,
+            content: system_prompt,
+            role: 'SYSTEM'
+          }
+        }
+      }.to_json
+    end
+
+    def build_text_content_end_event(prompt_id, text_content_id)
+      {
+        event: {
+          contentEnd: {
+            promptName: prompt_id,
+            contentName: text_content_id
+          }
+        }
+      }.to_json
+    end
+
+    def build_audio_content_start_event(prompt_id, audio_content_id)
+      {
+        event: {
+          contentStart: {
+            promptName: prompt_id,
+            contentName: audio_content_id,
+            type: 'AUDIO',
+            role: 'USER',
+            interactive: true,
+            audioInputConfiguration: {
+              mediaType: 'audio/lpcm',
+              sampleRateHertz: 16_000,
+              sampleSizeBits: 16,
+              channelCount: 1,
+              audioType: 'SPEECH',
+              encoding: 'base64'
+            }
+          }
+        }
+      }.to_json
+    end
+
+    def stream_audio_chunk(audio_data)
+      session_data = self.class.active_sessions[session_id]
+
+      unless session_data&.[](:is_active)
+        @logger.error "Session #{session_id} not found or inactive"
+        @logger.error "Available sessions: #{self.class.active_sessions.keys}"
+        raise "Session not active or not found: #{session_id}"
+      end
+
+      unless session_data[:input_stream]
+        @logger.error "No input stream available for session #{session_id}"
+        raise "No input stream available for session: #{session_id}"
+      end
+
+      if audio_data.nil? || audio_data.empty?
+        @logger.error "Empty audio data received for session #{session_id}"
+        raise 'Empty audio data received'
+      end
+
+      base64_data = Base64.strict_encode64(audio_data)
+
+      audio_event = {
+        event: {
+          audioInput: {
+            promptName: session_data[:prompt_name],
+            contentName: session_data[:audio_content_id],
+            content: base64_data,
+            role: 'USER'
+          }
+        }
+      }.to_json
+
+      begin
+        session_data[:input_stream].signal_chunk_event(bytes: audio_event)
+      rescue StandardError => e
+        @logger.error "Failed to send audio chunk: #{e.message}"
+        session_data[:is_active] = false
+        session_data[:input_stream] = nil
+        raise e
+      end
+    end
+
+    def default_system_prompt # fallback used when no custom system prompt has been stored
+      'You are a friendly assistant. The user and you will engage in a spoken dialog ' \
+        'exchanging the transcripts of a natural real-time conversation. ' \
+        'Keep your responses short, generally two or three sentences for chatty scenarios.'
+    end
+
+    # Logs a failed initialization, marks the session inactive and notifies the 'error' handler.
+    def handle_initialization_error(error)
+      @logger.error "Initialization error for #{session_id}: #{error.message}"
+
+      session_data = self.class.active_sessions[session_id]
+      session_data[:is_active] = false if session_data
+
+      begin
+        # String keys: handlers receive JSON-parsed (string-keyed) data for every other
+        # event and read data['error'], which symbol keys would leave nil.
+        dispatch_event('error', { 'source' => 'initialization', 'error' => error.message })
+      rescue StandardError => e
+        @logger.error "Error dispatching initialization error event: #{e.message}"
+      end
+    end
+
+    def send_audio_content_start
+      session_data = self.class.active_sessions[session_id]
+      return unless session_data&.[](:input_stream)
+
+      event = {
+        event: {
+          contentStart: {
+            promptName: session_data[:prompt_name],
+            contentName: session_data[:audio_content_id],
+            type: 'AUDIO',
+            interactive: true,
+            audioInputConfiguration: {
+              mediaType: 'audio/lpcm',
+              sampleRateHertz: 16_000,
+              sampleSizeBits: 16,
+              channelCount: 1,
+              audioType: 'SPEECH',
+              encoding: 'base64'
+            }
+          }
+        }
+      }.to_json
+
+      begin
+        session_data[:input_stream].signal_chunk_event(bytes: event)
+      rescue StandardError => e
+        @logger.error "Error sending audio content start: #{e.message}"
+      end
+    end
+
+    def send_content_end
+      session_data = self.class.active_sessions[session_id]
+      return unless session_data&.[](:input_stream)
+
+      event = {
+        event: {
+          contentEnd: {
+            promptName: session_data[:prompt_name],
+            contentName: session_data[:audio_content_id]
+          }
+        }
+      }.to_json
+
+      begin
+        session_data[:input_stream].signal_chunk_event(bytes: event)
+      rescue StandardError => e
+        @logger.error "Error sending content end: #{e.message}"
+      end
+    end
+
+    def send_prompt_end
+      session_data = self.class.active_sessions[session_id]
+      return unless session_data&.[](:input_stream)
+
+      event = {
+        event: {
+          promptEnd: {
+            promptName: session_data[:prompt_name]
+          }
+        }
+      }.to_json
+
+      begin
+        session_data[:input_stream].signal_chunk_event(bytes: event)
+      rescue StandardError => e
+        @logger.error "Error sending prompt end: #{e.message}"
+      end
+    end
+
+    def send_session_end
+      session_data = self.class.active_sessions[session_id]
+      return unless session_data&.[](:input_stream)
+
+      event = {
+        event: {
+          sessionEnd: {}
+        }
+      }.to_json
+
+      begin
+        session_data[:input_stream].signal_chunk_event(bytes: event)
+        session_data[:input_stream].signal_end_stream
+      rescue StandardError => e
+        @logger.error "Error sending session end: #{e.message}"
+      end
+    end
+  end
+end
diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
new file mode 100755
index 00000000..e139a48d
--- /dev/null
+++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
@@ -0,0 +1,117 @@
+#!/usr/bin/env ruby
+require_relative 'audio_service'
+require_relative 'portaudio'
+require 'base64'
+require 'securerandom'
+require 'logger'
+
+module NovaSonic
+  class SimpleExample
+    def initialize
+      @session_id = SecureRandom.uuid
+      @logger = Logger.new($stdout)
+      @logger.level = Logger::INFO
+      @audio_service = AudioService.new(@session_id, @logger)
+      @audio_handler = PortAudioHandler.new(@logger)
+      @running = false
+    end
+
+    def start
+      setup_event_handlers
+      setup_audio_handlers
+      setup_session
+      start_audio_streams
+    end
+
+    def stop
+      @running = false
+      @audio_handler.stop
+      @audio_service.close_session
+    end
+
+    private
+
+    def setup_event_handlers
+      @audio_service.on_event('textOutput') do |data|
+        @logger.info data.inspect
+      end
+
+      @audio_service.on_event('audioOutput') do |data|
+        audio_data = Base64.decode64(data['content'])
+        @audio_handler.queue_audio_output(audio_data)
+      end
+
+      @audio_service.on_event('error') do |data|
+        @logger.error "❌ Error: #{data['error']}"
+      end
+    end
+
+    def setup_audio_handlers
+      @audio_handler.on_audio_input do |raw_pcm|
+        unless @audio_handler.echo_cancellation_active?
+          @audio_service.stream_audio(Base64.strict_encode64(raw_pcm))
+        end
+      end
+    end
+
+    def setup_session
+      @audio_service.setup_session
+    end
+
+    def start_audio_streams
+      @running = true
+      @audio_handler.start
+      
+      @logger.info "🎤 Audio streaming started"
+      @logger.info "🔊 Simple time-based echo cancellation"
+    end
+  end
+end
+
+# Example usage:
+if __FILE__ == $PROGRAM_NAME
+  example = NovaSonic::SimpleExample.new
+  shutdown_requested = false
+  
+  Signal.trap("INT") do
+    puts "\n🛑 IMMEDIATE SHUTDOWN REQUESTED"
+    shutdown_requested = true
+    # Give it 1 second to cleanup, then force exit
+    Thread.new do
+      sleep 1
+      puts "🚨 FORCING EXIT NOW!"
+      exit!(0)
+    end
+  end
+  
+  Signal.trap("TERM") do
+    puts "\n🛑 TERMINATE SIGNAL - IMMEDIATE EXIT"
+    exit!(0)
+  end
+  
+  begin
+    puts "🚀 Starting Nova Sonic (Simple Echo Cancellation)..."
+    puts "🎤 Should work for multiple conversations!"
+    puts "🔊 1-second echo cancellation after AI speech"
+    puts "Press Ctrl+C to stop"
+    
+    example.start
+    
+    until shutdown_requested
+      sleep 0.05  # Faster response
+    end
+    
+  rescue StandardError => e
+    puts "❌ Error: #{e.message}"
+  ensure
+    puts "🛑 Attempting quick cleanup..."
+    begin
+      # Try cleanup but don't wait long
+      example.stop
+      sleep 0.2  # Give it 200ms max
+    rescue
+      # Ignore cleanup errors
+    end
+    puts "👋 Goodbye!"
+  end
+end 
\ No newline at end of file
diff --git a/speech-to-speech/sample-codes/console-ruby/portaudio.rb b/speech-to-speech/sample-codes/console-ruby/portaudio.rb
new file mode 100644
index 00000000..8a00c357
--- /dev/null
+++ b/speech-to-speech/sample-codes/console-ruby/portaudio.rb
@@ -0,0 +1,251 @@
+#!/usr/bin/env ruby
+require 'ffi-portaudio'
+require 'base64'
+
+class PortAudioHandler
+  include FFI::PortAudio
+  
+  INPUT_SAMPLE_RATE = 16000
+  OUTPUT_SAMPLE_RATE = 16000
+  CHANNELS = 1
+  FRAMES_PER_BUFFER = 2048
+  
+  def initialize(logger)
+    @logger = logger
+    @running = false
+    @input_stream = nil
+    @output_stream = nil
+    @input_callback = nil
+    
+    # Much simpler buffering
+    @sample_buffer = []
+    @buffer_mutex = Mutex.new
+    @playing_audio = false
+    @last_output_time = Time.now
+    
+    initialize_portaudio
+  end
+
+  def on_audio_input(&block)
+    @input_callback = block
+  end
+
+  def queue_audio_output(audio_data)
+    @buffer_mutex.synchronize do
+      # Convert raw PCM to samples and add to buffer
+      new_samples = audio_data.unpack('s*')
+      @sample_buffer.concat(new_samples)
+      @playing_audio = true
+      @last_output_time = Time.now
+      
+      @logger.debug "Queued #{new_samples.length} samples, buffer now #{@sample_buffer.length} samples"
+    end
+  end
+
+  def start
+    return if @running
+    
+    @running = true
+    start_input_stream
+    start_output_stream
+    @logger.info "🎵 Fixed PortAudio streams started"
+  end
+
+  def stop
+    return unless @running
+    
+    @running = false
+    
+    # Force exit in background if this takes too long
+    timeout_thread = Thread.new do
+      sleep 0.5
+      puts "🚨 AUDIO SHUTDOWN TIMEOUT - FORCING EXIT!"
+      exit!(0)
+    end
+    
+    begin
+      stop_streams
+      cleanup_portaudio
+      @logger.info "🔇 PortAudio streams stopped"
+      
+      # Cancel timeout
+      timeout_thread.kill
+    rescue
+      # If cleanup fails, just exit
+      puts "⚠️ Audio cleanup failed, forcing exit"
+      exit!(0)
+    end
+  end
+
+  def echo_cancellation_active?
+    # Simple time-based approach: block input for 1 second after last audio output
+    @playing_audio && (Time.now - @last_output_time) < 1.0
+  end
+
+  private
+
+  def initialize_portaudio
+    result = API.Pa_Initialize
+    if result == :paNoError
+      @logger.info "🎤 PortAudio initialized"
+    else
+      @logger.error "❌ Failed to initialize PortAudio: #{result}"
+    end
+  end
+
+  def start_input_stream
+    @input_stream = InputStreamHandler.new(@logger, @input_callback, self)
+    
+    input_device = API.Pa_GetDefaultInputDevice
+    if input_device < 0
+      @logger.error "❌ No default input device found"
+      return
+    end
+
+    input_params = API::PaStreamParameters.new
+    input_params[:device] = input_device
+    input_params[:channelCount] = CHANNELS
+    input_params[:sampleFormat] = API::Int16
+    input_params[:suggestedLatency] = 0.05
+    input_params[:hostApiSpecificStreamInfo] = nil
+
+    @input_stream.open(input_params, nil, INPUT_SAMPLE_RATE, FRAMES_PER_BUFFER)
+    @input_stream.start
+    @logger.info "🎤 Input stream started"
+  end
+
+  def start_output_stream
+    @output_stream = OutputStreamHandler.new(@logger, @sample_buffer, @buffer_mutex, self)
+    
+    output_device = API.Pa_GetDefaultOutputDevice
+    if output_device < 0
+      @logger.error "❌ No default output device found"
+      return
+    end
+
+    output_params = API::PaStreamParameters.new
+    output_params[:device] = output_device
+    output_params[:channelCount] = CHANNELS
+    output_params[:sampleFormat] = API::Int16
+    output_params[:suggestedLatency] = 0.1
+    output_params[:hostApiSpecificStreamInfo] = nil
+
+    @output_stream.open(nil, output_params, OUTPUT_SAMPLE_RATE, FRAMES_PER_BUFFER)
+    @output_stream.start
+    @logger.info "🔊 Output stream started"
+  end
+
+  def stop_streams
+    @running = false
+    
+    begin
+      if @input_stream
+        @input_stream.stop rescue nil
+        @input_stream.close rescue nil
+        @input_stream = nil
+      end
+    rescue => e
+      @logger.error "Error stopping input stream: #{e.message}"
+    end
+
+    begin
+      if @output_stream
+        @output_stream.stop rescue nil
+        @output_stream.close rescue nil
+        @output_stream = nil
+      end
+    rescue => e
+      @logger.error "Error stopping output stream: #{e.message}"
+    end
+  end
+
+  def cleanup_portaudio
+    API.Pa_Terminate
+  end
+
+  # Clears the playing flag once the buffer has drained. Public because OutputStreamHandler
+  # calls it with an explicit receiver, and it may already hold @buffer_mutex at that point.
+  public def mark_output_finished
+    return @buffer_mutex.synchronize { mark_output_finished } unless @buffer_mutex.owned?
+    return unless @sample_buffer.empty?
+    @playing_audio = false
+    @logger.debug "Output finished - microphone re-enabled"
+  end
+
+  # Input stream handler class
+  class InputStreamHandler < FFI::PortAudio::Stream
+    def initialize(logger, callback, parent_handler)
+      @logger = logger
+      @callback = callback
+      @parent_handler = parent_handler
+      @input_count = 0
+    end
+
+    def process(input, output, frameCount, timeInfo, statusFlags, userData)
+      @input_count += 1
+      
+      # Log periodically
+      if @input_count % 1000 == 0
+        echo_active = @parent_handler.echo_cancellation_active?
+        @logger.debug "Input callback ##{@input_count}, echo_cancellation=#{echo_active}"
+      end
+      
+      unless @parent_handler.echo_cancellation_active?
+        if input && frameCount > 0
+          audio_data = input.read_array_of_int16(frameCount * CHANNELS)
+          raw_pcm = audio_data.pack('s*')
+          @callback.call(raw_pcm) if @callback
+        end
+      end
+      
+      :paContinue
+    end
+  end
+
+  # Fixed output stream handler class
+  class OutputStreamHandler < FFI::PortAudio::Stream
+    def initialize(logger, sample_buffer, mutex, parent_handler)
+      @logger = logger
+      @sample_buffer = sample_buffer
+      @mutex = mutex
+      @parent_handler = parent_handler
+      @output_count = 0
+    end
+
+    def process(input, output, frameCount, timeInfo, statusFlags, userData)
+      @output_count += 1
+      required_samples = frameCount * CHANNELS
+      
+      @mutex.synchronize do
+        if @sample_buffer.length >= required_samples
+          # We have enough samples
+          samples = @sample_buffer.shift(required_samples)
+          output.write_array_of_int16(samples)
+          
+          if @output_count % 100 == 0
+            @logger.debug "Output ##{@output_count}: played #{required_samples} samples, #{@sample_buffer.length} remaining"
+          end
+          
+        elsif @sample_buffer.length > 0
+          # Use what we have and pad with silence
+          available_samples = @sample_buffer.shift(@sample_buffer.length)
+          padding_needed = required_samples - available_samples.length
+          complete_samples = available_samples + [0] * padding_needed
+          output.write_array_of_int16(complete_samples)
+          
+          @logger.debug "Output ##{@output_count}: used #{available_samples.length} samples, padded #{padding_needed}"
+          
+        else
+          # Output silence
+          silence = [0] * required_samples
+          output.write_array_of_int16(silence)
+          
+          # Mark output as finished when buffer is empty
+          @parent_handler.mark_output_finished
+        end
+      end
+      
+      :paContinue
+    end
+  end
+end 
\ No newline at end of file

From 410c31ff2c137600d2bc5daebc2ecdd969816849 Mon Sep 17 00:00:00 2001
From: Paul Shippy <paul.shippy@strongmind.com>
Date: Mon, 16 Jun 2025 15:00:26 -0700
Subject: [PATCH 2/5] Dependencies

---
 .../sample-codes/console-ruby/Gemfile         |  2 ++
 .../sample-codes/console-ruby/Gemfile.lock    | 32 +++++++++++++++++++
 2 files changed, 34 insertions(+)
 create mode 100644 speech-to-speech/sample-codes/console-ruby/Gemfile
 create mode 100644 speech-to-speech/sample-codes/console-ruby/Gemfile.lock

diff --git a/speech-to-speech/sample-codes/console-ruby/Gemfile b/speech-to-speech/sample-codes/console-ruby/Gemfile
new file mode 100644
index 00000000..6cdf9c6a
--- /dev/null
+++ b/speech-to-speech/sample-codes/console-ruby/Gemfile
@@ -0,0 +1,2 @@
+gem 'aws-sdk-bedrockruntime'
+gem 'ffi-portaudio'
\ No newline at end of file
diff --git a/speech-to-speech/sample-codes/console-ruby/Gemfile.lock b/speech-to-speech/sample-codes/console-ruby/Gemfile.lock
new file mode 100644
index 00000000..398fb223
--- /dev/null
+++ b/speech-to-speech/sample-codes/console-ruby/Gemfile.lock
@@ -0,0 +1,32 @@
+GEM
+  specs:
+    aws-eventstream (1.4.0)
+    aws-partitions (1.1114.0)
+    aws-sdk-bedrockruntime (1.49.0)
+      aws-sdk-core (~> 3, >= 3.225.0)
+      aws-sigv4 (~> 1.5)
+    aws-sdk-core (3.225.1)
+      aws-eventstream (~> 1, >= 1.3.0)
+      aws-partitions (~> 1, >= 1.992.0)
+      aws-sigv4 (~> 1.9)
+      base64
+      jmespath (~> 1, >= 1.6.1)
+      logger
+    aws-sigv4 (1.12.0)
+      aws-eventstream (~> 1, >= 1.0.2)
+    base64 (0.3.0)
+    ffi (1.17.2-arm64-darwin)
+    ffi-portaudio (0.1.3)
+      ffi
+    jmespath (1.6.2)
+    logger (1.7.0)
+
+PLATFORMS
+  arm64-darwin-24
+
+DEPENDENCIES
+  aws-sdk-bedrockruntime
+  ffi-portaudio
+
+BUNDLED WITH
+   2.6.9

From 4efe7bf6a86f034a1c217ca69252a7653d22ee40 Mon Sep 17 00:00:00 2001
From: Paul Shippy <paul.shippy@strongmind.com>
Date: Mon, 16 Jun 2025 15:06:36 -0700
Subject: [PATCH 3/5] Show role

---
 speech-to-speech/sample-codes/console-ruby/nova_sonic.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
index e139a48d..476bf5f8 100755
--- a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
+++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
@@ -33,7 +33,7 @@ def stop
 
     def setup_event_handlers
       @audio_service.on_event('textOutput') do |data|
-        @logger.info data.inspect
+        @logger.info "#{data['role']}: #{data['content']}"
       end
 
       @audio_service.on_event('audioOutput') do |data|

From a82a9d11d96f5275ee3f85d4bd535b0047d5994b Mon Sep 17 00:00:00 2001
From: Paul Shippy <paul.shippy@strongmind.com>
Date: Mon, 16 Jun 2025 15:37:52 -0700
Subject: [PATCH 4/5] Only show speculative stage assistant messages

---
 .../sample-codes/console-ruby/nova_sonic.rb     | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
index 476bf5f8..8a0a4025 100755
--- a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
+++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
@@ -14,6 +14,7 @@ def initialize
       @audio_service = AudioService.new(@session_id, @logger)
       @audio_handler = PortAudioHandler.new(@logger)
       @running = false
+      @current_stage = nil
     end
 
     def start
@@ -32,8 +33,22 @@ def stop
     private
 
     def setup_event_handlers
+      @audio_service.on_event('contentStart') do |data|
+        additionalModelFields = data['additionalModelFields']
+        additionalModelFields = JSON.parse(additionalModelFields) if additionalModelFields.is_a?(String)
+
+        if additionalModelFields != nil
+          stage = additionalModelFields['generationStage']
+          @current_stage = stage if stage
+        end
+      end
+
       @audio_service.on_event('textOutput') do |data|
-        @logger.info "#{data['role']}: #{data['content']}"
+        if @current_stage == 'SPECULATIVE' && data['role'] == 'ASSISTANT'
+          @logger.info "🎤 #{data['role']}: #{data['content']}"
+        elsif data['role'] == 'USER'
+          @logger.info "👤 #{data['role']}: #{data['content']}"
+        end
       end
 
       @audio_service.on_event('audioOutput') do |data|

From 13c8d362426be676c79ad4e8903e71e075e4b584 Mon Sep 17 00:00:00 2001
From: Paul Shippy <paul.shippy@strongmind.com>
Date: Mon, 23 Jun 2025 15:37:01 -0700
Subject: [PATCH 5/5] Fix issue stopping

---
 speech-to-speech/sample-codes/console-ruby/nova_sonic.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
index 8a0a4025..0251b6ef 100755
--- a/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
+++ b/speech-to-speech/sample-codes/console-ruby/nova_sonic.rb
@@ -26,8 +26,8 @@ def start
 
     def stop
       @running = false
-      @audio_handler.stop
       @audio_service.close_session
+      @audio_handler.stop
     end
 
     private