From f4994ca0394f84051a0aa5ed63723f169074e648 Mon Sep 17 00:00:00 2001 From: Clay Sheaff Date: Sun, 22 Feb 2026 07:52:14 -0800 Subject: [PATCH] Fix socat dropping transcription results on longer recordings socat's -t flag (shutdown timeout after stdin EOF) defaults to 0.5s. When the transcription daemon takes longer than that to process dense speech, socat disconnects before receiving the result, causing a BrokenPipeError in the daemon and "no speech detected" for the user. Add -t 120 to all socat calls, and raise the existing -T inactivity timeout from 30 to 120, so socat waits for the daemon's response. Also add daemon logging to $XDG_RUNTIME_DIR/talktype-whisper.log (falling back to /tmp when XDG_RUNTIME_DIR is unset) and a single restart-and-retry in transcribe-server when the daemon dies mid-request. Co-Authored-By: Claude Opus 4.6 --- backends/moonshine-server | 2 +- backends/parakeet-server | 2 +- transcribe-server | 11 +++++++++-- whisper-daemon.py | 26 ++++++++++++++++++++++++-- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/backends/moonshine-server b/backends/moonshine-server index 9bef3b6..80d1112 100755 --- a/backends/moonshine-server +++ b/backends/moonshine-server @@ -62,7 +62,7 @@ case "${1:-}" in if [ ! -S "$SOCK" ]; then "$0" start >&2 || exit 1 fi - echo "$2" | socat -T 30 - UNIX-CONNECT:"$SOCK" + echo "$2" | socat -t 120 -T 120 - UNIX-CONNECT:"$SOCK" ;; *) echo "Usage: moonshine-server {start|stop|transcribe }" >&2 diff --git a/backends/parakeet-server b/backends/parakeet-server index 2590b9f..ad44a59 100755 --- a/backends/parakeet-server +++ b/backends/parakeet-server @@ -61,7 +61,7 @@ case "${1:-}" in if [ ! 
-S "$SOCK" ]; then "$0" start >&2 || exit 1 fi - echo "$2" | socat -T 30 - UNIX-CONNECT:"$SOCK" + echo "$2" | socat -t 120 -T 120 - UNIX-CONNECT:"$SOCK" ;; *) echo "Usage: parakeet-server {start|stop|transcribe }" >&2 diff --git a/transcribe-server b/transcribe-server index 27e8ccd..537e36c 100755 --- a/transcribe-server +++ b/transcribe-server @@ -61,13 +61,20 @@ case "${1:-}" in ;; transcribe) # Ensure daemon is alive (not just a stale socket from a crash) - if [ -S "$SOCK" ] && [ -f "$PIDFILE" ] && ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then + if [ -f "$PIDFILE" ] && ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then rm -f "$PIDFILE" "$SOCK" fi if [ ! -S "$SOCK" ]; then "$0" start >&2 || exit 1 fi - echo "$2" | socat -T 30 - UNIX-CONNECT:"$SOCK" + TEXT=$(echo "$2" | socat -t 120 -T 120 - UNIX-CONNECT:"$SOCK" 2>/dev/null) || true + if [ -z "$TEXT" ] && [ -f "$PIDFILE" ] && ! kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then + # Daemon died during transcription — restart and retry once + rm -f "$PIDFILE" "$SOCK" + "$0" start >&2 || exit 1 + TEXT=$(echo "$2" | socat -t 120 -T 120 - UNIX-CONNECT:"$SOCK" 2>/dev/null) || true + fi + printf '%s' "$TEXT" ;; *) echo "Usage: transcribe-server {start|stop|transcribe }" >&2 diff --git a/whisper-daemon.py b/whisper-daemon.py index 345a9b3..fc18c13 100644 --- a/whisper-daemon.py +++ b/whisper-daemon.py @@ -3,6 +3,8 @@ import sys import socket import signal +import logging +import time from faster_whisper import WhisperModel SOCK_PATH = sys.argv[1] @@ -11,10 +13,19 @@ DEVICE = sys.argv[4] COMPUTE = sys.argv[5] +LOG_PATH = os.path.join(os.environ.get("XDG_RUNTIME_DIR", "/tmp"), "talktype-whisper.log") +logging.basicConfig( + filename=LOG_PATH, level=logging.INFO, + format="%(asctime)s %(levelname)s %(message)s", +) +log = logging.getLogger("whisper-daemon") + # Load model once +log.info("Loading faster-whisper %s (device=%s, compute=%s)...", MODEL_NAME, DEVICE, COMPUTE) print(f"Loading faster-whisper {MODEL_NAME}...", flush=True) 
model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type=COMPUTE) print("Model loaded.", flush=True) +log.info("Model loaded.") def transcribe(audio_path): @@ -23,6 +34,7 @@ def transcribe(audio_path): def cleanup(*_): + log.info("Shutting down (signal).") try: os.unlink(SOCK_PATH) except OSError: @@ -45,12 +57,22 @@ def cleanup(*_): try: audio_path = conn.recv(4096).decode().strip() if audio_path and os.path.isfile(audio_path): + file_size = os.path.getsize(audio_path) + log.info("Transcribing %s (%d bytes)...", audio_path, file_size) + t0 = time.monotonic() text = transcribe(audio_path) + elapsed = time.monotonic() - t0 + log.info("Done in %.1fs, %d chars: %s", elapsed, len(text), + text[:200] if text else "(empty)") conn.sendall(text.encode()) else: + log.warning("Bad path: %r", audio_path) conn.sendall(b"") except Exception as e: - print(f"Error: {e}", file=sys.stderr, flush=True) - conn.sendall(b"") + log.exception("Error during transcription") + try: + conn.sendall(b"") + except Exception: + pass finally: conn.close()