SimpleOpenSoftware · AnkushMalaker · Feb 27, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/.gitignore b/.gitignore
@@ -83,6 +83,7 @@ extras/speaker-recognition/Caddyfile
 # Cache
 extras/speaker-recognition/cache/*
 extras/speaker-recognition/outputs/*
+**/model_cache_strix_test/*
 
 # my backup
 backends/advanced/src/_webui_original/*

diff --git a/backends/advanced/uv.lock b/backends/advanced/uv.lock
diff --git a/extras/asr-services/docker-compose-test-strixhalo.yml b/extras/asr-services/docker-compose-test-strixhalo.yml
@@ -0,0 +1,45 @@
+# docker-compose-test-strixhalo.yml
+# Test stack that runs the ASR service on AMD Strix Halo through ROCm.
+
+services:
+  parakeet-asr-strixhalo-test:
+    image: parakeet-asr-strixhalo:test
+    build:
+      context: .
+      dockerfile: providers/nemo/Dockerfile.strixhalo
+    restart: unless-stopped
+    # Container process runs as uid 0 / gid 0
+    user: "0:0"
+    ports:
+      # Host port 8768 forwards to container port 8765
+      - "8768:8765"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      # Supplementary GIDs; override the defaults through VIDEO_GID / RENDER_GID
+      - "${VIDEO_GID:-985}"
+      - "${RENDER_GID:-989}"
+    volumes:
+      - ./model_cache_strix_test:/models
+      - ./debug_test:/app/debug
+      - ./results_test:/app/results
+    environment:
+      # HF_HOME points at the /models bind mount declared above
+      HF_HOME: /models
+      ASR_MODEL: nvidia/parakeet-tdt-0.6b-v3
+      HSA_OVERRIDE_GFX_VERSION: "11.5.1"
+    healthcheck:
+      test:
+        - "CMD"
+        - "curl"
+        - "-f"
+        - "http://localhost:8765/health"
+      start_period: 120s
+      interval: 30s
+      timeout: 10s
+      retries: 10
+
+networks:
+  default:
+    name: asr-test-strix-network
diff --git a/extras/asr-services/docker-compose.yml b/extras/asr-services/docker-compose.yml
@@ -232,6 +232,88 @@ services:
       - qwen3-asr
     restart: unless-stopped
 
+  # ============================================================================
+  # AMD Strix Halo (gfx1151 / Ryzen AI Max) build of the VibeVoice provider
+  # Built on ROCm nightly torch wheels; /dev/kfd and /dev/dri must be passed in
+  # ============================================================================
+  vibevoice-asr-strixhalo:
+    image: chronicle-asr-vibevoice-strixhalo:latest
+    build:
+      context: .
+      dockerfile: providers/vibevoice/Dockerfile.strixhalo
+    restart: unless-stopped
+    ports:
+      - "${ASR_PORT:-8767}:8765"
+    dns:
+      - 8.8.8.8
+      - 8.8.4.4
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      # Numeric GIDs only: the slim Python image has no video/render group names.
+      # Look up the host values with: getent group video render
+      - "${VIDEO_GID:-985}"   # video  (985 is the Arch default; Ubuntu typically 44)
+      - "${RENDER_GID:-989}"  # render (989 is the Arch default; Ubuntu typically 107)
+    volumes:
+      - ./model_cache:/models
+      - ./debug:/app/debug
+      - ./results:/app/results
+      - ../../config:/app/config:ro
+      - ./lora_adapters:/models/lora_adapters
+    environment:
+      HF_HOME: /models
+      ASR_MODEL: "${ASR_MODEL:-microsoft/VibeVoice-ASR}"
+      VIBEVOICE_LLM_MODEL: "${VIBEVOICE_LLM_MODEL:-Qwen/Qwen2.5-7B}"
+      # ROCm supports sdpa attention; flash_attention_2 requires CUDA
+      VIBEVOICE_ATTN_IMPL: sdpa
+      DEVICE: "${VIBEVOICE_DEVICE:-cuda}"
+      TORCH_DTYPE: "${VIBEVOICE_TORCH_DTYPE:-bfloat16}"
+      MAX_NEW_TOKENS: "${MAX_NEW_TOKENS:-8192}"
+      # Deliberately empty: bitsandbytes quantization requires CUDA, and
+      # Strix Halo has a lot of unified memory, so quantization is not needed
+      QUANTIZATION: ""
+      LORA_ADAPTER_PATH: "${LORA_ADAPTER_PATH:-}"
+      HSA_OVERRIDE_GFX_VERSION: "11.5.1"
+
+  # ============================================================================
+  # AMD Strix Halo (gfx1151 / Ryzen AI Max) build of the NeMo provider
+  # Built on ROCm nightly torch wheels; /dev/kfd and /dev/dri must be passed in
+  # NeMo without the CUDA-only cuda-python binding; ROCm support is experimental
+  # ============================================================================
+  nemo-asr-strixhalo:
+    image: chronicle-asr-nemo-strixhalo:latest
+    build:
+      context: .
+      dockerfile: providers/nemo/Dockerfile.strixhalo
+    restart: unless-stopped
+    ports:
+      - "${ASR_PORT:-8767}:8765"
+    devices:
+      - /dev/kfd:/dev/kfd
+      - /dev/dri:/dev/dri
+    group_add:
+      # Numeric GIDs only: the slim Python image has no video/render group names.
+      # Look up the host values with: getent group video render
+      - "${VIDEO_GID:-985}"   # video  (985 is the Arch default; Ubuntu typically 44)
+      - "${RENDER_GID:-989}"  # render (989 is the Arch default; Ubuntu typically 107)
+    volumes:
+      - ./model_cache:/models
+      - ./debug:/app/debug
+      - ./results:/app/results
+    environment:
+      HF_HOME: /models
+      ASR_MODEL: "${ASR_MODEL:-nvidia/parakeet-tdt-0.6b-v3}"
+      # Legacy variable name; defaults to ASR_MODEL when PARAKEET_MODEL is unset
+      PARAKEET_MODEL: "${PARAKEET_MODEL:-${ASR_MODEL:-nvidia/parakeet-tdt-0.6b-v3}}"
+      # Chunking and confidence settings, each overridable from the host env
+      CHUNKING_ENABLED: "${CHUNKING_ENABLED:-true}"
+      CHUNK_DURATION_SECONDS: "${CHUNK_DURATION_SECONDS:-30.0}"
+      OVERLAP_DURATION_SECONDS: "${OVERLAP_DURATION_SECONDS:-5.0}"
+      MIN_AUDIO_FOR_CHUNKING: "${MIN_AUDIO_FOR_CHUNKING:-60.0}"
+      CONFIDENCE_THRESHOLD: "${CONFIDENCE_THRESHOLD:-0.8}"
+      HSA_OVERRIDE_GFX_VERSION: "11.5.1"
+
   # ============================================================================
   # Legacy Parakeet Service (backward compatibility)
   # ============================================================================