From 4e0ea7b621b6c1cf2033f38e7acc91e6033b23a0 Mon Sep 17 00:00:00 2001 From: 0xrushi <0xrushi@gmail.com> Date: Tue, 24 Feb 2026 18:12:20 -0500 Subject: [PATCH 1/3] embedding model --- extras/openmemory-mcp/.env.template | 20 ++- extras/openmemory-mcp/README.md | 8 +- extras/openmemory-mcp/setup.sh | 185 ++++++++++++++++++--- tests/unit/test_openmemory_setup_script.py | 112 +++++++++++++ wizard.py | 158 +++++++++++++++++- 5 files changed, 449 insertions(+), 34 deletions(-) create mode 100644 tests/unit/test_openmemory_setup_script.py diff --git a/extras/openmemory-mcp/.env.template b/extras/openmemory-mcp/.env.template index 10c790bd..4bc09f57 100644 --- a/extras/openmemory-mcp/.env.template +++ b/extras/openmemory-mcp/.env.template @@ -1,11 +1,27 @@ # OpenMemory MCP Configuration # Copy this file to .env and fill in your values -# Required: OpenAI API Key for memory processing +# Required: OpenAI-compatible API key used by OpenMemory defaults OPENAI_API_KEY= +# Optional: OpenAI-compatible base URL (for local providers) +# Example: http://host.docker.internal:11434/v1 +OPENAI_BASE_URL= + +# Optional: Embedding model metadata (for local embedding setups) +OPENAI_EMBEDDING_MODEL= +OPENAI_EMBEDDING_DIMENSIONS= + +# Wizard metadata for embedding provider selection +# Supported values: openai, local +OPENMEMORY_EMBEDDINGS_PROVIDER=openai +OPENMEMORY_EMBEDDINGS_BASE_URL= +OPENMEMORY_EMBEDDINGS_MODEL= +OPENMEMORY_EMBEDDINGS_API_KEY= +OPENMEMORY_EMBEDDINGS_DIMENSIONS= + # Optional: User identifier (defaults to system username) USER=openmemory # Optional: Frontend URL (if using UI) -NEXT_PUBLIC_API_URL=http://localhost:8765 \ No newline at end of file +NEXT_PUBLIC_API_URL=http://localhost:8765 diff --git a/extras/openmemory-mcp/README.md b/extras/openmemory-mcp/README.md index 940a33e5..65d45c51 100644 --- a/extras/openmemory-mcp/README.md +++ b/extras/openmemory-mcp/README.md @@ -17,7 +17,9 @@ OpenMemory MCP is a memory service from mem0.ai that provides: ```bash cp .env.template .env -# Edit .env and add your OPENAI_API_KEY +# Edit .env and add your embedding provider settings +# - OpenAI: OPENAI_API_KEY +# - Local OpenAI-compatible: OPENAI_BASE_URL, OPENAI_API_KEY, OPENAI_EMBEDDING_MODEL, OPENAI_EMBEDDING_DIMENSIONS ``` ### 2. Start Services @@ -64,7 +66,7 @@ The deployment includes: - **MCP Server**: http://localhost:8765 - REST API: `/api/v1/memories` - MCP SSE: `/mcp/{client_name}/sse/{user_id}` - + - **Qdrant Dashboard**: http://localhost:6334/dashboard - **UI** (if enabled): http://localhost:3001 @@ -184,4 +186,4 @@ OpenMemory uses OpenAI by default. To use different models, you would need to mo - [OpenMemory Documentation](https://docs.mem0.ai/open-memory/introduction) - [MCP Protocol Spec](https://github.com/mem0ai/mem0/tree/main/openmemory) -- [Chronicle Memory Docs](../../backends/advanced/MEMORY_PROVIDERS.md) \ No newline at end of file +- [Chronicle Memory Docs](../../backends/advanced/MEMORY_PROVIDERS.md) diff --git a/extras/openmemory-mcp/setup.sh b/extras/openmemory-mcp/setup.sh index afa8cf57..8d3f0e69 100755 --- a/extras/openmemory-mcp/setup.sh +++ b/extras/openmemory-mcp/setup.sh @@ -5,6 +5,11 @@ set -euo pipefail # Parse command line arguments OPENAI_API_KEY="" +EMBEDDINGS_PROVIDER="" +LOCAL_EMBEDDINGS_BASE_URL="" +LOCAL_EMBEDDINGS_MODEL="" +LOCAL_EMBEDDINGS_API_KEY="" +LOCAL_EMBEDDINGS_DIMENSIONS="" while [[ $# -gt 0 ]]; do case $1 in @@ -12,6 +17,26 @@ while [[ $# -gt 0 ]]; do OPENAI_API_KEY="$2" shift 2 ;; + --embeddings-provider) + EMBEDDINGS_PROVIDER="$2" + shift 2 + ;; + --embeddings-base-url) + LOCAL_EMBEDDINGS_BASE_URL="$2" + shift 2 + ;; + --embeddings-model) + LOCAL_EMBEDDINGS_MODEL="$2" + shift 2 + ;; + --embeddings-api-key) + LOCAL_EMBEDDINGS_API_KEY="$2" + shift 2 + ;; + --embeddings-dimensions) + LOCAL_EMBEDDINGS_DIMENSIONS="$2" + shift 2 + ;; *) echo "Unknown argument: $1" exit 1 @@ -43,36 +68,154 @@ fi # Set restrictive permissions (owner read/write only) chmod 600 .env -# Get OpenAI API Key (prompt only if not provided via command line) -if [ -z "$OPENAI_API_KEY" ]; then +# Utility: replace env key or append if missing +upsert_env_key() { + local key="$1" + local value="$2" + local temp_file + + temp_file=$(mktemp) + awk -v key="$key" -v value="$value" ' + BEGIN { found=0 } + $0 ~ ("^" key "=") { print key "=" value; found=1; next } + { print } + END { if (!found) print key "=" value } + ' .env > "$temp_file" + mv "$temp_file" .env +} + +if [ -z "$EMBEDDINGS_PROVIDER" ]; then echo "" - echo "šŸ”‘ OpenAI API Key (required for memory extraction)" - echo "Get yours from: https://platform.openai.com/api-keys" + echo "🧩 Embedding provider" + echo "1) OpenAI embeddings" + echo "2) Local OpenAI-compatible embeddings" while true; do - read -s -r -p "OpenAI API Key: " OPENAI_API_KEY - echo # Print newline after silent input - if [ -n "$OPENAI_API_KEY" ]; then - break - fi - echo "Error: OpenAI API Key cannot be empty. Please try again." + read -r -p "Choose provider [1/2]: " provider_choice + case "$provider_choice" in + 1) + EMBEDDINGS_PROVIDER="openai" + break + ;; + 2) + EMBEDDINGS_PROVIDER="local" + break + ;; + *) + echo "Error: Please enter 1 or 2." + ;; + esac done -else - echo "āœ… OpenAI API key configured from command line" fi -# Update .env file safely using awk - replace existing line or append if missing -temp_file=$(mktemp) -awk -v key="$OPENAI_API_KEY" ' - /^OPENAI_API_KEY=/ { print "OPENAI_API_KEY=" key; found=1; next } - { print } - END { if (!found) print "OPENAI_API_KEY=" key } -' .env > "$temp_file" -mv "$temp_file" .env +if [ "$EMBEDDINGS_PROVIDER" != "openai" ] && [ "$EMBEDDINGS_PROVIDER" != "local" ]; then + echo "Error: --embeddings-provider must be 'openai' or 'local'" >&2 + exit 1 +fi + +if [ "$EMBEDDINGS_PROVIDER" = "openai" ]; then + # Get OpenAI API Key (prompt only if not provided via command line) + if [ -z "$OPENAI_API_KEY" ]; then + echo "" + echo "šŸ”‘ OpenAI API Key (required for memory extraction + embeddings)" + echo "Get yours from: https://platform.openai.com/api-keys" + while true; do + read -s -r -p "OpenAI API Key: " OPENAI_API_KEY + echo # Print newline after silent input + if [ -n "$OPENAI_API_KEY" ]; then + break + fi + echo "Error: OpenAI API Key cannot be empty. Please try again." + done + else + echo "āœ… OpenAI API key configured from command line" + fi + + upsert_env_key "OPENMEMORY_EMBEDDINGS_PROVIDER" "openai" + upsert_env_key "OPENAI_API_KEY" "$OPENAI_API_KEY" + + # Clear local embedding overrides for pure OpenAI mode + upsert_env_key "OPENAI_BASE_URL" "" + upsert_env_key "OPENAI_EMBEDDING_MODEL" "" + upsert_env_key "OPENAI_EMBEDDING_DIMENSIONS" "" + upsert_env_key "OPENMEMORY_EMBEDDINGS_BASE_URL" "" + upsert_env_key "OPENMEMORY_EMBEDDINGS_MODEL" "" + upsert_env_key "OPENMEMORY_EMBEDDINGS_API_KEY" "" + upsert_env_key "OPENMEMORY_EMBEDDINGS_DIMENSIONS" "" +else + echo "" + echo "šŸ  Local embeddings configuration (OpenAI-compatible endpoint)" + + if [ -z "$LOCAL_EMBEDDINGS_BASE_URL" ]; then + while true; do + read -r -p "Embeddings base URL (e.g. http://host.docker.internal:11434/v1): " LOCAL_EMBEDDINGS_BASE_URL + if [ -n "$LOCAL_EMBEDDINGS_BASE_URL" ]; then + break + fi + echo "Error: Base URL cannot be empty. Please try again." + done + fi + + if [ -z "$LOCAL_EMBEDDINGS_MODEL" ]; then + while true; do + read -r -p "Embeddings model name: " LOCAL_EMBEDDINGS_MODEL + if [ -n "$LOCAL_EMBEDDINGS_MODEL" ]; then + break + fi + echo "Error: Model name cannot be empty. Please try again." + done + fi + + if [ -z "$LOCAL_EMBEDDINGS_API_KEY" ]; then + while true; do + read -s -r -p "Embeddings API key: " LOCAL_EMBEDDINGS_API_KEY + echo + if [ -n "$LOCAL_EMBEDDINGS_API_KEY" ]; then + break + fi + echo "Error: API key cannot be empty. Please try again." + done + fi + + if [ -z "$LOCAL_EMBEDDINGS_DIMENSIONS" ]; then + while true; do + read -r -p "Embedding dimensions (e.g. 768): " LOCAL_EMBEDDINGS_DIMENSIONS + if [[ "$LOCAL_EMBEDDINGS_DIMENSIONS" =~ ^[0-9]+$ ]] && [ "$LOCAL_EMBEDDINGS_DIMENSIONS" -gt 0 ]; then + break + fi + echo "Error: Dimensions must be a positive integer." + done + fi + + upsert_env_key "OPENMEMORY_EMBEDDINGS_PROVIDER" "local" + + # Keep OpenAI-compatible defaults pointed at the local embeddings endpoint. + # OpenMemory reads OPENAI_API_KEY by default, and OPENAI_BASE_URL can redirect + # OpenAI client calls to local-compatible servers. + upsert_env_key "OPENAI_API_KEY" "$LOCAL_EMBEDDINGS_API_KEY" + upsert_env_key "OPENAI_BASE_URL" "$LOCAL_EMBEDDINGS_BASE_URL" + upsert_env_key "OPENAI_EMBEDDING_MODEL" "$LOCAL_EMBEDDINGS_MODEL" + upsert_env_key "OPENAI_EMBEDDING_DIMENSIONS" "$LOCAL_EMBEDDINGS_DIMENSIONS" + + # Also store explicit OpenMemory-local embedding fields for future tooling. + upsert_env_key "OPENMEMORY_EMBEDDINGS_BASE_URL" "$LOCAL_EMBEDDINGS_BASE_URL" + upsert_env_key "OPENMEMORY_EMBEDDINGS_MODEL" "$LOCAL_EMBEDDINGS_MODEL" + upsert_env_key "OPENMEMORY_EMBEDDINGS_API_KEY" "$LOCAL_EMBEDDINGS_API_KEY" + upsert_env_key "OPENMEMORY_EMBEDDINGS_DIMENSIONS" "$LOCAL_EMBEDDINGS_DIMENSIONS" +fi echo "" echo "āœ… OpenMemory MCP configured!" echo "šŸ“ Configuration saved to .env" echo "" +if [ "$EMBEDDINGS_PROVIDER" = "local" ]; then + echo "ā„¹ļø Local embeddings mode enabled" + echo " Base URL: $LOCAL_EMBEDDINGS_BASE_URL" + echo " Model: $LOCAL_EMBEDDINGS_MODEL" + echo " Dimensions: $LOCAL_EMBEDDINGS_DIMENSIONS" +else + echo "ā„¹ļø OpenAI embeddings mode enabled" +fi +echo "" echo "šŸš€ To start: docker compose up -d" echo "🌐 MCP Server: http://localhost:8765" -echo "šŸ“± Web UI: http://localhost:3001" \ No newline at end of file +echo "šŸ“± Web UI: http://localhost:3001" diff --git a/tests/unit/test_openmemory_setup_script.py b/tests/unit/test_openmemory_setup_script.py new file mode 100644 index 00000000..d03649b0 --- /dev/null +++ b/tests/unit/test_openmemory_setup_script.py @@ -0,0 +1,112 @@ +import shutil +import stat +import subprocess +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +OPENMEMORY_DIR = REPO_ROOT / "extras" / "openmemory-mcp" + + +def _prepare_tmp_setup(tmp_path: Path) -> Path: + setup_src = OPENMEMORY_DIR / "setup.sh" + template_src = OPENMEMORY_DIR / ".env.template" + + setup_dst = tmp_path / "setup.sh" + template_dst = tmp_path / ".env.template" + + shutil.copy2(setup_src, setup_dst) + shutil.copy2(template_src, template_dst) + + setup_dst.chmod(setup_dst.stat().st_mode | stat.S_IXUSR) + return setup_dst + + +def _read_env_map(env_path: Path) -> dict[str, str]: + data = {} + for raw_line in env_path.read_text(encoding="utf-8").splitlines(): + line = raw_line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + key, value = line.split("=", 1) + data[key] = value + return data + + +def test_setup_openai_embeddings_mode_writes_expected_env(tmp_path): + setup_script = _prepare_tmp_setup(tmp_path) + + subprocess.run( + [ + "bash", + str(setup_script), + "--embeddings-provider", + "openai", + "--openai-api-key", + "sk-test-openai", + ], + cwd=tmp_path, + check=True, + capture_output=True, + text=True, + ) + + env_map = _read_env_map(tmp_path / ".env") + assert env_map["OPENMEMORY_EMBEDDINGS_PROVIDER"] == "openai" + assert env_map["OPENAI_API_KEY"] == "sk-test-openai" + assert env_map["OPENAI_BASE_URL"] == "" + assert env_map["OPENAI_EMBEDDING_MODEL"] == "" + assert env_map["OPENAI_EMBEDDING_DIMENSIONS"] == "" + + +def test_setup_local_embeddings_mode_writes_expected_env(tmp_path): + setup_script = _prepare_tmp_setup(tmp_path) + + subprocess.run( + [ + "bash", + str(setup_script), + "--embeddings-provider", + "local", + "--embeddings-base-url", + "http://host.docker.internal:11434/v1", + "--embeddings-model", + "nomic-embed-text", + "--embeddings-api-key", + "local-key", + "--embeddings-dimensions", + "768", + ], + cwd=tmp_path, + check=True, + capture_output=True, + text=True, + ) + + env_map = _read_env_map(tmp_path / ".env") + assert env_map["OPENMEMORY_EMBEDDINGS_PROVIDER"] == "local" + assert env_map["OPENAI_API_KEY"] == "local-key" + assert env_map["OPENAI_BASE_URL"] == "http://host.docker.internal:11434/v1" + assert env_map["OPENAI_EMBEDDING_MODEL"] == "nomic-embed-text" + assert env_map["OPENAI_EMBEDDING_DIMENSIONS"] == "768" + assert ( + env_map["OPENMEMORY_EMBEDDINGS_BASE_URL"] + == "http://host.docker.internal:11434/v1" + ) + assert env_map["OPENMEMORY_EMBEDDINGS_MODEL"] == "nomic-embed-text" + assert env_map["OPENMEMORY_EMBEDDINGS_API_KEY"] == "local-key" + assert env_map["OPENMEMORY_EMBEDDINGS_DIMENSIONS"] == "768" + + +def test_setup_rejects_invalid_embeddings_provider(tmp_path): + setup_script = _prepare_tmp_setup(tmp_path) + + result = subprocess.run( + ["bash", str(setup_script), "--embeddings-provider", "invalid-provider"], + cwd=tmp_path, + check=False, + capture_output=True, + text=True, + ) + + assert result.returncode != 0 + assert "--embeddings-provider must be 'openai' or 'local'" in result.stderr diff --git a/wizard.py b/wizard.py index 784fdfe4..00348056 100755 --- a/wizard.py +++ b/wizard.py @@ -250,6 +250,7 @@ def run_service_setup( langfuse_secret_key=None, langfuse_host=None, streaming_provider=None, + hardware_profile=None, ): """Execute individual service setup script""" if service_name == "advanced": @@ -302,6 +303,14 @@ def run_service_setup( # Define the speaker env path speaker_env_path = "extras/speaker-recognition/.env" + # Pass explicit hardware profile selection when provided by wizard + if hardware_profile == "strixhalo": + cmd.extend(["--pytorch-cuda-version", "strixhalo"]) + cmd.extend(["--compute-mode", "gpu"]) + console.print( + "[blue][INFO][/blue] Using AMD Strix Halo profile for speaker recognition" + ) + # HF Token should have been provided via setup_hf_token_if_needed() if hf_token: cmd.extend(["--hf-token", hf_token]) @@ -323,7 +332,7 @@ def run_service_setup( # Pass compute mode from existing .env if available compute_mode = read_env_value(speaker_env_path, "COMPUTE_MODE") - if compute_mode in ["cpu", "gpu"]: + if hardware_profile != "strixhalo" and compute_mode in ["cpu", "gpu"]: cmd.extend(["--compute-mode", compute_mode]) console.print( f"[blue][INFO][/blue] Found existing COMPUTE_MODE ({compute_mode}), reusing" @@ -332,11 +341,18 @@ def run_service_setup( # For asr-services, pass provider from wizard's transcription choice and reuse CUDA version if service_name == "asr-services": # Map wizard transcription provider to asr-services provider name - wizard_to_asr_provider = { - "vibevoice": "vibevoice", - "parakeet": "nemo", - "qwen3-asr": "qwen3-asr", - } + if hardware_profile == "strixhalo": + wizard_to_asr_provider = { + "vibevoice": "vibevoice-strixhalo", + "parakeet": "nemo-strixhalo", + "qwen3-asr": "qwen3-asr", + } + else: + wizard_to_asr_provider = { + "vibevoice": "vibevoice", + "parakeet": "nemo", + "qwen3-asr": "qwen3-asr", + } asr_provider = wizard_to_asr_provider.get(transcription_provider) if asr_provider: cmd.extend(["--provider", asr_provider]) @@ -346,7 +362,17 @@ def run_service_setup( speaker_env_path = "extras/speaker-recognition/.env" cuda_version = read_env_value(speaker_env_path, "PYTORCH_CUDA_VERSION") - if cuda_version and cuda_version in ["cu121", "cu126", "cu128"]: + if hardware_profile == "strixhalo": + cmd.extend(["--pytorch-cuda-version", "strixhalo"]) + console.print( + "[blue][INFO][/blue] Using AMD Strix Halo profile for ASR services" + ) + elif cuda_version and cuda_version in [ + "cu121", + "cu126", + "cu128", + "strixhalo", + ]: cmd.extend(["--pytorch-cuda-version", cuda_version]) console.print( f"[blue][INFO][/blue] Found existing PYTORCH_CUDA_VERSION ({cuda_version}) from speaker-recognition, reusing" @@ -362,14 +388,78 @@ def run_service_setup( # For openmemory-mcp, try to pass OpenAI API key from backend if available if service_name == "openmemory-mcp": backend_env_path = "backends/advanced/.env" + openmemory_env_path = "extras/openmemory-mcp/.env" openai_key = read_env_value(backend_env_path, "OPENAI_API_KEY") - if openai_key and not is_placeholder( + backend_openai_base_url = read_env_value( + backend_env_path, "OPENAI_BASE_URL" + ) + backend_embedding_model = read_env_value( + backend_env_path, "OPENAI_EMBEDDING_MODEL" + ) + backend_embedding_dims = read_env_value( + backend_env_path, "OPENAI_EMBEDDING_DIMENSIONS" + ) + + existing_embeddings_provider = read_env_value( + openmemory_env_path, "OPENMEMORY_EMBEDDINGS_PROVIDER" + ) + existing_embeddings_base_url = read_env_value( + openmemory_env_path, "OPENMEMORY_EMBEDDINGS_BASE_URL" + ) + existing_embeddings_model = read_env_value( + openmemory_env_path, "OPENMEMORY_EMBEDDINGS_MODEL" + ) + existing_embeddings_api_key = read_env_value( + openmemory_env_path, "OPENMEMORY_EMBEDDINGS_API_KEY" + ) + existing_embeddings_dims = read_env_value( + openmemory_env_path, "OPENMEMORY_EMBEDDINGS_DIMENSIONS" + ) + + def _has_value(value): + return value and value.strip() + + has_openai_key = _has_value(openai_key) and not is_placeholder( openai_key, "your_openai_api_key_here", "your-openai-api-key-here", "your_openai_key_here", "your-openai-key-here", + ) + + # Prefer an existing OpenMemory local embedding configuration if available. + if ( + existing_embeddings_provider == "local" + and _has_value(existing_embeddings_base_url) + and _has_value(existing_embeddings_model) + and _has_value(existing_embeddings_api_key) + and _has_value(existing_embeddings_dims) + ): + cmd.extend(["--embeddings-provider", "local"]) + cmd.extend(["--embeddings-base-url", existing_embeddings_base_url]) + cmd.extend(["--embeddings-model", existing_embeddings_model]) + cmd.extend(["--embeddings-api-key", existing_embeddings_api_key]) + cmd.extend(["--embeddings-dimensions", existing_embeddings_dims]) + console.print( + "[blue][INFO][/blue] Found existing local embeddings config for OpenMemory, reusing" + ) + elif ( + has_openai_key + and _has_value(backend_openai_base_url) + and "api.openai.com" not in backend_openai_base_url ): + # Backend appears to use a local OpenAI-compatible endpoint. + cmd.extend(["--embeddings-provider", "local"]) + cmd.extend(["--embeddings-base-url", backend_openai_base_url]) + cmd.extend(["--embeddings-api-key", openai_key]) + if _has_value(backend_embedding_model): + cmd.extend(["--embeddings-model", backend_embedding_model]) + if _has_value(backend_embedding_dims): + cmd.extend(["--embeddings-dimensions", backend_embedding_dims]) + console.print( + "[blue][INFO][/blue] Found OpenAI-compatible local endpoint in backend config, pre-filling OpenMemory local embeddings" + ) + elif has_openai_key: cmd.extend(["--openai-api-key", openai_key]) console.print( "[blue][INFO][/blue] Found existing OPENAI_API_KEY from backend config, reusing" @@ -870,6 +960,53 @@ def setup_langfuse_choice(): } +def select_hardware_profile( + selected_services, transcription_provider, streaming_provider +): + """Select hardware profile for GPU-backed optional services. + + Returns: + "strixhalo" for AMD Strix Halo profile, otherwise None. + """ + strix_capable_providers = {"parakeet", "vibevoice"} + needs_hardware_choice = ( + "speaker-recognition" in selected_services + or transcription_provider in strix_capable_providers + or streaming_provider in strix_capable_providers + ) + + if not needs_hardware_choice: + return None + + console.print("\n🧠 [bold cyan]Hardware Profile[/bold cyan]") + console.print( + "Choose target hardware for GPU services (speaker recognition and offline ASR):" + ) + choices = { + "1": "Standard (CPU/NVIDIA CUDA)", + "2": "AMD Strix Halo (ROCm, gfx1151 / Ryzen AI Max)", + } + for key, desc in choices.items(): + console.print(f" {key}) {desc}") + console.print() + + while True: + try: + choice = Prompt.ask("Enter choice", default="1") + if choice == "1": + return None + if choice == "2": + console.print( + "[green]āœ…[/green] Using AMD Strix Halo profile where supported" + ) + return "strixhalo" + console.print( + f"[red]Invalid choice. Please select from {list(choices.keys())}[/red]" + ) + except EOFError: + return None + + def main(): """Main orchestration logic""" console.print("šŸŽ‰ [bold green]Welcome to Chronicle![/bold green]\n") @@ -925,6 +1062,10 @@ def main(): selected_services.append("langfuse") # HF Token Configuration (if services require it) + hardware_profile = select_hardware_profile( + selected_services, transcription_provider, streaming_provider + ) + hf_token = setup_hf_token_if_needed(selected_services) # HTTPS Configuration (for services that need it) @@ -1101,6 +1242,7 @@ def main(): langfuse_secret_key=langfuse_secret_key, langfuse_host=langfuse_host, streaming_provider=streaming_provider, + hardware_profile=hardware_profile, ): success_count += 1 From bdaee7aeb38076d98280090fc6564880d308a594 Mon Sep 17 00:00:00 2001 From: 0xrushi <0xrushi@gmail.com> Date: Thu, 26 Feb 2026 15:05:45 -0500 Subject: [PATCH 2/3] Enhance audio streaming functionality and codec handling - Updated `useAudioStreamingOrchestrator` to ensure WebSocket URLs are correctly formatted and include codec parameters for Opus and PCM. - Introduced a new utility function `is_opus_header_stripped` to determine if incoming Opus payloads have the BLE header removed, improving audio processing accuracy. - Modified the WebSocket handling in `websocket_controller.py` to accommodate the new header stripping logic and updated audio session initialization. - Added unit tests for the new utility function to ensure correct behavior across various input scenarios. --- .../hooks/useAudioStreamingOrchestrator.ts | 12 ++++- .../controllers/websocket_controller.py | 48 +++++++++++-------- .../utils/omi_codec_utils.py | 26 ++++++++++ .../advanced/tests/test_omi_codec_utils.py | 35 ++++++++++++++ .../local-wearable-client/backend_sender.py | 7 ++- 5 files changed, 107 insertions(+), 21 deletions(-) create mode 100644 backends/advanced/src/advanced_omi_backend/utils/omi_codec_utils.py create mode 100644 backends/advanced/tests/test_omi_codec_utils.py diff --git a/app/src/hooks/useAudioStreamingOrchestrator.ts b/app/src/hooks/useAudioStreamingOrchestrator.ts index 43e78511..02097df1 100644 --- a/app/src/hooks/useAudioStreamingOrchestrator.ts +++ b/app/src/hooks/useAudioStreamingOrchestrator.ts @@ -46,6 +46,14 @@ export const useAudioStreamingOrchestrator = ({ const buildWebSocketUrl = useCallback((baseUrl: string): string => { let url = baseUrl.trim(); + url = url.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); + if (!url.includes('/ws')) url = url.replace(/\/$/, '') + '/ws'; + if (/[?&]codec=/i.test(url)) { + url = url.replace(/([?&])codec=[^&]*/i, '$1codec=opus'); + } else { + const sep = url.includes('?') ? '&' : '?'; + url = url + sep + 'codec=opus'; + } const isAdvanced = settings.jwtToken && settings.isAuthenticated; if (isAdvanced) { @@ -63,7 +71,9 @@ export const useAudioStreamingOrchestrator = ({ let url = baseUrl.trim(); url = url.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); if (!url.includes('/ws')) url = url.replace(/\/$/, '') + '/ws'; - if (!url.includes('codec=')) { + if (/[?&]codec=/i.test(url)) { + url = url.replace(/([?&])codec=[^&]*/i, '$1codec=pcm'); + } else { const sep = url.includes('?') ? '&' : '?'; url = url + sep + 'codec=pcm'; } diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index bab956ec..06595f7f 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -31,6 +31,7 @@ from advanced_omi_backend.services.audio_stream.producer import ( get_audio_stream_producer, ) +from advanced_omi_backend.utils.omi_codec_utils import is_opus_header_stripped # Thread pool executors for audio decoding _DEC_IO_EXECUTOR = concurrent.futures.ThreadPoolExecutor( @@ -667,6 +668,7 @@ async def _handle_omi_audio_chunk( audio_stream_producer, opus_payload: bytes, decode_packet_fn, + strip_header: bool, user_id: str, client_id: str, packet_count: int, @@ -679,6 +681,7 @@ async def _handle_omi_audio_chunk( audio_stream_producer: Audio stream producer instance opus_payload: Opus-encoded audio bytes decode_packet_fn: Opus decoder function + strip_header: Whether to strip 3-byte BLE header before decoding user_id: User ID client_id: Client ID packet_count: Current packet number for logging @@ -687,7 +690,7 @@ async def _handle_omi_audio_chunk( start_time = time.time() loop = asyncio.get_running_loop() pcm_data = await loop.run_in_executor( - _DEC_IO_EXECUTOR, decode_packet_fn, opus_payload + _DEC_IO_EXECUTOR, decode_packet_fn, opus_payload, strip_header ) decode_time = time.time() - start_time @@ -1031,8 +1034,7 @@ async def _handle_button_event( audio_uuid = client_state.current_audio_uuid application_logger.info( - f"šŸ”˜ Button event from {client_id}: {button_state} " - f"(audio_uuid={audio_uuid})" + f"šŸ”˜ Button event from {client_id}: {button_state} (audio_uuid={audio_uuid})" ) # Store marker on client state for later persistence to conversation @@ -1353,7 +1355,7 @@ async def handle_omi_websocket( # OMI-specific: Setup Opus decoder decoder = OmiOpusDecoder() - _decode_packet = partial(decoder.decode_packet, strip_header=False) + _decode_packet = decoder.decode_packet packet_count = 0 total_bytes = 0 @@ -1368,20 +1370,26 @@ async def handle_omi_websocket( ) application_logger.info(f"šŸŽ™ļø OMI audio session started for {client_id}") + audio_start_data = header.get("data", {}) + # Most current clients (mobile app, local wearable relay) send Opus + # payloads with BLE header already removed. + # Allow explicit override for raw BLE packet sources. + client_state.opus_header_stripped = is_opus_header_stripped( + audio_start_data + ) + interim_holder[0] = await _initialize_streaming_session( client_state, audio_stream_producer, user.user_id, user.email, client_id, - header.get( - "data", - { - "rate": OMI_SAMPLE_RATE, - "width": OMI_SAMPLE_WIDTH, - "channels": OMI_CHANNELS, - }, - ), + audio_start_data + or { + "rate": OMI_SAMPLE_RATE, + "width": OMI_SAMPLE_WIDTH, + "channels": OMI_CHANNELS, + }, websocket=ws, ) @@ -1399,6 +1407,7 @@ async def handle_omi_websocket( audio_stream_producer, payload, _decode_packet, + not getattr(client_state, "opus_header_stripped", False), user.user_id, client_id, packet_count, @@ -1476,13 +1485,14 @@ async def handle_pcm_websocket( ) # Handle audio session start (pass websocket for error handling) - audio_streaming, recording_mode = ( - await _handle_audio_session_start( - client_state, - header.get("data", {}), - client_id, - websocket=ws, - ) + ( + audio_streaming, + recording_mode, + ) = await _handle_audio_session_start( + client_state, + header.get("data", {}), + client_id, + websocket=ws, ) # Initialize streaming session diff --git a/backends/advanced/src/advanced_omi_backend/utils/omi_codec_utils.py b/backends/advanced/src/advanced_omi_backend/utils/omi_codec_utils.py new file mode 100644 index 00000000..cea4acb5 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/utils/omi_codec_utils.py @@ -0,0 +1,26 @@ +"""Helpers for OMI Opus payload metadata handling.""" + +from typing import Any + + +def is_opus_header_stripped(audio_start_data: dict[str, Any] | None) -> bool: + """ + Determine whether incoming OMI Opus payloads already have BLE header removed. + + Defaults to True because current mobile and relay clients send header-stripped + payload bytes. Raw BLE packet sources can override with + ``opus_header_stripped: false``. + """ + if not audio_start_data: + return True + + value = audio_start_data.get("opus_header_stripped", True) + + if isinstance(value, str): + normalized = value.strip().lower() + if normalized in {"false", "0", "no", "off"}: + return False + if normalized in {"true", "1", "yes", "on"}: + return True + + return bool(value) diff --git a/backends/advanced/tests/test_omi_codec_utils.py b/backends/advanced/tests/test_omi_codec_utils.py new file mode 100644 index 00000000..d8741ef3 --- /dev/null +++ b/backends/advanced/tests/test_omi_codec_utils.py @@ -0,0 +1,35 @@ +import pytest + +from advanced_omi_backend.utils.omi_codec_utils import is_opus_header_stripped + + +@pytest.mark.unit +def test_defaults_to_header_stripped_when_metadata_missing(): + assert is_opus_header_stripped(None) is True + assert is_opus_header_stripped({}) is True + + +@pytest.mark.unit +def test_respects_explicit_boolean_flag(): + assert is_opus_header_stripped({"opus_header_stripped": True}) is True + assert is_opus_header_stripped({"opus_header_stripped": False}) is False + + +@pytest.mark.unit +@pytest.mark.parametrize( + ("value", "expected"), + [ + ("true", True), + ("TRUE", True), + ("1", True), + ("yes", True), + ("on", True), + ("false", False), + ("FALSE", False), + ("0", False), + ("no", False), + ("off", False), + ], +) +def test_handles_string_flags(value, expected): + assert is_opus_header_stripped({"opus_header_stripped": value}) is expected diff --git a/extras/local-wearable-client/backend_sender.py b/extras/local-wearable-client/backend_sender.py index f5b37625..6986d19f 100644 --- a/extras/local-wearable-client/backend_sender.py +++ b/extras/local-wearable-client/backend_sender.py @@ -105,7 +105,11 @@ async def receive_handler(websocket, logger) -> None: if msg_type == "interim_transcript": text = data.get("data", {}).get("text", "")[:50] is_final = data.get("data", {}).get("is_final", False) - logger.debug("Interim transcript (%s): %s...", "FINAL" if is_final else "partial", text) + logger.debug( + "Interim transcript (%s): %s...", + "FINAL" if is_final else "partial", + text, + ) elif msg_type == "ready": logger.info("Backend ready message: %s", data.get("message")) else: @@ -165,6 +169,7 @@ async def stream_to_backend( "width": 2, "channels": 1, "mode": "streaming", + "opus_header_stripped": True, }, "payload_length": None, } From 0f281e9d10ac109ed70940b5e450e366794449eb Mon Sep 17 00:00:00 2001 From: 0xrushi <0xrushi@gmail.com> Date: Thu, 26 Feb 2026 15:37:02 -0500 Subject: [PATCH 3/3] Enhance WebSocket URL handling in `useAudioStreamingOrchestrator` - Improved URL normalization for WebSocket connections by trimming user input and converting HTTP(S) schemes to WS(S). - Ensured the presence of the `/ws` endpoint path in the WebSocket URL. - Updated codec handling to enforce Opus as the stream codec, either by replacing existing codec parameters or appending it if missing. --- app/src/hooks/useAudioStreamingOrchestrator.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/app/src/hooks/useAudioStreamingOrchestrator.ts b/app/src/hooks/useAudioStreamingOrchestrator.ts index 4547a950..6e09951c 100644 --- a/app/src/hooks/useAudioStreamingOrchestrator.ts +++ b/app/src/hooks/useAudioStreamingOrchestrator.ts @@ -45,9 +45,18 @@ export const useAudioStreamingOrchestrator = ({ const [isPhoneAudioMode, setIsPhoneAudioMode] = useState(false); const buildWebSocketUrl = useCallback((baseUrl: string): string => { + // Normalize user input so we can safely build a websocket URL. let url = baseUrl.trim(); + + // Convert HTTP(S) scheme to WS(S) for websocket connections. url = url.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); + + // Ensure the websocket endpoint path is present. if (!url.includes('/ws')) url = url.replace(/\/$/, '') + '/ws'; + + // Force OMI stream codec to Opus: + // - replace existing codec query value, or + // - append codec=opus if missing. if (/[?&]codec=/i.test(url)) { url = url.replace(/([?&])codec=[^&]*/i, '$1codec=opus'); } else {