diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 995eb47b..85381153 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -23,7 +23,12 @@ # Add repo root to path for imports sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) from config_manager import ConfigManager -from setup_utils import detect_tailscale_info, mask_value +from setup_utils import ( + detect_tailscale_info, + get_stt_provider_by_id, + load_stt_provider_catalog, + mask_value, +) from setup_utils import prompt_password as util_prompt_password from setup_utils import prompt_with_existing_masked, read_env_value @@ -33,7 +38,9 @@ def __init__(self, args=None): self.console = Console() self.config: Dict[str, Any] = {} self.args = args or argparse.Namespace() - self.config_yml_path = Path("../../config/config.yml") # Main config at config/config.yml + self.config_yml_path = Path( + "../../config/config.yml" + ) # Main config at config/config.yml # Check if we're in the right directory if not Path("pyproject.toml").exists() or not Path("src").exists(): @@ -87,7 +94,9 @@ def prompt_password(self, prompt: str) -> str: """Prompt for password (delegates to shared utility)""" return util_prompt_password(prompt, min_length=8, allow_generated=True) - def prompt_choice(self, prompt: str, choices: Dict[str, str], default: str = "1") -> str: + def prompt_choice( + self, prompt: str, choices: Dict[str, str], default: str = "1" + ) -> str: """Prompt for a choice from options""" self.console.print(prompt) for key, desc in choices.items(): @@ -117,7 +126,9 @@ def _ensure_plugins_yml_exists(self): "[blue][INFO][/blue] plugins.yml not found, creating from template..." 
) shutil.copy2(plugins_template, plugins_yml) - self.console.print(f"[green]✅[/green] Created {plugins_yml} from template") + self.console.print( + f"[green]✅[/green] Created {plugins_yml} from template" + ) self.console.print( "[yellow][NOTE][/yellow] Edit config/plugins.yml to configure plugins" ) @@ -139,7 +150,9 @@ def backup_existing_env(self): timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") backup_path = f".env.backup.{timestamp}" shutil.copy2(env_path, backup_path) - self.console.print(f"[blue][INFO][/blue] Backed up existing .env file to {backup_path}") + self.console.print( + f"[blue][INFO][/blue] Backed up existing .env file to {backup_path}" + ) def read_existing_env_value(self, key: str) -> str: """Read a value from existing .env file (delegates to shared utility)""" @@ -202,7 +215,9 @@ def setup_authentication(self): ) self.config["ADMIN_PASSWORD"] = password else: - self.config["ADMIN_PASSWORD"] = self.prompt_password("Admin password (min 8 chars)") + self.config["ADMIN_PASSWORD"] = self.prompt_password( + "Admin password (min 8 chars)" + ) # Preserve existing AUTH_SECRET_KEY to avoid invalidating JWTs existing_secret = self.read_existing_env_value("AUTH_SECRET_KEY") @@ -218,204 +233,128 @@ def setup_authentication(self): def setup_transcription(self): """Configure transcription provider - updates config.yml and .env""" - # Check if transcription provider was provided via command line - if hasattr(self.args, "transcription_provider") and self.args.transcription_provider: - provider = self.args.transcription_provider + catalog = load_stt_provider_catalog() + + # Resolve provider_id: from CLI arg or interactive menu + if ( + hasattr(self.args, "transcription_provider") + and self.args.transcription_provider + ): + provider_id = self.args.transcription_provider self.console.print( - f"[green]✅[/green] Transcription: {provider} (configured via wizard)" - ) - - # Map provider to choice - if provider == "deepgram": - choice = "1" - elif provider == 
"parakeet": - choice = "2" - elif provider == "vibevoice": - choice = "3" - elif provider == "qwen3-asr": - choice = "4" - elif provider == "smallest": - choice = "5" - elif provider == "none": - choice = "6" - else: - choice = "1" # Default to Deepgram + f"[green]✅[/green] Transcription: {provider_id} (configured via wizard)" + ) else: self.print_section("Speech-to-Text Configuration") - self.console.print( "[blue][INFO][/blue] Provider selection is configured in config.yml (defaults.stt)" ) self.console.print("[blue][INFO][/blue] API keys are stored in .env") self.console.print() - # Interactive prompt is_macos = platform.system() == "Darwin" + choices = {} + for i, p in enumerate(catalog, 1): + if is_macos and "description_macos" in p: + desc = p["description_macos"] + elif not is_macos and "description_linux" in p: + desc = p["description_linux"] + else: + desc = p.get("description", p["display_name"]) + choices[str(i)] = desc + skip_num = str(len(catalog) + 1) + choices[skip_num] = "None (skip transcription setup)" - if is_macos: - parakeet_desc = "Offline (Parakeet ASR - CPU-based, runs locally)" - vibevoice_desc = "Offline (VibeVoice - CPU-based, built-in diarization)" - else: - parakeet_desc = "Offline (Parakeet ASR - GPU recommended, runs locally)" - vibevoice_desc = "Offline (VibeVoice - GPU recommended, built-in diarization)" - - qwen3_desc = "Offline (Qwen3-ASR - GPU required, 52 languages, streaming + batch)" - - smallest_desc = "Smallest.ai Pulse (cloud-based, fast, requires API key)" - - choices = { - "1": "Deepgram (recommended - high quality, cloud-based)", - "2": parakeet_desc, - "3": vibevoice_desc, - "4": qwen3_desc, - "5": smallest_desc, - "6": "None (skip transcription setup)", - } - - choice = self.prompt_choice("Choose your transcription provider:", choices, "1") - - if choice == "1": - self.console.print("[blue][INFO][/blue] Deepgram selected") - self.console.print("Get your API key from: https://console.deepgram.com/") - - # Use the new 
masked prompt function - api_key = self.prompt_with_existing_masked( - prompt_text="Deepgram API key (leave empty to skip)", - env_key="DEEPGRAM_API_KEY", - placeholders=["your_deepgram_api_key_here", "your-deepgram-key-here"], - is_password=True, - default="", - ) - - if api_key: - # Write API key to .env - self.config["DEEPGRAM_API_KEY"] = api_key - - # Update config.yml to use Deepgram - self.config_manager.update_config_defaults({"stt": "stt-deepgram"}) - - self.console.print( - "[green][SUCCESS][/green] Deepgram configured in config.yml and .env" - ) - self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-deepgram") - else: - self.console.print( - "[yellow][WARNING][/yellow] No API key provided - transcription will not work" - ) - - elif choice == "2": - self.console.print("[blue][INFO][/blue] Offline Parakeet ASR selected") - existing_parakeet_url = ( - read_env_value(".env", "PARAKEET_ASR_URL") or "http://host.docker.internal:8767" + choice = self.prompt_choice( + "Choose your transcription provider:", choices, "1" ) - parakeet_url = self.prompt_value("Parakeet ASR URL", existing_parakeet_url) + if choice == skip_num: + self.console.print("[blue][INFO][/blue] Skipping transcription setup") + return + provider_id = catalog[int(choice) - 1]["id"] - # Write URL to .env for ${PARAKEET_ASR_URL} placeholder in config.yml - self.config["PARAKEET_ASR_URL"] = parakeet_url - - # Update config.yml to use Parakeet - self.config_manager.update_config_defaults({"stt": "stt-parakeet-batch"}) + if provider_id == "none": + self.console.print("[blue][INFO][/blue] Skipping transcription setup") + return + provider = get_stt_provider_by_id(provider_id, catalog) + if not provider: self.console.print( - "[green][SUCCESS][/green] Parakeet configured in config.yml and .env" - ) - self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-parakeet-batch") - self.console.print( - "[yellow][WARNING][/yellow] Remember to start Parakeet service: cd ../../extras/asr-services 
&& docker compose up nemo-asr" + f"[yellow][WARNING][/yellow] Unknown transcription provider: {provider_id}" ) + return - elif choice == "3": - self.console.print( - "[blue][INFO][/blue] Offline VibeVoice ASR selected (built-in speaker diarization)" - ) - existing_vibevoice_url = ( - read_env_value(".env", "VIBEVOICE_ASR_URL") or "http://host.docker.internal:8767" - ) - vibevoice_url = self.prompt_value("VibeVoice ASR URL", existing_vibevoice_url) + self.console.print(f"[blue][INFO][/blue] {provider['display_name']} selected") - # Write URL to .env for ${VIBEVOICE_ASR_URL} placeholder in config.yml - self.config["VIBEVOICE_ASR_URL"] = vibevoice_url + # Show setup notes (informational messages specific to this provider) + for note in provider.get("setup_notes", []): + self.console.print(f"[blue][INFO][/blue] {note}") - # Update config.yml to use VibeVoice - self.config_manager.update_config_defaults({"stt": "stt-vibevoice"}) + # Process env_vars generically + configured = True + for ev in provider.get("env_vars", []): + if ev.get("prompt") is None or ev.get("type") == "derived": + continue # Skip derived vars (e.g. 
QWEN3_ASR_STREAM_URL) - self.console.print( - "[green][SUCCESS][/green] VibeVoice configured in config.yml and .env" - ) - self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-vibevoice") - self.console.print( - "[blue][INFO][/blue] VibeVoice provides built-in speaker diarization - pyannote will be skipped" - ) - self.console.print( - "[yellow][WARNING][/yellow] Remember to start VibeVoice service: cd ../../extras/asr-services && docker compose up vibevoice-asr" - ) + ev_name = ev["name"] + ev_type = ev["type"] - elif choice == "4": - self.console.print( - "[blue][INFO][/blue] Qwen3-ASR selected (52 languages, streaming + batch via vLLM)" - ) - existing_qwen3_url_raw = read_env_value(".env", "QWEN3_ASR_URL") - existing_qwen3_url = ( - f"http://{existing_qwen3_url_raw}" - if existing_qwen3_url_raw - else "http://host.docker.internal:8767" - ) - qwen3_url = self.prompt_value("Qwen3-ASR URL", existing_qwen3_url) - - # Write URL to .env for ${QWEN3_ASR_URL} placeholder in config.yml - self.config["QWEN3_ASR_URL"] = qwen3_url.replace("http://", "").rstrip("/") + if ev_type == "secret": + # Show optional description line (e.g. 
"Get your API key from: ...") + if ev.get("description"): + self.console.print(ev["description"]) + api_key = self.prompt_with_existing_masked( + prompt_text=ev["prompt"], + env_key=ev_name, + placeholders=ev.get("placeholders", []), + is_password=True, + default="", + ) + if api_key: + self.config[ev_name] = api_key + elif ev.get("required"): + configured = False + self.console.print( + "[yellow][WARNING][/yellow] No API key provided - transcription will not work" + ) - # Also set streaming URL (same host, port 8769) - stream_host = qwen3_url.replace("http://", "").split(":")[0] - self.config["QWEN3_ASR_STREAM_URL"] = f"{stream_host}:8769" + elif ev_type == "url": + existing = read_env_value(".env", ev_name) or ev.get("default", "") + url = self.prompt_value(ev["prompt"], existing) + self.config[ev_name] = url - # Update config.yml to use Qwen3-ASR - self.config_manager.update_config_defaults({"stt": "stt-qwen3-asr"}) + elif ev_type == "url_strip_http": + existing_raw = read_env_value(".env", ev_name) + # Re-add http:// prefix so the prompt shows a familiar URL + existing = ( + f"http://{existing_raw}" if existing_raw else ev.get("default", "") + ) + url = self.prompt_value(ev["prompt"], existing) + self.config[ev_name] = url.replace("http://", "").rstrip("/") + # Special case: Qwen3-ASR URL derives the streaming URL (same host, port 8769) + if ev_name == "QWEN3_ASR_URL": + stream_host = url.replace("http://", "").split(":")[0] + self.config["QWEN3_ASR_STREAM_URL"] = f"{stream_host}:8769" + + if configured: + batch_stt = provider["defaults"]["batch"] + self.config_manager.update_config_defaults({"stt": batch_stt}) + stream_stt = provider["defaults"].get("stream") + if stream_stt: + self.config_manager.update_config_defaults({"stt_stream": stream_stt}) self.console.print( - "[green][SUCCESS][/green] Qwen3-ASR configured in config.yml and .env" + f"[green][SUCCESS][/green] {provider['display_name']} configured in config.yml and .env" ) - 
self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-qwen3-asr") - self.console.print( - "[yellow][WARNING][/yellow] Remember to start Qwen3-ASR: cd ../../extras/asr-services && docker compose up qwen3-asr-wrapper qwen3-asr-bridge -d" - ) - - elif choice == "5": - self.console.print("[blue][INFO][/blue] Smallest.ai Pulse selected") - self.console.print("Get your API key from: https://smallest.ai/") - - # Use the new masked prompt function - api_key = self.prompt_with_existing_masked( - prompt_text="Smallest.ai API key (leave empty to skip)", - env_key="SMALLEST_API_KEY", - placeholders=["your_smallest_api_key_here", "your-smallest-key-here"], - is_password=True, - default="", - ) - - if api_key: - # Write API key to .env - self.config["SMALLEST_API_KEY"] = api_key - - # Update config.yml to use Smallest.ai (batch + streaming) - self.config_manager.update_config_defaults( - {"stt": "stt-smallest", "stt_stream": "stt-smallest-stream"} - ) - - self.console.print( - "[green][SUCCESS][/green] Smallest.ai configured in config.yml and .env" - ) - self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-smallest") + self.console.print(f"[blue][INFO][/blue] Set defaults.stt: {batch_stt}") + if stream_stt: self.console.print( - "[blue][INFO][/blue] Set defaults.stt_stream: stt-smallest-stream" - ) - else: - self.console.print( - "[yellow][WARNING][/yellow] No API key provided - transcription will not work" + f"[blue][INFO][/blue] Set defaults.stt_stream: {stream_stt}" ) - elif choice == "6": - self.console.print("[blue][INFO][/blue] Skipping transcription setup") + # Show warning notes (e.g. "Remember to start X service") + for note in provider.get("notes", []): + self.console.print(f"[yellow][WARNING][/yellow] {note}") def setup_streaming_provider(self): """Configure a separate streaming provider if --streaming-provider was passed. 
@@ -423,28 +362,35 @@ def setup_streaming_provider(self): When a different streaming provider is specified, sets defaults.stt_stream and enables always_batch_retranscribe (batch provider was set by setup_transcription). """ - if not hasattr(self.args, "streaming_provider") or not self.args.streaming_provider: + if ( + not hasattr(self.args, "streaming_provider") + or not self.args.streaming_provider + ): return - streaming_provider = self.args.streaming_provider - self.console.print( - f"\n[green]✅[/green] Streaming provider: {streaming_provider} (configured via wizard)" - ) - - # Map streaming provider to stt_stream config value - provider_to_stt_stream = { - "deepgram": "stt-deepgram-stream", - "smallest": "stt-smallest-stream", - "qwen3-asr": "stt-qwen3-asr", - } + streaming_provider_id = self.args.streaming_provider + catalog = load_stt_provider_catalog() + provider = get_stt_provider_by_id(streaming_provider_id, catalog) + if not provider: + self.console.print( + f"[yellow][WARNING][/yellow] Unknown streaming provider: {streaming_provider_id}" + ) + return - stream_stt = provider_to_stt_stream.get(streaming_provider) + # Prefer the dedicated stream config value; fall back to batch if streaming uses same model + stream_stt = provider["defaults"].get("stream") or provider["defaults"].get( + "batch" + ) if not stream_stt: self.console.print( - f"[yellow][WARNING][/yellow] Unknown streaming provider: {streaming_provider}" + f"[yellow][WARNING][/yellow] Provider {streaming_provider_id} has no streaming config" ) return + self.console.print( + f"\n[green]✅[/green] Streaming provider: {provider['display_name']} (configured via wizard)" + ) + # Set stt_stream (batch stt was already set by setup_transcription) self.config_manager.update_config_defaults({"stt_stream": stream_stt}) @@ -458,53 +404,49 @@ def setup_streaming_provider(self): self.config_manager.save_full_config(full_config) self.console.print(f"[blue][INFO][/blue] Set defaults.stt_stream: {stream_stt}") - 
self.console.print(f"[blue][INFO][/blue] Enabled always_batch_retranscribe") - - # Prompt for streaming provider env vars if not already set - if streaming_provider == "deepgram": - existing_key = read_env_value(".env", "DEEPGRAM_API_KEY") - if not existing_key or existing_key in ( - "your_deepgram_api_key_here", - "your-deepgram-key-here", - ): - api_key = self.prompt_with_existing_masked( - prompt_text="Deepgram API key for streaming", - env_key="DEEPGRAM_API_KEY", - placeholders=[ - "your_deepgram_api_key_here", - "your-deepgram-key-here", - ], - is_password=True, - default="", - ) - if api_key: - self.config["DEEPGRAM_API_KEY"] = api_key - elif streaming_provider == "smallest": - existing_key = read_env_value(".env", "SMALLEST_API_KEY") - if not existing_key or existing_key in ( - "your_smallest_api_key_here", - "your-smallest-key-here", - ): + self.console.print("[blue][INFO][/blue] Enabled always_batch_retranscribe") + + # Prompt for streaming provider env vars if not already set in .env + for ev in provider.get("env_vars", []): + ev_name = ev["name"] + ev_type = ev.get("type", "") + + # Handle the Qwen3 derived stream URL — only needed when used as streaming-only + if ev_type == "derived": + if ev_name == "QWEN3_ASR_STREAM_URL": + existing = read_env_value(".env", ev_name) + if not existing: + qwen3_url = self.prompt_value( + "Qwen3-ASR streaming URL", + "http://host.docker.internal:8769", + ) + self.config[ev_name] = qwen3_url.replace("http://", "").rstrip( + "/" + ) + continue + + if ev.get("prompt") is None: + continue + + if ev_type == "secret": + # prompt_with_existing_masked handles the "already set" case gracefully api_key = self.prompt_with_existing_masked( - prompt_text="Smallest.ai API key for streaming", - env_key="SMALLEST_API_KEY", - placeholders=[ - "your_smallest_api_key_here", - "your-smallest-key-here", - ], + prompt_text=f"{ev['prompt']} for streaming", + env_key=ev_name, + placeholders=ev.get("placeholders", []), is_password=True, 
default="", ) if api_key: - self.config["SMALLEST_API_KEY"] = api_key - elif streaming_provider == "qwen3-asr": - existing_url = read_env_value(".env", "QWEN3_ASR_STREAM_URL") - if not existing_url: - qwen3_url = self.prompt_value( - "Qwen3-ASR streaming URL", "http://host.docker.internal:8769" - ) - stream_host = qwen3_url.replace("http://", "").rstrip("/") - self.config["QWEN3_ASR_STREAM_URL"] = stream_host + self.config[ev_name] = api_key + + elif ev_type in ("url", "url_strip_http"): + existing = read_env_value(".env", ev_name) or ev.get("default", "") + url = self.prompt_value(ev["prompt"], existing) + if ev_type == "url_strip_http": + self.config[ev_name] = url.replace("http://", "").rstrip("/") + else: + self.config[ev_name] = url def setup_llm(self): """Configure LLM provider - updates config.yml and .env""" @@ -526,7 +468,9 @@ def setup_llm(self): existing_choice = "1" self.print_section("LLM Provider Configuration") - self.console.print("[blue][INFO][/blue] LLM configuration will be saved to config.yml") + self.console.print( + "[blue][INFO][/blue] LLM configuration will be saved to config.yml" + ) self.console.print() choices = { @@ -541,7 +485,9 @@ def setup_llm(self): if choice == "1": self.console.print("[blue][INFO][/blue] OpenAI selected") - self.console.print("Get your API key from: https://platform.openai.com/api-keys") + self.console.print( + "Get your API key from: https://platform.openai.com/api-keys" + ) # Use the new masked prompt function api_key = self.prompt_with_existing_masked( @@ -558,9 +504,13 @@ def setup_llm(self): self.config_manager.update_config_defaults( {"llm": "openai-llm", "embedding": "openai-embed"} ) - self.console.print("[green][SUCCESS][/green] OpenAI configured in config.yml") + self.console.print( + "[green][SUCCESS][/green] OpenAI configured in config.yml" + ) self.console.print("[blue][INFO][/blue] Set defaults.llm: openai-llm") - self.console.print("[blue][INFO][/blue] Set defaults.embedding: openai-embed") + 
self.console.print( + "[blue][INFO][/blue] Set defaults.embedding: openai-embed" + ) else: self.console.print( "[yellow][WARNING][/yellow] No API key provided - memory extraction will not work" @@ -572,21 +522,31 @@ def setup_llm(self): self.config_manager.update_config_defaults( {"llm": "local-llm", "embedding": "local-embed"} ) - self.console.print("[green][SUCCESS][/green] Ollama configured in config.yml") + self.console.print( + "[green][SUCCESS][/green] Ollama configured in config.yml" + ) self.console.print("[blue][INFO][/blue] Set defaults.llm: local-llm") - self.console.print("[blue][INFO][/blue] Set defaults.embedding: local-embed") + self.console.print( + "[blue][INFO][/blue] Set defaults.embedding: local-embed" + ) self.console.print( "[yellow][WARNING][/yellow] Make sure Ollama is running and models are pulled" ) elif choice == "3": - self.console.print("[blue][INFO][/blue] OpenAI-Compatible custom endpoint selected") - self.console.print("This works with any provider that exposes an OpenAI-compatible API") + self.console.print( + "[blue][INFO][/blue] OpenAI-Compatible custom endpoint selected" + ) + self.console.print( + "This works with any provider that exposes an OpenAI-compatible API" + ) self.console.print("(e.g., Groq, Together AI, LM Studio, vLLM, etc.)") self.console.print() # Prompt for base URL (required) - base_url = self.prompt_value("API Base URL (e.g., https://api.groq.com/openai/v1)", "") + base_url = self.prompt_value( + "API Base URL (e.g., https://api.groq.com/openai/v1)", "" + ) if not base_url: self.console.print( "[yellow][WARNING][/yellow] No base URL provided - skipping custom LLM setup" @@ -604,7 +564,9 @@ def setup_llm(self): self.config["CUSTOM_LLM_API_KEY"] = api_key # Prompt for model name (required) - model_name = self.prompt_value("LLM Model name (e.g., llama-3.1-70b-versatile)", "") + model_name = self.prompt_value( + "LLM Model name (e.g., llama-3.1-70b-versatile)", "" + ) if not model_name: self.console.print( 
"[yellow][WARNING][/yellow] No model name provided - skipping custom LLM setup" @@ -663,7 +625,9 @@ def setup_llm(self): self.console.print( "[green][SUCCESS][/green] Custom LLM and embedding configured in config.yml" ) - self.console.print("[blue][INFO][/blue] Set defaults.llm: custom-llm") + self.console.print( + "[blue][INFO][/blue] Set defaults.llm: custom-llm" + ) self.console.print( "[blue][INFO][/blue] Set defaults.embedding: custom-embed" ) @@ -674,7 +638,9 @@ def setup_llm(self): self.console.print( "[green][SUCCESS][/green] Custom LLM configured in config.yml" ) - self.console.print("[blue][INFO][/blue] Set defaults.llm: custom-llm") + self.console.print( + "[blue][INFO][/blue] Set defaults.llm: custom-llm" + ) self.console.print( "[blue][INFO][/blue] Set defaults.embedding: local-embed (Ollama)" ) @@ -702,7 +668,9 @@ def setup_memory(self): # Standalone init.py run — read existing config as default existing_choice = "1" full_config = self.config_manager.get_full_config() - existing_provider = full_config.get("memory", {}).get("provider", "chronicle") + existing_provider = full_config.get("memory", {}).get( + "provider", "chronicle" + ) if existing_provider == "openmemory_mcp": existing_choice = "2" @@ -718,7 +686,9 @@ def setup_memory(self): ) if choice == "1": - self.console.print("[blue][INFO][/blue] Chronicle Native memory provider selected") + self.console.print( + "[blue][INFO][/blue] Chronicle Native memory provider selected" + ) qdrant_url = self.prompt_value("Qdrant URL", "qdrant") self.config["QDRANT_BASE_URL"] = qdrant_url @@ -764,7 +734,9 @@ def setup_optional_services(self): has_speaker_arg = ( hasattr(self.args, "speaker_service_url") and self.args.speaker_service_url ) - has_asr_arg = hasattr(self.args, "parakeet_asr_url") and self.args.parakeet_asr_url + has_asr_arg = ( + hasattr(self.args, "parakeet_asr_url") and self.args.parakeet_asr_url + ) if has_speaker_arg: self.config["SPEAKER_SERVICE_URL"] = self.args.speaker_service_url @@ -781,7 
+753,9 @@ def setup_optional_services(self): # Only show interactive section if not all configured via args if not has_speaker_arg: try: - enable_speaker = Confirm.ask("Enable Speaker Recognition?", default=False) + enable_speaker = Confirm.ask( + "Enable Speaker Recognition?", default=False + ) except EOFError: self.console.print("Using default: No") enable_speaker = False @@ -792,7 +766,9 @@ def setup_optional_services(self): "http://host.docker.internal:8001", ) self.config["SPEAKER_SERVICE_URL"] = speaker_url - self.console.print("[green][SUCCESS][/green] Speaker Recognition configured") + self.console.print( + "[green][SUCCESS][/green] Speaker Recognition configured" + ) self.console.print( "[blue][INFO][/blue] Start with: cd ../../extras/speaker-recognition && docker compose up -d" ) @@ -809,12 +785,16 @@ def setup_neo4j(self): neo4j_password = getattr(self.args, "neo4j_password", None) if neo4j_password: - self.console.print(f"[green]✅[/green] Neo4j: password configured via wizard") + self.console.print( + f"[green]✅[/green] Neo4j: password configured via wizard" + ) else: # Interactive prompt (standalone init.py run) self.console.print() self.console.print("[bold cyan]Neo4j Configuration[/bold cyan]") - self.console.print("Neo4j is used for Knowledge Graph (entity/relationship extraction)") + self.console.print( + "Neo4j is used for Knowledge Graph (entity/relationship extraction)" + ) self.console.print() neo4j_password = self.prompt_with_existing_masked( "Neo4j password (min 8 chars)", @@ -836,10 +816,14 @@ def setup_obsidian(self): if has_enable: enable_obsidian = True - self.console.print(f"[green]✅[/green] Obsidian: enabled (configured via wizard)") + self.console.print( + f"[green]✅[/green] Obsidian: enabled (configured via wizard)" + ) elif has_disable: enable_obsidian = False - self.console.print(f"[blue][INFO][/blue] Obsidian: disabled (configured via wizard)") + self.console.print( + f"[blue][INFO][/blue] Obsidian: disabled (configured via 
wizard)" + ) else: # Standalone init.py run — read existing config as default full_config = self.config_manager.get_full_config() @@ -849,7 +833,9 @@ def setup_obsidian(self): self.console.print() self.console.print("[bold cyan]Obsidian Integration (Optional)[/bold cyan]") - self.console.print("Enable graph-based knowledge management for Obsidian vault notes") + self.console.print( + "Enable graph-based knowledge management for Obsidian vault notes" + ) self.console.print() try: @@ -857,7 +843,9 @@ def setup_obsidian(self): "Enable Obsidian integration?", default=existing_enabled ) except EOFError: - self.console.print(f"Using default: {'Yes' if existing_enabled else 'No'}") + self.console.print( + f"Using default: {'Yes' if existing_enabled else 'No'}" + ) enable_obsidian = existing_enabled if enable_obsidian: @@ -874,9 +862,12 @@ def setup_obsidian(self): def setup_knowledge_graph(self): """Configure Knowledge Graph (Neo4j-based entity/relationship extraction - enabled by default)""" has_enable = ( - hasattr(self.args, "enable_knowledge_graph") and self.args.enable_knowledge_graph + hasattr(self.args, "enable_knowledge_graph") + and self.args.enable_knowledge_graph + ) + has_disable = ( + hasattr(self.args, "no_knowledge_graph") and self.args.no_knowledge_graph ) - has_disable = hasattr(self.args, "no_knowledge_graph") and self.args.no_knowledge_graph if has_enable: enable_kg = True @@ -892,20 +883,28 @@ def setup_knowledge_graph(self): # Standalone init.py run — read existing config as default full_config = self.config_manager.get_full_config() existing_enabled = ( - full_config.get("memory", {}).get("knowledge_graph", {}).get("enabled", True) + full_config.get("memory", {}) + .get("knowledge_graph", {}) + .get("enabled", True) ) self.console.print() - self.console.print("[bold cyan]Knowledge Graph (Entity Extraction)[/bold cyan]") + self.console.print( + "[bold cyan]Knowledge Graph (Entity Extraction)[/bold cyan]" + ) self.console.print( "Extract people, 
places, organizations, events, and tasks from conversations" ) self.console.print() try: - enable_kg = Confirm.ask("Enable Knowledge Graph?", default=existing_enabled) + enable_kg = Confirm.ask( + "Enable Knowledge Graph?", default=existing_enabled + ) except EOFError: - self.console.print(f"Using default: {'Yes' if existing_enabled else 'No'}") + self.console.print( + f"Using default: {'Yes' if existing_enabled else 'No'}" + ) enable_kg = existing_enabled if enable_kg: @@ -937,7 +936,9 @@ def setup_knowledge_graph(self): def setup_langfuse(self): """Configure LangFuse observability and prompt management""" self.console.print() - self.console.print("[bold cyan]LangFuse Observability & Prompt Management[/bold cyan]") + self.console.print( + "[bold cyan]LangFuse Observability & Prompt Management[/bold cyan]" + ) # Check if keys were passed from wizard (langfuse init already ran) langfuse_pub = getattr(self.args, "langfuse_public_key", None) @@ -945,26 +946,39 @@ def setup_langfuse(self): if langfuse_pub and langfuse_sec: # Auto-configure from wizard — no prompts needed - langfuse_host = getattr(self.args, "langfuse_host", None) or "http://langfuse-web:3000" + langfuse_host = ( + getattr(self.args, "langfuse_host", None) or "http://langfuse-web:3000" + ) self.config["LANGFUSE_HOST"] = langfuse_host self.config["LANGFUSE_PUBLIC_KEY"] = langfuse_pub self.config["LANGFUSE_SECRET_KEY"] = langfuse_sec self.config["LANGFUSE_BASE_URL"] = langfuse_host # Derive browser-accessible URL for deep-links - public_url = getattr(self.args, "langfuse_public_url", None) or "http://localhost:3002" + public_url = ( + getattr(self.args, "langfuse_public_url", None) + or "http://localhost:3002" + ) self._save_langfuse_public_url(public_url) source = "external" if "langfuse-web" not in langfuse_host else "local" - self.console.print(f"[green][SUCCESS][/green] LangFuse auto-configured ({source})") + self.console.print( + f"[green][SUCCESS][/green] LangFuse auto-configured ({source})" + ) 
self.console.print(f"[blue][INFO][/blue] Host: {langfuse_host}") self.console.print(f"[blue][INFO][/blue] Public URL: {public_url}") - self.console.print(f"[blue][INFO][/blue] Public key: {self.mask_api_key(langfuse_pub)}") + self.console.print( + f"[blue][INFO][/blue] Public key: {self.mask_api_key(langfuse_pub)}" + ) return # Manual configuration (standalone init.py run) - self.console.print("Enable LLM tracing, observability, and prompt management with LangFuse") - self.console.print("Self-host: cd ../../extras/langfuse && docker compose up -d") + self.console.print( + "Enable LLM tracing, observability, and prompt management with LangFuse" + ) + self.console.print( + "Self-host: cd ../../extras/langfuse && docker compose up -d" + ) self.console.print() try: @@ -1039,13 +1053,17 @@ def setup_https(self): if hasattr(self.args, "enable_https") and self.args.enable_https: enable_https = True server_ip = getattr(self.args, "server_ip", "localhost") - self.console.print(f"[green]✅[/green] HTTPS: {server_ip} (configured via wizard)") + self.console.print( + f"[green]✅[/green] HTTPS: {server_ip} (configured via wizard)" + ) else: # Interactive configuration self.print_section("HTTPS Configuration (Optional)") try: - enable_https = Confirm.ask("Enable HTTPS for microphone access?", default=False) + enable_https = Confirm.ask( + "Enable HTTPS for microphone access?", default=False + ) except EOFError: self.console.print("Using default: No") enable_https = False @@ -1059,12 +1077,18 @@ def setup_https(self): ts_dns, ts_ip = detect_tailscale_info() if ts_dns: - self.console.print(f"[green][AUTO-DETECTED][/green] Tailscale DNS: {ts_dns}") + self.console.print( + f"[green][AUTO-DETECTED][/green] Tailscale DNS: {ts_dns}" + ) if ts_ip: - self.console.print(f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}") + self.console.print( + f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}" + ) default_address = ts_dns elif ts_ip: - 
self.console.print(f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}") + self.console.print( + f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}" + ) default_address = ts_ip else: self.console.print("[blue][INFO][/blue] Tailscale not detected") @@ -1073,7 +1097,9 @@ def setup_https(self): ) default_address = "localhost" - self.console.print("[blue][INFO][/blue] For local-only access, use 'localhost'") + self.console.print( + "[blue][INFO][/blue] For local-only access, use 'localhost'" + ) # Use the new masked prompt function (not masked for IP, but shows existing) server_ip = self.prompt_with_existing_masked( @@ -1100,7 +1126,9 @@ def setup_https(self): ) # Generate Caddyfile from template - self.console.print("[blue][INFO][/blue] Creating Caddyfile configuration...") + self.console.print( + "[blue][INFO][/blue] Creating Caddyfile configuration..." + ) caddyfile_template = script_dir / "Caddyfile.template" caddyfile_path = script_dir / "Caddyfile" @@ -1111,8 +1139,12 @@ def setup_https(self): self.console.print( "[red]❌ ERROR: 'Caddyfile' exists as a directory![/red]" ) - self.console.print("[yellow] Please remove it manually:[/yellow]") - self.console.print(f"[yellow] rm -rf {caddyfile_path}[/yellow]") + self.console.print( + "[yellow] Please remove it manually:[/yellow]" + ) + self.console.print( + f"[yellow] rm -rf {caddyfile_path}[/yellow]" + ) self.console.print( "[red] HTTPS will NOT work without a proper Caddyfile![/red]" ) @@ -1122,7 +1154,9 @@ def setup_https(self): caddyfile_content = f.read() # Replace TAILSCALE_IP with server_ip - caddyfile_content = caddyfile_content.replace("TAILSCALE_IP", server_ip) + caddyfile_content = caddyfile_content.replace( + "TAILSCALE_IP", server_ip + ) with open(caddyfile_path, "w") as f: f.write(caddyfile_content) @@ -1134,14 +1168,18 @@ def setup_https(self): self.config["SERVER_IP"] = server_ip except Exception as e: - self.console.print(f"[red]❌ ERROR: Caddyfile generation failed: {e}[/red]") + 
self.console.print( + f"[red]❌ ERROR: Caddyfile generation failed: {e}[/red]" + ) self.console.print( "[red] HTTPS will NOT work without a proper Caddyfile![/red]" ) self.config["HTTPS_ENABLED"] = "false" else: self.console.print("[red]❌ ERROR: Caddyfile.template not found[/red]") - self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.console.print( + "[red] HTTPS will NOT work without a proper Caddyfile![/red]" + ) self.config["HTTPS_ENABLED"] = "false" else: self.config["HTTPS_ENABLED"] = "false" @@ -1190,14 +1228,18 @@ def copy_config_templates(self): and Path("diarization_config.json.template").exists() ): shutil.copy2("diarization_config.json.template", "diarization_config.json") - self.console.print("[green][SUCCESS][/green] diarization_config.json created") + self.console.print( + "[green][SUCCESS][/green] diarization_config.json created" + ) def show_summary(self): """Show configuration summary""" self.print_section("Configuration Summary") self.console.print() - self.console.print(f"✅ Admin Account: {self.config.get('ADMIN_EMAIL', 'Not configured')}") + self.console.print( + f"✅ Admin Account: {self.config.get('ADMIN_EMAIL', 'Not configured')}" + ) # Get current config from ConfigManager (single source of truth) config_yml = self.config_manager.get_full_config() @@ -1208,8 +1250,14 @@ def show_summary(self): (m for m in config_yml.get("models", []) if m.get("name") == stt_default), None, ) - stt_provider = stt_model.get("model_provider", "unknown") if stt_model else "not configured" - self.console.print(f"✅ Transcription: {stt_provider} ({stt_default}) - config.yml") + stt_provider = ( + stt_model.get("model_provider", "unknown") + if stt_model + else "not configured" + ) + self.console.print( + f"✅ Transcription: {stt_provider} ({stt_default}) - config.yml" + ) # Show LLM config from config.yml llm_default = config_yml.get("defaults", {}).get("llm", "not set") @@ -1271,7 +1319,9 @@ def show_next_steps(self): 
self.console.print(f" [cyan]http://localhost:{webui_port}[/cyan]") self.console.print() self.console.print("3. Check service health:") - self.console.print(f" [cyan]curl http://localhost:{backend_port}/health[/cyan]") + self.console.print( + f" [cyan]curl http://localhost:{backend_port}/health[/cyan]" + ) if self.config.get("MEMORY_PROVIDER") == "openmemory_mcp": self.console.print() @@ -1296,7 +1346,9 @@ def run(self): self.console.print( "[dim]Safe to run again — it backs up your config and preserves previous values.[/dim]" ) - self.console.print("[dim]When unsure, just press Enter — the defaults will work.[/dim]") + self.console.print( + "[dim]When unsure, just press Enter — the defaults will work.[/dim]" + ) self.console.print() try: @@ -1351,6 +1403,17 @@ def run(self): def main(): """Main entry point""" + # Load catalog for dynamic argparse choices + try: + _catalog = load_stt_provider_catalog() + _transcription_choices = [p["id"] for p in _catalog] + ["none"] + _streaming_choices = [ + p["id"] for p in _catalog if p.get("capabilities", {}).get("streaming") + ] + except Exception: + _transcription_choices = None # No restriction if catalog unavailable + _streaming_choices = None + parser = argparse.ArgumentParser(description="Chronicle Advanced Backend Setup") parser.add_argument( "--speaker-service-url", @@ -1361,7 +1424,7 @@ def main(): ) parser.add_argument( "--transcription-provider", - choices=["deepgram", "parakeet", "vibevoice", "qwen3-asr", "smallest", "none"], + choices=_transcription_choices, help="Transcription provider (default: prompt user)", ) parser.add_argument( @@ -1383,7 +1446,9 @@ def main(): action="store_true", help="Enable Knowledge Graph entity extraction (default: prompt user)", ) - parser.add_argument("--neo4j-password", help="Neo4j password (default: prompt user)") + parser.add_argument( + "--neo4j-password", help="Neo4j password (default: prompt user)" + ) parser.add_argument( "--ts-authkey", help="Tailscale auth key for Docker 
integration (default: prompt user)", @@ -1402,7 +1467,7 @@ def main(): ) parser.add_argument( "--streaming-provider", - choices=["deepgram", "smallest", "qwen3-asr"], + choices=_streaming_choices, help="Streaming provider when different from batch (enables batch re-transcription)", ) parser.add_argument( diff --git a/config/stt-providers-catalog.yml b/config/stt-providers-catalog.yml new file mode 100644 index 00000000..4a405130 --- /dev/null +++ b/config/stt-providers-catalog.yml @@ -0,0 +1,166 @@ +# config/stt-providers-catalog.yml +# Catalog of STT providers for the Chronicle setup wizard. +# +# Adding a new STT provider requires: +# 1. Add a model definition to config/defaults.yml (for runtime) +# 2. Add a provider stanza here (for wizard UI and setup) +# No Python code changes needed in wizard.py or init.py. +# +# Fields: +# id: Unique identifier used in CLI args and config +# display_name: Human-readable name +# wizard_description: Shown in wizard.py's numbered menu +# description: Shown in init.py's interactive menu (platform-agnostic) +# description_macos: Shown in init.py on macOS (overrides description) +# description_linux: Shown in init.py on Linux (overrides description) +# type: "cloud" or "local" +# capabilities.batch: Supports batch transcription +# capabilities.streaming: Supports real-time streaming +# defaults.batch: config.yml defaults.stt value for this provider +# defaults.stream: config.yml defaults.stt_stream value (null if batch-only) +# local_asr_service: Extra service to auto-add (null for cloud providers) +# local_asr_provider: asr-services --provider value (null for cloud providers) +# local_asr_provider_strixhalo: AMD Strix Halo variant (same as local_asr_provider if no variant) +# env_vars: List of env vars to configure +# name: Environment variable name +# prompt: Prompt text (null = derived, skip in setup_transcription) +# description: Optional info line printed before the prompt +# required: If true and empty, skip config_defaults 
update (cloud providers only) +# type: "secret" | "url" | "url_strip_http" | "derived" +# default: Default value for the prompt +# placeholders: List of values treated as "not set" +# notes: Warning messages printed after configuration +# setup_notes: Info messages printed after provider selection, before prompts + +providers: + - id: deepgram + display_name: "Deepgram" + wizard_description: "Deepgram (cloud, streaming + batch)" + description: "Deepgram (recommended - high quality, cloud-based)" + type: cloud + capabilities: + batch: true + streaming: true + defaults: + batch: stt-deepgram + stream: stt-deepgram-stream + local_asr_service: null + local_asr_provider: null + local_asr_provider_strixhalo: null + env_vars: + - name: DEEPGRAM_API_KEY + prompt: "Deepgram API key (leave empty to skip)" + description: "Get your API key from: https://console.deepgram.com/" + required: true + type: secret + placeholders: + - "your_deepgram_api_key_here" + - "your-deepgram-key-here" + notes: [] + setup_notes: [] + + - id: parakeet + display_name: "Parakeet ASR" + wizard_description: "Parakeet ASR (offline, batch only, GPU)" + description_macos: "Offline (Parakeet ASR - CPU-based, runs locally)" + description_linux: "Offline (Parakeet ASR - GPU recommended, runs locally)" + type: local + capabilities: + batch: true + streaming: false + defaults: + batch: stt-parakeet-batch + stream: null + local_asr_service: asr-services + local_asr_provider: nemo + local_asr_provider_strixhalo: nemo-strixhalo + env_vars: + - name: PARAKEET_ASR_URL + prompt: "Parakeet ASR URL" + required: false + type: url + default: "http://host.docker.internal:8767" + notes: + - "Remember to start Parakeet: cd ../../extras/asr-services && docker compose up nemo-asr" + setup_notes: [] + + - id: vibevoice + display_name: "VibeVoice ASR" + wizard_description: "VibeVoice ASR (offline, batch only, built-in diarization, GPU)" + description_macos: "Offline (VibeVoice - CPU-based, built-in diarization)" + 
description_linux: "Offline (VibeVoice - GPU recommended, built-in diarization)" + type: local + capabilities: + batch: true + streaming: false + defaults: + batch: stt-vibevoice + stream: null + local_asr_service: asr-services + local_asr_provider: vibevoice + local_asr_provider_strixhalo: vibevoice-strixhalo + env_vars: + - name: VIBEVOICE_ASR_URL + prompt: "VibeVoice ASR URL" + required: false + type: url + default: "http://host.docker.internal:8767" + notes: + - "Remember to start VibeVoice: cd ../../extras/asr-services && docker compose up vibevoice-asr" + setup_notes: + - "VibeVoice provides built-in speaker diarization - pyannote will be skipped" + + - id: qwen3-asr + display_name: "Qwen3-ASR" + wizard_description: "Qwen3-ASR (offline, streaming + batch, 52 languages, GPU)" + description: "Offline (Qwen3-ASR - GPU required, 52 languages, streaming + batch)" + type: local + capabilities: + batch: true + streaming: true + defaults: + batch: stt-qwen3-asr + stream: stt-qwen3-asr-stream + local_asr_service: asr-services + local_asr_provider: qwen3-asr + local_asr_provider_strixhalo: qwen3-asr + env_vars: + - name: QWEN3_ASR_URL + prompt: "Qwen3-ASR URL" + required: false + type: url_strip_http + default: "http://host.docker.internal:8767" + - name: QWEN3_ASR_STREAM_URL + prompt: null + required: false + type: derived + notes: + - "Remember to start Qwen3-ASR: cd ../../extras/asr-services && docker compose up qwen3-asr-wrapper qwen3-asr-bridge -d" + setup_notes: + - "Qwen3-ASR selected (52 languages, streaming + batch via vLLM)" + + - id: smallest + display_name: "Smallest.ai Pulse" + wizard_description: "Smallest.ai Pulse (cloud, streaming + batch)" + description: "Smallest.ai Pulse (cloud-based, fast, requires API key)" + type: cloud + capabilities: + batch: true + streaming: true + defaults: + batch: stt-smallest + stream: stt-smallest-stream + local_asr_service: null + local_asr_provider: null + local_asr_provider_strixhalo: null + env_vars: + - name: 
SMALLEST_API_KEY + prompt: "Smallest.ai API key (leave empty to skip)" + description: "Get your API key from: https://smallest.ai/" + required: true + type: secret + placeholders: + - "your_smallest_api_key_here" + - "your-smallest-key-here" + notes: [] + setup_notes: [] diff --git a/scripts/pull-images.sh b/scripts/pull-images.sh index caf86bbe..5e759c20 100755 --- a/scripts/pull-images.sh +++ b/scripts/pull-images.sh @@ -54,6 +54,7 @@ IMAGES=( "chronicle-asr-vibevoice:chronicle-asr-vibevoice" "chronicle-asr-vibevoice-strixhalo:chronicle-asr-vibevoice-strixhalo" "chronicle-asr-transformers:chronicle-asr-transformers" + "chronicle-asr-qwen3-vllm:chronicle-asr-qwen3-vllm" "chronicle-asr-qwen3-wrapper:chronicle-asr-qwen3-wrapper" "chronicle-asr-qwen3-bridge:chronicle-asr-qwen3-bridge" "chronicle-havpe-relay:chronicle-havpe-relay" diff --git a/setup_utils.py b/setup_utils.py index ac9b4036..4b0f4c50 100644 --- a/setup_utils.py +++ b/setup_utils.py @@ -463,6 +463,56 @@ def generate_self_signed_certs(server_address: str, certs_dir: str) -> bool: return False +def load_stt_provider_catalog(catalog_path=None): + """Load stt-providers-catalog.yml and return the list of provider dicts. + + Args: + catalog_path: Path to the catalog YAML file. Defaults to + config/stt-providers-catalog.yml next to this module. + + Returns: + List of provider dicts as defined in the catalog; empty if none are defined. + """ + import yaml + + if catalog_path is None: + catalog_path = Path(__file__).parent / "config" / "stt-providers-catalog.yml" + with open(catalog_path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) # safe_load yields None for an empty file + return (data or {}).get("providers") or [] + + +def get_stt_provider_by_id(provider_id, catalog): + """Find a provider dict by its id field. + + Args: + provider_id: The provider id string (e.g. "deepgram", "parakeet"). + catalog: List of provider dicts from load_stt_provider_catalog(). + + Returns: + The matching provider dict, or None if not found. 
+ """ + for p in catalog: + if p["id"] == provider_id: + return p + return None + + +def read_config_yml() -> dict: + """Read config/config.yml relative to CWD and return parsed YAML as dict. + + Returns an empty dict if the file does not exist or is empty. + """ + import yaml + + config_path = Path("config") / "config.yml" + if not config_path.exists(): + return {} + with open(config_path) as f: + data = yaml.safe_load(f) + return data or {} + + def detect_cuda_version(default: str = "cu126") -> str: """ Detect system CUDA version from nvidia-smi output. diff --git a/tests/unit/test_wizard_strixhalo.py b/tests/unit/test_wizard_strixhalo.py index 9be615c5..d2a84049 100644 --- a/tests/unit/test_wizard_strixhalo.py +++ b/tests/unit/test_wizard_strixhalo.py @@ -34,16 +34,38 @@ def ask(*args, **kwargs): def _install_setup_utils_stub(): + # Local import: captured in _load_catalog's closure; keeps stub self-contained. + import yaml as _yaml + + _catalog_path = ( + Path(__file__).resolve().parents[2] / "config" / "stt-providers-catalog.yml" + ) + + def _load_catalog(catalog_path=None): + p = catalog_path or _catalog_path + with open(p) as f: + data = _yaml.safe_load(f) + return data.get("providers", []) + + def _get_provider_by_id(provider_id, catalog): + for p in catalog: + if p["id"] == provider_id: + return p + return None + setup_utils_mod = types.ModuleType("setup_utils") setup_utils_mod.detect_tailscale_info = lambda: (None, None) setup_utils_mod.generate_self_signed_certs = lambda *_: True setup_utils_mod.generate_tailscale_certs = lambda *_: False + setup_utils_mod.get_stt_provider_by_id = _get_provider_by_id setup_utils_mod.is_placeholder = lambda value, *placeholders: value in placeholders + setup_utils_mod.load_stt_provider_catalog = _load_catalog setup_utils_mod.mask_value = lambda value, *_: value setup_utils_mod.prompt_password = lambda *_, **__: "" setup_utils_mod.prompt_with_existing_masked = ( lambda *_, existing_value=None, default="", **__: 
existing_value or default ) + setup_utils_mod.read_config_yml = lambda: {} setup_utils_mod.read_env_value = lambda *_: None sys.modules.setdefault("setup_utils", setup_utils_mod) diff --git a/wizard.py b/wizard.py index 5347e9b0..04c4f100 100755 --- a/wizard.py +++ b/wizard.py @@ -18,41 +18,53 @@ detect_tailscale_info, generate_self_signed_certs, generate_tailscale_certs, + get_stt_provider_by_id, is_placeholder, + load_stt_provider_catalog, mask_value, prompt_password, prompt_with_existing_masked, + read_config_yml, read_env_value, ) +# Catalog cache — loaded once per process +_catalog_cache = None + + +def _get_catalog(): + """Return the STT provider catalog, loading from disk on first call.""" + global _catalog_cache + if _catalog_cache is None: + _catalog_cache = load_stt_provider_catalog() + return _catalog_cache + + console = Console() def get_existing_stt_provider(config_yml: dict): - """Map config.yml defaults.stt value back to wizard provider name, or None.""" + """Map config.yml defaults.stt value back to wizard provider id, or None.""" stt = config_yml.get("defaults", {}).get("stt", "") - mapping = { - "stt-deepgram": "deepgram", - "stt-deepgram-stream": "deepgram", - "stt-parakeet-batch": "parakeet", - "stt-vibevoice": "vibevoice", - "stt-qwen3-asr": "qwen3-asr", - "stt-smallest": "smallest", - "stt-smallest-stream": "smallest", - } - return mapping.get(stt) + if not stt: + return None + for p in _get_catalog(): + defaults = p.get("defaults", {}) + if defaults.get("batch") == stt or defaults.get("stream") == stt: + return p["id"] + return None def get_existing_stream_provider(config_yml: dict): - """Map config.yml defaults.stt_stream value back to wizard streaming provider name, or None.""" + """Map config.yml defaults.stt_stream value back to wizard streaming provider id, or None.""" stt_stream = config_yml.get("defaults", {}).get("stt_stream", "") - mapping = { - "stt-deepgram-stream": "deepgram", - "stt-smallest-stream": "smallest", - 
"stt-qwen3-asr": "qwen3-asr", - "stt-qwen3-asr-stream": "qwen3-asr", - } - return mapping.get(stt_stream) + if not stt_stream: + return None + for p in _get_catalog(): + defaults = p.get("defaults", {}) + if defaults.get("stream") == stt_stream or defaults.get("batch") == stt_stream: + return p["id"] + return None SERVICES = { @@ -195,8 +207,12 @@ def select_services(transcription_provider=None, config_yml=None, memory_provide selected.append("advanced") # Services that will be auto-added based on transcription provider choice + catalog = _get_catalog() + local_asr_ids = { + p["id"] for p in catalog if p.get("local_asr_service") == "asr-services" + } auto_added = set() - if transcription_provider in ("parakeet", "vibevoice", "qwen3-asr"): + if transcription_provider in local_asr_ids: auto_added.add("asr-services") # Optional extras @@ -204,11 +220,12 @@ def select_services(transcription_provider=None, config_yml=None, memory_provide for service_name, service_config in SERVICES["extras"].items(): # Skip services that will be auto-added based on earlier choices if service_name in auto_added: - provider_label = { - "vibevoice": "VibeVoice", - "parakeet": "Parakeet", - "qwen3-asr": "Qwen3-ASR", - }.get(transcription_provider, transcription_provider) + provider_entry = get_stt_provider_by_id(transcription_provider, catalog) + provider_label = ( + provider_entry["display_name"] + if provider_entry + else transcription_provider + ) console.print( f" ✅ {service_config['description']} ({provider_label}) [dim](auto-selected)[/dim]" ) @@ -405,21 +422,18 @@ def run_service_setup( # For asr-services, pass provider from wizard's transcription choice and reuse CUDA version if service_name == "asr-services": - # Map wizard transcription provider to asr-services provider name - if hardware_profile == "strixhalo": - wizard_to_asr_provider = { - "vibevoice": "vibevoice-strixhalo", - "parakeet": "nemo-strixhalo", - "qwen3-asr": "qwen3-asr", - } - else: - wizard_to_asr_provider = { - 
"vibevoice": "vibevoice", - "parakeet": "nemo", - "qwen3-asr": "qwen3-asr", - } - asr_provider = wizard_to_asr_provider.get(transcription_provider) - if asr_provider: + # Derive ASR provider name from catalog + provider_entry = get_stt_provider_by_id( + transcription_provider, _get_catalog() + ) + if provider_entry and provider_entry.get("local_asr_provider"): + if hardware_profile == "strixhalo": + asr_provider = ( + provider_entry.get("local_asr_provider_strixhalo") + or provider_entry["local_asr_provider"] + ) + else: + asr_provider = provider_entry["local_asr_provider"] cmd.extend(["--provider", asr_provider]) console.print( f"[blue][INFO][/blue] Pre-selecting ASR provider: {asr_provider} (from wizard choice: {transcription_provider})" @@ -793,24 +807,17 @@ def setup_hf_token_if_needed(selected_services): return None -# Providers that support real-time streaming -STREAMING_CAPABLE = {"deepgram", "smallest", "qwen3-asr"} - - def select_transcription_provider(config_yml: dict = None): """Ask user which transcription provider they want (batch/primary).""" config_yml = config_yml or {} existing_provider = get_existing_stt_provider(config_yml) + catalog = _get_catalog() - provider_to_choice = { - "deepgram": "1", - "parakeet": "2", - "vibevoice": "3", - "qwen3-asr": "4", - "smallest": "5", - "none": "6", - } - choice_to_provider = {v: k for k, v in provider_to_choice.items()} + # Build numbered menu from catalog + choice_to_provider = {str(i + 1): p["id"] for i, p in enumerate(catalog)} + skip_key = str(len(catalog) + 1) + choice_to_provider[skip_key] = "none" + provider_to_choice = {v: k for k, v in choice_to_provider.items()} default_choice = provider_to_choice.get(existing_provider, "1") console.print("\n🎤 [bold cyan]Transcription Provider[/bold cyan]") @@ -821,26 +828,15 @@ def select_transcription_provider(config_yml: dict = None): "[dim]If it also supports streaming, it will be used for real-time too by default.[/dim]" ) if existing_provider: - provider_labels 
= { - "deepgram": "Deepgram", - "parakeet": "Parakeet ASR", - "vibevoice": "VibeVoice ASR", - "qwen3-asr": "Qwen3-ASR", - "smallest": "Smallest.ai Pulse", - } - console.print( - f"[blue][INFO][/blue] Current: {provider_labels.get(existing_provider, existing_provider)}" + existing_entry = get_stt_provider_by_id(existing_provider, catalog) + existing_label = ( + existing_entry["display_name"] if existing_entry else existing_provider ) + console.print(f"[blue][INFO][/blue] Current: {existing_label}") console.print() - choices = { - "1": "Deepgram (cloud, streaming + batch)", - "2": "Parakeet ASR (offline, batch only, GPU)", - "3": "VibeVoice ASR (offline, batch only, built-in diarization, GPU)", - "4": "Qwen3-ASR (offline, streaming + batch, 52 languages, GPU)", - "5": "Smallest.ai Pulse (cloud, streaming + batch)", - "6": "None (skip transcription setup)", - } + choices = {str(i + 1): p["wizard_description"] for i, p in enumerate(catalog)} + choices[skip_key] = "None (skip transcription setup)" for key, desc in choices.items(): marker = " [dim](current)[/dim]" if key == default_choice else "" @@ -873,11 +869,16 @@ def select_streaming_provider(batch_provider, config_yml: dict = None): if batch_provider in ("none", None): return None + catalog = _get_catalog() existing_stream = get_existing_stream_provider(config_yml) - if batch_provider in STREAMING_CAPABLE: + # Derive streaming-capable set from catalog + streaming_capable = { + p["id"] for p in catalog if p.get("capabilities", {}).get("streaming") + } + + if batch_provider in streaming_capable: # Batch provider can already stream — just confirm - # Default to "use different" if a different streaming provider was previously configured has_different_stream = bool( existing_stream and existing_stream != batch_provider ) @@ -899,19 +900,14 @@ def select_streaming_provider(batch_provider, config_yml: dict = None): f"{batch_provider} is batch-only. 
Pick a streaming provider for real-time transcription:" ) - # Show streaming-capable providers (excluding the batch provider) + # Build menu from streaming-capable providers (excluding the batch provider) streaming_choices = {} provider_map = {} idx = 1 - - for name, desc in [ - ("deepgram", "Deepgram (cloud, streaming)"), - ("smallest", "Smallest.ai Pulse (cloud, streaming)"), - ("qwen3-asr", "Qwen3-ASR (offline, streaming)"), - ]: - if name != batch_provider: - streaming_choices[str(idx)] = desc - provider_map[str(idx)] = name + for p in catalog: + if p.get("capabilities", {}).get("streaming") and p["id"] != batch_provider: + streaming_choices[str(idx)] = f"{p['wizard_description']}" + provider_map[str(idx)] = p["id"] idx += 1 skip_key = str(idx) @@ -1044,7 +1040,13 @@ def select_hardware_profile( Returns: "strixhalo" for AMD Strix Halo profile, otherwise None. """ - strix_capable_providers = {"parakeet", "vibevoice"} + catalog = _get_catalog() + strix_capable_providers = { + p["id"] + for p in catalog + if p.get("local_asr_provider_strixhalo") + and p["local_asr_provider_strixhalo"] != p.get("local_asr_provider") + } needs_hardware_choice = ( "speaker-recognition" in selected_services or transcription_provider in strix_capable_providers @@ -1236,7 +1238,9 @@ def main(): ) # Auto-add asr-services if any local ASR was chosen (batch or streaming) - local_asr_providers = ("parakeet", "vibevoice", "qwen3-asr") + local_asr_providers = { + p["id"] for p in _get_catalog() if p.get("local_asr_service") == "asr-services" + } needs_asr = transcription_provider in local_asr_providers or ( streaming_provider and streaming_provider in local_asr_providers )