From 77c0002c5bbf3a61166c1aa7f7ebf00eaf0489c7 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 24 Feb 2026 19:17:17 +0000
Subject: [PATCH 1/4] Enhance user experience and configuration management in
 setup process

- Updated `wizard.py` to prompt for the Neo4j password with improved handling, so previously entered values are remembered on re-runs.
- Added a camera permission to `app.json` so QR codes can be scanned during backend connection setup.
- Added `expo-camera` and `expo-image-picker` dependencies to `package.json` to support the camera features.
- Introduced a `QRScanner` component, integrated into `BackendStatus`, for scanning backend-connection QR codes.
- Implemented URL-conversion utilities that derive WebSocket URLs from HTTP(S) backend URLs.
- Improved error handling and logging in several components for clearer feedback during setup and operation.
---
 app/app.json                                  |  13 +-
 app/package-lock.json                         |  81 +-
 app/package.json                              |   4 +-
 app/src/components/BackendStatus.tsx          |  36 +-
 app/src/components/QRScanner.tsx              | 225 +++++
 app/src/utils/urlConversion.ts                |  46 +
 backends/advanced/init.py                     |   8 +-
 .../observability/otel_setup.py               |  28 +-
 .../advanced_omi_backend/plugins/router.py    |  42 +
 .../routers/modules/obsidian_routes.py        | 100 +-
 .../memory/providers/llm_providers.py         | 133 ++-
 .../services/obsidian_service.py              | 151 ++-
 .../services/transcription/__init__.py        |  71 +-
 .../utils/text_chunking.py                    | 188 ++++
 .../workers/conversation_jobs.py              | 859 ++++++++++--------
 .../workers/obsidian_jobs.py                  |  19 +-
 .../advanced/src/scripts/cleanup_state.py     | 418 ++++++---
 .../advanced/tests/test_obsidian_service.py   | 157 ++--
 backends/advanced/tests/test_text_chunking.py | 376 ++++++++
 backends/advanced/webui/package-lock.json     |  10 +
 backends/advanced/webui/package.json          |   3 +-
 backends/advanced/webui/src/App.tsx           |   8 +
 .../src/components/PluginSettingsForm.tsx     |   6 +-
 .../webui/src/components/layout/Layout.tsx    |   3 +-
 .../plugins/OrchestrationSection.tsx          |  82 +-
 .../components/plugins/PluginConfigPanel.tsx  |   3 +-
 .../advanced/webui/src/pages/ConnectApp.tsx   | 120 +++
 .../webui/src/pages/ConversationDetail.tsx    |  32 +-
 backends/advanced/webui/src/pages/Queue.tsx   |  23 +-
 backends/advanced/webui/src/services/api.ts   |  16 +-
 tests/Makefile                                |  18 +-
 .../websocket_streaming_tests.robot           |   4 +-
 .../websocket_transcription_e2e_test.robot    |  25 +-
 wizard.py                                     |  12 +-
 34 files changed, 2449 insertions(+), 871 deletions(-)
 create mode 100644 app/src/components/QRScanner.tsx
 create mode 100644 app/src/utils/urlConversion.ts
 create mode 100644 backends/advanced/src/advanced_omi_backend/utils/text_chunking.py
 create mode 100644 backends/advanced/tests/test_text_chunking.py
 create mode 100644 backends/advanced/webui/src/pages/ConnectApp.tsx

diff --git a/app/app.json b/app/app.json
index 2315b46b..d2bf04ec 100644
--- a/app/app.json
+++ b/app/app.json
@@ -19,6 +19,7 @@
       "supportsTablet": true,
       "bundleIdentifier": "com.cupbearer5517.chronicle",
       "infoPlist": {
+        "NSCameraUsageDescription": "Chronicle uses the camera to scan QR codes for backend connection setup.",
         "NSMicrophoneUsageDescription": "Chronicle needs access to your microphone to stream audio to the backend for processing.",
         "NSAppTransportSecurity": {
           "NSAllowsArbitraryLoads": true,
@@ -40,7 +41,8 @@
         "android.permission.FOREGROUND_SERVICE",
         "android.permission.FOREGROUND_SERVICE_DATA_SYNC",
"android.permission.POST_NOTIFICATIONS", - "android.permission.RECORD_AUDIO" + "android.permission.RECORD_AUDIO", + "android.permission.CAMERA" ], "usesCleartextTraffic": true }, @@ -96,6 +98,13 @@ } } ], + [ + "expo-camera", + { + "cameraPermission": "Chronicle uses the camera to scan QR codes for backend connection setup." + } + ], + "expo-image-picker", "./plugins/with-ats" ], "extra": { @@ -104,4 +113,4 @@ } } } -} \ No newline at end of file +} diff --git a/app/package-lock.json b/app/package-lock.json index c76c02ac..c4ceb0c8 100644 --- a/app/package-lock.json +++ b/app/package-lock.json @@ -16,7 +16,9 @@ "deprecated-react-native-prop-types": "^5.0.0", "expo": "~53.0.9", "expo-build-properties": "~0.14.8", + "expo-camera": "~16.1.11", "expo-dev-client": "~5.2.4", + "expo-image-picker": "~16.1.4", "expo-router": "~5.0.6", "expo-status-bar": "~2.2.3", "friend-lite-react-native": "^1.0.2", @@ -80,7 +82,6 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.4.tgz", "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -2813,7 +2814,6 @@ "integrity": "sha512-Q7UnBqOO/JsWfgmO9qZjrKgMi/0U9ih0FywXXheml8VH1hn/pBXKIeO/BvzA6g5gHIvBZ/6KyhdGoNok1R/ZJw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "@react-native-community/cli-clean": "20.0.1", "@react-native-community/cli-config": "20.0.1", @@ -3429,7 +3429,6 @@ "resolved": "https://registry.npmjs.org/@react-navigation/native/-/native-7.1.28.tgz", "integrity": "sha512-d1QDn+KNHfHGt3UIwOZvupvdsDdiHYZBEj7+wL2yDVo3tMezamYy60H9s3EnNVE1Ae1ty0trc7F2OKqo/RmsdQ==", "license": "MIT", - "peer": true, "dependencies": { "@react-navigation/core": "^7.14.0", "escape-string-regexp": "^4.0.0", @@ -3625,7 +3624,6 @@ "integrity": "sha512-ixLZ7zG7j1fM0DijL9hDArwhwcCb4vqmePgwtV0GfnkHRSCUEv4LvzarcTdhoqgyMznUx/EhoTUv31CKZzkQlw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -3750,7 +3748,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -4334,7 +4331,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001737", "electron-to-chromium": "^1.5.211", @@ -5290,7 +5286,6 @@ "resolved": "https://registry.npmjs.org/expo/-/expo-53.0.22.tgz", "integrity": "sha512-sJ2I4W/e5iiM4u/wYCe3qmW4D7WPCRqByPDD0hJcdYNdjc9HFFFdO4OAudZVyC/MmtoWZEIH5kTJP1cw9FjzYA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.20.0", "@expo/cli": "0.24.21", @@ -5374,12 +5369,31 @@ "node": ">=10" } }, + "node_modules/expo-camera": { + "version": "16.1.11", + "resolved": "https://registry.npmjs.org/expo-camera/-/expo-camera-16.1.11.tgz", + "integrity": "sha512-etA5ZKoC6nPBnWWqiTmlX//zoFZ6cWQCCIdmpUHTGHAKd4qZNCkhPvBWbi8o32pDe57lix1V4+TPFgEcvPwsaA==", + "license": "MIT", + "dependencies": { + "invariant": "^2.2.4" + }, + "peerDependencies": { + "expo": "*", + "react": "*", + "react-native": "*", + "react-native-web": "*" + }, + "peerDependenciesMeta": { + "react-native-web": { + "optional": true + } + } + }, "node_modules/expo-constants": { "version": "17.1.7", "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-17.1.7.tgz", 
"integrity": "sha512-byBjGsJ6T6FrLlhOBxw4EaiMXrZEn/MlUYIj/JAd+FS7ll5X/S4qVRbIimSJtdW47hXMq0zxPfJX6njtA56hHA==", "license": "MIT", - "peer": true, "dependencies": { "@expo/config": "~11.0.12", "@expo/env": "~1.0.7" @@ -5472,7 +5486,6 @@ "resolved": "https://registry.npmjs.org/expo-font/-/expo-font-13.3.2.tgz", "integrity": "sha512-wUlMdpqURmQ/CNKK/+BIHkDA5nGjMqNlYmW0pJFXY/KE/OG80Qcavdu2sHsL4efAIiNGvYdBS10WztuQYU4X0A==", "license": "MIT", - "peer": true, "dependencies": { "fontfaceobserver": "^2.1.0" }, @@ -5481,6 +5494,27 @@ "react": "*" } }, + "node_modules/expo-image-loader": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/expo-image-loader/-/expo-image-loader-5.1.0.tgz", + "integrity": "sha512-sEBx3zDQIODWbB5JwzE7ZL5FJD+DK3LVLWBVJy6VzsqIA6nDEnSFnsnWyCfCTSvbGigMATs1lgkC2nz3Jpve1Q==", + "license": "MIT", + "peerDependencies": { + "expo": "*" + } + }, + "node_modules/expo-image-picker": { + "version": "16.1.4", + "resolved": "https://registry.npmjs.org/expo-image-picker/-/expo-image-picker-16.1.4.tgz", + "integrity": "sha512-bTmmxtw1AohUT+HxEBn2vYwdeOrj1CLpMXKjvi9FKSoSbpcarT4xxI0z7YyGwDGHbrJqyyic3I9TTdP2J2b4YA==", + "license": "MIT", + "dependencies": { + "expo-image-loader": "~5.1.0" + }, + "peerDependencies": { + "expo": "*" + } + }, "node_modules/expo-json-utils": { "version": "0.15.0", "resolved": "https://registry.npmjs.org/expo-json-utils/-/expo-json-utils-0.15.0.tgz", @@ -5502,6 +5536,7 @@ "resolved": "https://registry.npmjs.org/expo-linking/-/expo-linking-8.0.11.tgz", "integrity": "sha512-+VSaNL5om3kOp/SSKO5qe6cFgfSIWnnQDSbA7XLs3ECkYzXRquk5unxNS3pg7eK5kNUmQ4kgLI7MhTggAEUBLA==", "license": "MIT", + "peer": true, "dependencies": { "expo-constants": "~18.0.12", "invariant": "^2.2.4" @@ -5516,6 +5551,7 @@ "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz", "integrity": "sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==", "license": "MIT", + "peer": true, "dependencies": { "@babel/highlight": "^7.10.4" } @@ -5525,6 +5561,7 @@ "resolved": "https://registry.npmjs.org/@expo/config/-/config-12.0.13.tgz", "integrity": "sha512-Cu52arBa4vSaupIWsF0h7F/Cg//N374nYb7HAxV0I4KceKA7x2UXpYaHOL7EEYYvp7tZdThBjvGpVmr8ScIvaQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "@expo/config-plugins": "~54.0.4", @@ -5546,6 +5583,7 @@ "resolved": "https://registry.npmjs.org/@expo/config-plugins/-/config-plugins-54.0.4.tgz", "integrity": "sha512-g2yXGICdoOw5i3LkQSDxl2Q5AlQCrG7oniu0pCPPO+UxGb7He4AFqSvPSy8HpRUj55io17hT62FTjYRD+d6j3Q==", "license": "MIT", + "peer": true, "dependencies": { "@expo/config-types": "^54.0.10", "@expo/json-file": "~10.0.8", @@ -5567,13 +5605,15 @@ "version": "54.0.10", "resolved": "https://registry.npmjs.org/@expo/config-types/-/config-types-54.0.10.tgz", "integrity": "sha512-/J16SC2an1LdtCZ67xhSkGXpALYUVUNyZws7v+PVsFZxClYehDSoKLqyRaGkpHlYrCc08bS0RF5E0JV6g50psA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/expo-linking/node_modules/@expo/env": { "version": "2.0.8", "resolved": "https://registry.npmjs.org/@expo/env/-/env-2.0.8.tgz", "integrity": "sha512-5VQD6GT8HIMRaSaB5JFtOXuvfDVU80YtZIuUT/GDhUF782usIXY13Tn3IdDz1Tm/lqA9qnRZQ1BF4t7LlvdJPA==", "license": "MIT", + "peer": true, "dependencies": { "chalk": "^4.0.0", "debug": "^4.3.4", @@ -5587,6 +5627,7 @@ "resolved": "https://registry.npmjs.org/@expo/json-file/-/json-file-10.0.8.tgz", "integrity": 
"sha512-9LOTh1PgKizD1VXfGQ88LtDH0lRwq9lsTb4aichWTWSWqy3Ugfkhfm3BhzBIkJJfQQ5iJu3m/BoRlEIjoCGcnQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "json5": "^2.2.3" @@ -5597,6 +5638,7 @@ "resolved": "https://registry.npmjs.org/@expo/plist/-/plist-0.4.8.tgz", "integrity": "sha512-pfNtErGGzzRwHP+5+RqswzPDKkZrx+Cli0mzjQaus1ZWFsog5ibL+nVT3NcporW51o8ggnt7x813vtRbPiyOrQ==", "license": "MIT", + "peer": true, "dependencies": { "@xmldom/xmldom": "^0.8.8", "base64-js": "^1.2.3", @@ -5608,6 +5650,7 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", "license": "MIT", + "peer": true, "engines": { "node": "18 || 20 || >=22" } @@ -5617,6 +5660,7 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz", "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==", "license": "MIT", + "peer": true, "dependencies": { "balanced-match": "^4.0.2" }, @@ -5629,6 +5673,7 @@ "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", "license": "MIT", + "peer": true, "engines": { "node": ">= 6" } @@ -5638,6 +5683,7 @@ "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-18.0.13.tgz", "integrity": "sha512-FnZn12E1dRYKDHlAdIyNFhBurKTS3F9CrfrBDJI5m3D7U17KBHMQ6JEfYlSj7LG7t+Ulr+IKaj58L1k5gBwTcQ==", "license": "MIT", + "peer": true, "dependencies": { "@expo/config": "~12.0.13", "@expo/env": "~2.0.8" @@ -5652,6 +5698,7 @@ "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.6.tgz", "integrity": "sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==", "license": "BlueOak-1.0.0", + "peer": true, "dependencies": { "minimatch": "^10.2.2", "minipass": "^7.1.3", @@ -5669,6 +5716,7 @@ "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz", "integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==", "license": "BlueOak-1.0.0", + "peer": true, "engines": { "node": "20 || >=22" } @@ -5678,6 +5726,7 @@ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.2.tgz", "integrity": "sha512-+G4CpNBxa5MprY+04MbgOw1v7So6n5JY166pFi9KfYwT78fxScCeSNQSNzp6dpPSW2rONOps6Ocam1wFhCgoVw==", "license": "BlueOak-1.0.0", + "peer": true, "dependencies": { "brace-expansion": "^5.0.2" }, @@ -5693,6 +5742,7 @@ "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", "license": "BlueOak-1.0.0", + "peer": true, "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" @@ -5709,6 +5759,7 @@ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "license": "ISC", + "peer": true, "bin": { "semver": "bin/semver.js" }, @@ -5721,6 +5772,7 @@ "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz", "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==", "license": "MIT", + "peer": true, "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", @@ -5968,6 +6020,7 @@ "resolved": 
"https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "license": "MIT", + "peer": true, "engines": { "node": ">=12.0.0" }, @@ -8645,7 +8698,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-3.0.1.tgz", "integrity": "sha512-I3EurrIQMlRc9IaAZnqRR044Phh2DXY+55o7uJ0V+hYZAcQYSuFWsc9q5PvyDHUSCe1Qxn/iBz+78s86zWnGag==", "license": "MIT", - "peer": true, "engines": { "node": ">=10" }, @@ -8934,7 +8986,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz", "integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -8999,7 +9050,6 @@ "resolved": "https://registry.npmjs.org/react-native/-/react-native-0.79.6.tgz", "integrity": "sha512-kvIWSmf4QPfY41HC25TR285N7Fv0Pyn3DAEK8qRL9dA35usSaxsJkHfw+VqnonqJjXOaoKCEanwudRAJ60TBGA==", "license": "MIT", - "peer": true, "dependencies": { "@jest/create-cache-key-function": "^29.7.0", "@react-native/assets-registry": "0.79.6", @@ -9098,7 +9148,6 @@ "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-5.4.0.tgz", "integrity": "sha512-JaEThVyJcLhA+vU0NU8bZ0a1ih6GiF4faZ+ArZLqpYbL6j7R3caRqj+mE3lEtKCuHgwjLg3bCxLL1GPUJZVqUA==", "license": "MIT", - "peer": true, "peerDependencies": { "react": "*", "react-native": "*" @@ -9109,7 +9158,6 @@ "resolved": "https://registry.npmjs.org/react-native-screens/-/react-native-screens-4.11.1.tgz", "integrity": "sha512-F0zOzRVa3ptZfLpD0J8ROdo+y1fEPw+VBFq1MTY/iyDu08al7qFUO5hLMd+EYMda5VXGaTFCa8q7bOppUszhJw==", "license": "MIT", - "peer": true, "dependencies": { "react-freeze": "^1.0.0", "react-native-is-edge-to-edge": "^1.1.7", @@ -10519,6 +10567,7 @@ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", "license": "MIT", + "peer": true, "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" @@ -10535,6 +10584,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -10613,7 +10663,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/app/package.json b/app/package.json index 2d59b699..dc260c19 100644 --- a/app/package.json +++ b/app/package.json @@ -30,7 +30,9 @@ "setimmediate": "^1.0.5", "webidl-conversions": "^7.0.0", "react-native-screens": "~4.11.1", - "react-native-safe-area-context": "5.4.0" + "react-native-safe-area-context": "5.4.0", + "expo-camera": "~16.1.11", + "expo-image-picker": "~16.1.4" }, "devDependencies": { "@babel/core": "^7.20.0", diff --git a/app/src/components/BackendStatus.tsx b/app/src/components/BackendStatus.tsx index f69f7a57..e0eec8d5 100644 --- a/app/src/components/BackendStatus.tsx +++ b/app/src/components/BackendStatus.tsx @@ -1,6 +1,8 @@ import React, { useState, useEffect } from 'react'; import { View, Text, TextInput, TouchableOpacity, StyleSheet, Alert, ActivityIndicator } from 'react-native'; import { useTheme, ThemeColors } from '../theme'; +import { QRScanner } from 
'./QRScanner'; +import { httpUrlToWebSocketUrl } from '../utils/urlConversion'; interface BackendStatusProps { backendUrl: string; @@ -26,6 +28,7 @@ export const BackendStatus: React.FC = ({ status: 'unknown', message: 'Not checked', }); + const [showQRScanner, setShowQRScanner] = useState(false); const checkBackendHealth = async (showAlert: boolean = false) => { if (!backendUrl.trim()) { @@ -130,6 +133,13 @@ export const BackendStatus: React.FC = ({ )} + setShowQRScanner(true)} + > + Scan QR Code + + checkBackendHealth(true)} @@ -139,8 +149,17 @@ export const BackendStatus: React.FC = ({ - Enter the WebSocket URL of your backend server. Status is automatically checked. + Enter the WebSocket URL or scan a QR code from the Chronicle dashboard. + + { + const wsUrl = httpUrlToWebSocketUrl(httpUrl); + onBackendUrlChange(wsUrl); + }} + onClose={() => setShowQRScanner(false)} + /> ); }; @@ -215,6 +234,21 @@ const createStyles = (colors: ThemeColors) => StyleSheet.create({ textAlign: 'center', fontStyle: 'italic', }, + qrButton: { + backgroundColor: colors.card, + paddingVertical: 12, + paddingHorizontal: 20, + borderRadius: 8, + alignItems: 'center', + marginBottom: 10, + borderWidth: 1, + borderColor: colors.primary, + }, + qrButtonText: { + color: colors.primary, + fontSize: 16, + fontWeight: '600', + }, button: { backgroundColor: colors.primary, paddingVertical: 12, diff --git a/app/src/components/QRScanner.tsx b/app/src/components/QRScanner.tsx new file mode 100644 index 00000000..87bedd2d --- /dev/null +++ b/app/src/components/QRScanner.tsx @@ -0,0 +1,225 @@ +import React, { useState, useEffect } from 'react'; +import { + View, + Text, + TouchableOpacity, + StyleSheet, + Modal, + Alert, +} from 'react-native'; +import { CameraView, useCameraPermissions, scanFromURLAsync } from 'expo-camera'; +import * as ImagePicker from 'expo-image-picker'; +import { isValidBackendUrl } from '../utils/urlConversion'; +import { useTheme, ThemeColors } from '../theme'; + +interface QRScannerProps { + visible: boolean; + onScanned: (url: string) => void; + onClose: () => void; +} + +export const QRScanner: React.FC = ({ visible, onScanned, onClose }) => { + const { colors } = useTheme(); + const s = createStyles(colors); + const [permission, requestPermission] = useCameraPermissions(); + const [scanned, setScanned] = useState(false); + + useEffect(() => { + if (visible) { + setScanned(false); + } + }, [visible]); + + const handleBarCodeScanned = ({ data }: { data: string }) => { + if (scanned) return; + setScanned(true); + + if (isValidBackendUrl(data)) { + onScanned(data); + onClose(); + } else { + Alert.alert( + 'Invalid QR Code', + 'The scanned QR code does not contain a valid backend URL. 
Please scan the QR code from the Chronicle dashboard.', + [{ text: 'Try Again', onPress: () => setScanned(false) }] + ); + } + }; + + const handlePickFromGallery = async () => { + try { + const result = await ImagePicker.launchImageLibraryAsync({ + mediaTypes: ['images'], + quality: 1, + }); + + if (result.canceled || !result.assets?.[0]?.uri) return; + + const scanResult = await scanFromURLAsync(result.assets[0].uri, ['qr']); + + if (scanResult.length > 0 && scanResult[0].data) { + handleBarCodeScanned({ data: scanResult[0].data }); + } else { + Alert.alert('No QR Code Found', 'Could not find a QR code in the selected image.'); + } + } catch (error) { + console.log('[QRScanner] Gallery scan error:', error); + Alert.alert('Error', 'Failed to scan QR code from image.'); + } + }; + + const renderContent = () => { + if (!permission) { + return Requesting camera permission...; + } + + if (!permission.granted) { + return ( + + Camera access is needed to scan QR codes. + + Grant Camera Access + + or + + Pick from Gallery + + + ); + } + + return ( + + + + Point at QR code on Chronicle dashboard + + + Pick from Gallery + + + ); + }; + + return ( + + + + Scan QR Code + + Close + + + {renderContent()} + + + ); +}; + +const createStyles = (colors: ThemeColors) => + StyleSheet.create({ + container: { + flex: 1, + backgroundColor: colors.background, + }, + header: { + flexDirection: 'row', + justifyContent: 'space-between', + alignItems: 'center', + paddingHorizontal: 20, + paddingTop: 60, + paddingBottom: 15, + borderBottomWidth: 1, + borderBottomColor: colors.separator, + backgroundColor: colors.card, + }, + headerTitle: { + fontSize: 18, + fontWeight: '600', + color: colors.text, + }, + closeButton: { + padding: 8, + }, + closeButtonText: { + fontSize: 16, + color: colors.primary, + fontWeight: '500', + }, + cameraContainer: { + flex: 1, + alignItems: 'center', + }, + camera: { + flex: 1, + width: '100%', + }, + overlay: { + position: 'absolute', + top: 40, + left: 20, + right: 20, + alignItems: 'center', + }, + overlayText: { + color: '#ffffff', + fontSize: 16, + fontWeight: '500', + textAlign: 'center', + backgroundColor: 'rgba(0,0,0,0.5)', + paddingHorizontal: 16, + paddingVertical: 8, + borderRadius: 8, + overflow: 'hidden', + }, + permissionContainer: { + flex: 1, + justifyContent: 'center', + alignItems: 'center', + padding: 30, + }, + messageText: { + fontSize: 16, + color: colors.textSecondary, + textAlign: 'center', + marginBottom: 20, + }, + permissionButton: { + backgroundColor: colors.primary, + paddingVertical: 12, + paddingHorizontal: 24, + borderRadius: 8, + }, + permissionButtonText: { + color: '#ffffff', + fontSize: 16, + fontWeight: '600', + }, + orText: { + fontSize: 14, + color: colors.textTertiary, + marginVertical: 12, + }, + galleryButton: { + paddingVertical: 12, + paddingHorizontal: 24, + borderRadius: 8, + borderWidth: 1, + borderColor: colors.primary, + marginTop: 12, + marginBottom: 20, + }, + galleryButtonText: { + color: colors.primary, + fontSize: 16, + fontWeight: '500', + textAlign: 'center', + }, + }); + +export default QRScanner; diff --git a/app/src/utils/urlConversion.ts b/app/src/utils/urlConversion.ts new file mode 100644 index 00000000..530e53fc --- /dev/null +++ b/app/src/utils/urlConversion.ts @@ -0,0 +1,46 @@ +/** + * Converts an HTTP(S) URL to the corresponding WebSocket URL for the Chronicle backend. 
+ * + * Examples: + * https://100.64.1.5 → wss://100.64.1.5/ws + * http://localhost:8000 → ws://localhost:8000/ws + * https://my.server.com → wss://my.server.com/ws + */ +export function httpUrlToWebSocketUrl(httpUrl: string): string { + let url = httpUrl.trim().replace(/\/+$/, '') + + if (url.startsWith('https://')) { + url = 'wss://' + url.slice('https://'.length) + } else if (url.startsWith('http://')) { + url = 'ws://' + url.slice('http://'.length) + } else { + // If no scheme, assume wss + url = 'wss://' + url + } + + // Append /ws if not already present + if (!url.endsWith('/ws')) { + url += '/ws' + } + + return url +} + +/** + * Validates that a scanned string looks like a valid HTTP(S) backend URL. + */ +export function isValidBackendUrl(url: string): boolean { + if (!url || typeof url !== 'string') return false + + const trimmed = url.trim() + if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) { + return false + } + + try { + const parsed = new URL(trimmed) + return parsed.protocol === 'http:' || parsed.protocol === 'https:' + } catch { + return false + } +} diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 375818be..eaf9f92f 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -837,7 +837,13 @@ def setup_neo4j(self): "Neo4j is used for Knowledge Graph (entity/relationship extraction)" ) self.console.print() - neo4j_password = self.prompt_password("Neo4j password (min 8 chars)") + neo4j_password = self.prompt_with_existing_masked( + "Neo4j password (min 8 chars)", + env_key="NEO4J_PASSWORD", + placeholders=["", "your-neo4j-password"], + is_password=True, + default="neo4jpassword", + ) self.config["NEO4J_HOST"] = "neo4j" self.config["NEO4J_USER"] = "neo4j" diff --git a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py index 9c891aeb..488dcb0d 100644 --- a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py +++ b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py @@ -69,7 +69,6 @@ def init_otel() -> None: """Initialize OTEL with Galileo exporter and OpenAI instrumentor. Call once at app startup. Safe to call if Galileo is not configured (no-op). - Filters out embedding spans — only LLM (chat completion) calls are exported. 
""" if not is_galileo_enabled(): logger.info("Galileo not configured, skipping OTEL initialization") @@ -78,41 +77,16 @@ def init_otel() -> None: try: from galileo import otel from openinference.instrumentation.openai import OpenAIInstrumentor - from opentelemetry import context from opentelemetry.sdk import trace as trace_sdk - from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor project = os.getenv("GALILEO_PROJECT", "chronicle") logstream = os.getenv("GALILEO_LOG_STREAM", "default") - class _LLMOnlyProcessor(SpanProcessor): - """Wraps GalileoSpanProcessor, dropping EMBEDDING spans.""" - - def __init__(self, inner: SpanProcessor): - self._inner = inner - - def on_start( - self, span: Span, parent_context: context.Context | None = None - ) -> None: - self._inner.on_start(span, parent_context) - - def on_end(self, span: ReadableSpan) -> None: - kind = span.attributes.get("openinference.span.kind", "") - if kind == "EMBEDDING": - return # drop - self._inner.on_end(span) - - def shutdown(self) -> None: - self._inner.shutdown() - - def force_flush(self, timeout_millis: int = 30000) -> bool: - return self._inner.force_flush(timeout_millis) - tracer_provider = trace_sdk.TracerProvider() galileo_processor = otel.GalileoSpanProcessor( project=project, logstream=logstream ) - tracer_provider.add_span_processor(_LLMOnlyProcessor(galileo_processor)) + tracer_provider.add_span_processor(galileo_processor) # Auto-instrument all OpenAI SDK calls OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py index e06d3043..970ae9e7 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/router.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py @@ -550,6 +550,48 @@ async def check_connectivity(self) -> Dict[str, Dict[str, Any]]: return results + def get_asr_keywords(self) -> list[str]: + """Collect all wake words and keywords from enabled plugins. + + These are meant to be injected into STT providers as keyword + boosting hints (e.g. Deepgram ``keyterm``, VibeVoice ``context_info``) + so that the transcription engine is more likely to correctly + recognise them. + + Returns: + Deduplicated list of keyword strings. 
+ """ + seen: set[str] = set() + result: list[str] = [] + + for plugin in self.plugins.values(): + if not plugin.enabled: + continue + condition = plugin.condition or {} + condition_type = condition.get("type", "always") + + words: list[str] = [] + if condition_type == "wake_word": + words = condition.get("wake_words", []) + if not words: + w = condition.get("wake_word", "") + if w: + words = [w] + elif condition_type == "keyword_anywhere": + words = condition.get("keywords", []) + if not words: + w = condition.get("keyword", "") + if w: + words = [w] + + for w in words: + normalised = w.strip().lower() + if normalised and normalised not in seen: + seen.add(normalised) + result.append(normalised) + + return result + async def cleanup_all(self): """Clean up all registered plugins""" for plugin_id, plugin in self.plugins.items(): diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py index b02ed426..d3d6f844 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py @@ -1,4 +1,3 @@ - import json import logging import os @@ -13,7 +12,7 @@ from advanced_omi_backend.auth import current_active_user, current_superuser from advanced_omi_backend.controllers.queue_controller import default_queue, redis_conn -from advanced_omi_backend.services.obsidian_service import obsidian_service +from advanced_omi_backend.services.obsidian_service import get_obsidian_service from advanced_omi_backend.users import User from advanced_omi_backend.utils.file_utils import ZipExtractionError, extract_zip from advanced_omi_backend.workers.obsidian_jobs import ( @@ -25,23 +24,26 @@ router = APIRouter(prefix="/obsidian", tags=["obsidian"]) + class IngestRequest(BaseModel): vault_path: str + @router.post("/ingest") async def ingest_obsidian_vault( - request: IngestRequest, - current_user: User = Depends(current_active_user) + request: IngestRequest, current_user: User = Depends(current_active_user) ): """ Immediate/synchronous ingestion endpoint (legacy). Not recommended for UI. Prefer the upload_zip + start endpoints to enable progress reporting. """ if not os.path.exists(request.vault_path): - raise HTTPException(status_code=400, detail=f"Path not found: {request.vault_path}") + raise HTTPException( + status_code=400, detail=f"Path not found: {request.vault_path}" + ) try: - result = await obsidian_service.ingest_vault(request.vault_path) + result = await get_obsidian_service().ingest_vault(request.vault_path) return {"message": "Ingestion complete", **result} except Exception as e: logger.error(f"Ingestion failed: {e}") @@ -50,15 +52,16 @@ async def ingest_obsidian_vault( @router.post("/upload_zip") async def upload_obsidian_zip( - file: UploadFile = File(...), - current_user: User = Depends(current_superuser) + file: UploadFile = File(...), current_user: User = Depends(current_superuser) ): """ Upload a zipped Obsidian vault. Returns a job_id that can be started later. Uses upload_files_async pattern from upload_files.py for proper file handling. 
""" - if not file.filename.lower().endswith('.zip'): - raise HTTPException(status_code=400, detail="Please upload a .zip file of your Obsidian vault") + if not file.filename.lower().endswith(".zip"): + raise HTTPException( + status_code=400, detail="Please upload a .zip file of your Obsidian vault" + ) job_id = str(uuid.uuid4()) base_dir = Path("/app/data/obsidian_jobs") @@ -67,21 +70,23 @@ async def upload_obsidian_zip( job_dir.mkdir(parents=True, exist_ok=True) zip_path = job_dir / "vault.zip" extract_dir = job_dir / "vault" - + # Use upload_files_async pattern for proper file handling with cleanup zip_file_handle = None try: # Read file content file_content = await file.read() - + # Save zip file using proper file handling pattern from upload_files_async try: - zip_file_handle = open(zip_path, 'wb') + zip_file_handle = open(zip_path, "wb") zip_file_handle.write(file_content) except IOError as e: logger.error(f"Error writing zip file {zip_path}: {e}") - raise HTTPException(status_code=500, detail=f"Failed to save uploaded zip: {e}") - + raise HTTPException( + status_code=500, detail=f"Failed to save uploaded zip: {e}" + ) + # Extract zip file using utility function try: extract_zip(zip_path, extract_dir) @@ -90,10 +95,12 @@ async def upload_obsidian_zip( raise HTTPException(status_code=400, detail=f"Invalid zip file: {e}") except ZipExtractionError as e: logger.error(f"Error extracting zip file: {e}") - raise HTTPException(status_code=500, detail=f"Failed to extract zip file: {e}") + raise HTTPException( + status_code=500, detail=f"Failed to extract zip file: {e}" + ) total = count_markdown_files(str(extract_dir)) - + # Store pending job state in Redis pending_state = { "status": "ready", @@ -101,16 +108,20 @@ async def upload_obsidian_zip( "processed": 0, "errors": [], "vault_path": str(extract_dir), - "job_id": job_id + "job_id": job_id, } - redis_conn.set(f"obsidian_pending:{job_id}", json.dumps(pending_state), ex=3600*24) # 24h expiry + redis_conn.set( + f"obsidian_pending:{job_id}", json.dumps(pending_state), ex=3600 * 24 + ) # 24h expiry return {"job_id": job_id, "vault_path": str(extract_dir), "total_files": total} except HTTPException: raise except Exception as e: logger.exception(f"Failed to process uploaded zip: {e}") - raise HTTPException(status_code=500, detail=f"Failed to process uploaded zip: {e}") + raise HTTPException( + status_code=500, detail=f"Failed to process uploaded zip: {e}" + ) finally: # Ensure file handle is closed (following upload_files_async pattern) if zip_file_handle: @@ -123,17 +134,17 @@ async def upload_obsidian_zip( @router.post("/start") async def start_ingestion( job_id: str = Body(..., embed=True), - current_user: User = Depends(current_active_user) + current_user: User = Depends(current_active_user), ): # Check if job is pending pending_key = f"obsidian_pending:{job_id}" pending_data = redis_conn.get(pending_key) - + if pending_data: try: job_data = json.loads(pending_data) vault_path = job_data.get("vault_path") - + # Enqueue to RQ rq_job = default_queue.enqueue( ingest_obsidian_vault_job, @@ -141,27 +152,31 @@ async def start_ingestion( vault_path, # arg2 job_id=job_id, # Set RQ job ID to match our ID description=f"Obsidian ingestion for job {job_id}", - job_timeout=3600 # 1 hour timeout + job_timeout=3600, # 1 hour timeout ) - + # Remove pending key redis_conn.delete(pending_key) - - return {"message": "Ingestion started", "job_id": job_id, "rq_job_id": rq_job.id} + + return { + "message": "Ingestion started", + "job_id": job_id, + "rq_job_id": 
rq_job.id, + } except Exception as e: logger.exception(f"Failed to start job {job_id}: {e}") raise HTTPException(status_code=500, detail=f"Failed to start job: {e}") - + # Check if already in RQ try: job = Job.fetch(job_id, connection=redis_conn) status = job.get_status() if status in ("queued", "started", "deferred", "scheduled"): - raise HTTPException(status_code=400, detail=f"Job already {status}") - + raise HTTPException(status_code=400, detail=f"Job already {status}") + # If finished/failed, we could potentially restart? But for now let's say it's done. raise HTTPException(status_code=400, detail=f"Job is in state: {status}") - + except NoSuchJobError: raise HTTPException(status_code=404, detail="Job not found") @@ -171,7 +186,7 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us # 1. Try RQ first try: job = Job.fetch(job_id, connection=redis_conn) - + # Get status status = job.get_status() if status == "started": @@ -181,13 +196,18 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us meta = job.meta or {} # If meta has status, prefer it (for granular updates) - if "status" in meta and meta["status"] in ("running", "finished", "failed", "canceled"): - status = meta["status"] + if "status" in meta and meta["status"] in ( + "running", + "finished", + "failed", + "canceled", + ): + status = meta["status"] total = meta.get("total_files", 0) processed = meta.get("processed", 0) percent = int((processed / total) * 100) if total else 0 - + return { "job_id": job_id, "status": status, @@ -196,14 +216,14 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us "percent": percent, "errors": meta.get("errors", []), "vault_path": meta.get("vault_path"), - "rq_job_id": job.id + "rq_job_id": job.id, } - + except NoSuchJobError: # 2. 
Check pending pending_key = f"obsidian_pending:{job_id}" pending_data = redis_conn.get(pending_key) - + if pending_data: try: job_data = json.loads(pending_data) @@ -214,10 +234,8 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us "processed": 0, "percent": 0, "errors": [], - "vault_path": job_data.get("vault_path") + "vault_path": job_data.get("vault_path"), } except: raise HTTPException(status_code=500, detail="Failed to get job status") raise HTTPException(status_code=404, detail="Job not found") - - diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py index 2d83d24c..3a81b53e 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py @@ -15,8 +15,12 @@ from typing import Any, Dict, List, Optional from advanced_omi_backend.model_registry import ModelDef, get_models_registry -from advanced_omi_backend.openai_factory import create_openai_client, is_langfuse_enabled +from advanced_omi_backend.openai_factory import ( + create_openai_client, + is_langfuse_enabled, +) from advanced_omi_backend.prompt_registry import get_prompt_registry +from advanced_omi_backend.utils.text_chunking import semantic_chunk_text from ..base import LLMProviderBase from ..prompts import ( @@ -77,6 +81,7 @@ async def generate_openai_embeddings( ) return [data.embedding for data in response.data] + # TODO: Re-enable spacy when Docker build is fixed # try: # nlp = spacy.load("en_core_web_sm") @@ -86,6 +91,7 @@ async def generate_openai_embeddings( # nlp = None nlp = None # Temporarily disabled + def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: """Split text into chunks using spaCy sentence segmentation. max_tokens is the maximum number of words in a chunk. @@ -93,14 +99,14 @@ def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: # Fallback chunking when spacy is not available if nlp is None: # Simple sentence-based chunking - sentences = text.replace('\n', ' ').split('. ') + sentences = text.replace("\n", " ").split(". ") chunks = [] current_chunk = "" current_tokens = 0 - + for sentence in sentences: sentence_tokens = len(sentence.split()) - + if current_tokens + sentence_tokens > max_tokens and current_chunk: chunks.append(current_chunk.strip()) current_chunk = sentence @@ -111,23 +117,23 @@ def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: else: current_chunk = sentence current_tokens += sentence_tokens - + if current_chunk.strip(): chunks.append(current_chunk.strip()) - + return chunks if chunks else [text] - + # Original spacy implementation when available doc = nlp(text) - + chunks = [] current_chunk = "" current_tokens = 0 - + for sent in doc.sents: sent_text = sent.text.strip() sent_tokens = len(sent_text.split()) # Simple word count - + if current_tokens + sent_tokens > max_tokens and current_chunk: chunks.append(current_chunk.strip()) current_chunk = sent_text @@ -135,12 +141,13 @@ def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: else: current_chunk += " " + sent_text if current_chunk else sent_text current_tokens += sent_tokens - + if current_chunk.strip(): chunks.append(current_chunk.strip()) - + return chunks + class OpenAIProvider(LLMProviderBase): """Config-driven LLM provider using OpenAI SDK (OpenAI-compatible). 
@@ -153,7 +160,9 @@ def __init__(self, config: Dict[str, Any]): # Ignore provider-specific envs; use registry as single source of truth registry = get_models_registry() if not registry: - raise RuntimeError("config.yml not found or invalid; cannot initialize model registry") + raise RuntimeError( + "config.yml not found or invalid; cannot initialize model registry" + ) self._registry = registry @@ -170,9 +179,15 @@ def __init__(self, config: Dict[str, Any]): self.model = self.llm_def.model_name # Store parameters for embeddings (use separate config if available) - self.embedding_model = (self.embed_def.model_name if self.embed_def else self.llm_def.model_name) - self.embedding_api_key = (self.embed_def.api_key if self.embed_def else self.api_key) - self.embedding_base_url = (self.embed_def.model_url if self.embed_def else self.base_url) + self.embedding_model = ( + self.embed_def.model_name if self.embed_def else self.llm_def.model_name + ) + self.embedding_api_key = ( + self.embed_def.api_key if self.embed_def else self.api_key + ) + self.embedding_base_url = ( + self.embed_def.model_url if self.embed_def else self.base_url + ) # CRITICAL: Validate API keys are present - fail fast instead of hanging if not self.api_key or self.api_key.strip() == "": @@ -182,7 +197,9 @@ def __init__(self, config: Dict[str, Any]): f"Cannot proceed without valid API credentials." ) - if self.embed_def and (not self.embedding_api_key or self.embedding_api_key.strip() == ""): + if self.embed_def and ( + not self.embedding_api_key or self.embedding_api_key.strip() == "" + ): raise RuntimeError( f"API key is missing or empty for embedding provider '{self.embed_def.model_provider}' (model: {self.embedding_model}). " f"Please set the API key in config.yml or environment variables." @@ -192,7 +209,10 @@ def __init__(self, config: Dict[str, Any]): self._client = None async def extract_memories( - self, text: str, prompt: str, user_id: Optional[str] = None, + self, + text: str, + prompt: str, + user_id: Optional[str] = None, langfuse_session_id: Optional[str] = None, ) -> List[str]: """Extract memories using OpenAI API with the enhanced fact retrieval prompt. 
@@ -219,26 +239,56 @@ async def extract_memories( current_date=datetime.now().strftime("%Y-%m-%d"), ) - # local models can only handle small chunks of input text - text_chunks = chunk_text_with_spacy(text) + # Semantic chunking: split dialogue into turns, then group by topic + async def _embed_for_chunking(texts: List[str]) -> List[List[float]]: + return await generate_openai_embeddings( + texts, + api_key=self.embedding_api_key, + base_url=self.embedding_base_url, + model=self.embedding_model, + ) + + chunking_config = self._registry.memory.get("extraction", {}).get( + "chunking", {} + ) + dialogue_turns = [line for line in text.split("\n") if line.strip()] + text_chunks = await semantic_chunk_text( + text, + embed_fn=_embed_for_chunking, + sentences=dialogue_turns, + join_str="\n", + buffer_size=int(chunking_config.get("buffer_size", 1)), + breakpoint_percentile_threshold=float( + chunking_config.get("breakpoint_percentile_threshold", 90.0) + ), + max_chunk_words=int(chunking_config.get("max_chunk_words", 500)), + ) # Process all chunks in sequence, not concurrently - results = [await self._process_chunk(system_prompt, chunk, i, langfuse_session_id=langfuse_session_id) for i, chunk in enumerate(text_chunks)] - + results = [ + await self._process_chunk( + system_prompt, chunk, i, langfuse_session_id=langfuse_session_id + ) + for i, chunk in enumerate(text_chunks) + ] + # Spread list of list of facts into a single list of facts cleaned_facts = [] for result in results: memory_logger.info(f"Cleaned facts: {result}") cleaned_facts.extend(result) - + return cleaned_facts - + except Exception as e: memory_logger.error(f"OpenAI memory extraction failed: {e}") return [] - + async def _process_chunk( - self, system_prompt: str, chunk: str, index: int, + self, + system_prompt: str, + chunk: str, + index: int, langfuse_session_id: Optional[str] = None, ) -> List[str]: """Process a single text chunk to extract memories using OpenAI API. @@ -312,11 +362,15 @@ async def test_connection(self) -> bool: try: # Add 10-second timeout to prevent hanging on API calls async with asyncio.timeout(10): - client = _get_openai_client(api_key=self.api_key, base_url=self.base_url, is_async=True) + client = _get_openai_client( + api_key=self.api_key, base_url=self.base_url, is_async=True + ) await client.models.list() return True except asyncio.TimeoutError: - memory_logger.error(f"OpenAI connection test timed out after 10s - check network connectivity and API endpoint") + memory_logger.error( + f"OpenAI connection test timed out after 10s - check network connectivity and API endpoint" + ) return False except Exception as e: memory_logger.error(f"OpenAI connection test failed: {e}") @@ -344,11 +398,11 @@ async def propose_memory_actions( # Generate the complete prompt using the helper function memory_logger.debug(f"🧠 Facts passed to prompt builder: {new_facts}") update_memory_messages = build_update_memory_messages( - retrieved_old_memory, - new_facts, - custom_prompt + retrieved_old_memory, new_facts, custom_prompt + ) + memory_logger.debug( + f"🧠 Generated prompt user content: {update_memory_messages[1]['content'][:200]}..." 
)
-        memory_logger.debug(f"🧠 Generated prompt user content: {update_memory_messages[1]['content'][:200]}...")

         op = self._registry.get_llm_operation("memory_update")
         client = op.get_client(is_async=True)
@@ -374,7 +428,6 @@
         memory_logger.error(f"OpenAI propose_memory_actions failed: {e}")
         return {}

-
     async def propose_reprocess_actions(
         self,
         existing_memories: List[Dict[str, str]],
@@ -466,21 +519,23 @@
 class OllamaProvider(LLMProviderBase):
     """Ollama LLM provider implementation.
-    
+
     Provides memory extraction, embedding generation, and memory action proposals
     using Ollama-served chat and embedding models.
-    
-    
+
+
     Use the OpenAI provider for Ollama by setting these environment variables:
-    
-    os.environ["OPENAI_API_KEY"] = "ollama" 
+
+    os.environ["OPENAI_API_KEY"] = "ollama"
     os.environ["OPENAI_BASE_URL"] = "http://localhost:11434/v1"
     os.environ["QDRANT_BASE_URL"] = "localhost"
     os.environ["OPENAI_EMBEDDER_MODEL"] = "erwan2/DeepSeek-R1-Distill-Qwen-1.5B:latest"
-    
+
     """
+
+    pass
+
 def _parse_memories_content(content: str) -> List[str]:
     """
     Parse LLM content to extract memory strings.
diff --git a/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py b/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py
index b02a6fa0..86943e44 100644
--- a/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py
+++ b/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py
@@ -16,14 +16,12 @@
 import logging
 import os
 import re
-from pathlib import Path
 from typing import List, Literal, Optional, TypedDict

 from advanced_omi_backend.services.memory.config import (
     load_config_yml as load_root_config,
 )
 from advanced_omi_backend.services.memory.providers.llm_providers import (
-    chunk_text_with_spacy,
     generate_openai_embeddings,
 )
 from advanced_omi_backend.services.neo4j_client import (
@@ -33,6 +31,7 @@
 )
 from advanced_omi_backend.utils.config_utils import resolve_value
 from advanced_omi_backend.utils.model_utils import get_model_config
+from advanced_omi_backend.utils.text_chunking import semantic_chunk_text

 logger = logging.getLogger(__name__)
@@ -64,44 +63,11 @@
     def __init__(self, stage: Literal["embedding", "database"], message: str):
         self.stage = stage


-def load_env_file(filepath: Path) -> dict[str, str]:
-    """Load environment variables from a .env file.
-
-    Args:
-        filepath: Path to the .env file to load.
-
-    Returns:
-        Dictionary of key-value pairs from the .env file.
- """ - env_vars = {} - if filepath.exists(): - with open(filepath, "r") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - if "=" in line: - parts = line.split("=", 1) - key = parts[0].strip() - value = parts[1].strip() if len(parts) > 1 else "" - # Handle quotes - if (value.startswith("'") and value.endswith("'")) or ( - value.startswith('"') and value.endswith('"') - ): - value = value[1:-1] - env_vars[key] = value - return env_vars - - class ObsidianService: """Service for ingesting Obsidian vaults into Neo4j graph database.""" def __init__(self): """Initialize the Obsidian service with configuration from config.yml and environment.""" - # Resolve paths relative to this file - # backends/advanced/src/advanced_omi_backend/services/obsidian_service.py - self.CURRENT_DIR = Path(__file__).parent.resolve() - # Load configuration strictly from standard locations # Prefer /app/config.yml inside containers (mounted by docker-compose) # Fallbacks handled by shared utility @@ -114,46 +80,36 @@ def __init__(self): embed_config = get_model_config(config_data, "embedding") if not embed_config: - raise ValueError("Configuration for 'defaults.embedding' not found in config.yml") - - # Neo4j Connection - Prefer environment variables passed by Docker Compose - neo4j_host = os.getenv("NEO4J_HOST") - # Load .env file as fallback (for local dev or if env vars not set) - candidate_env_files = [ - Path("/app/.env"), - self.CURRENT_DIR.parent.parent.parent.parent - / ".env", # Project root .env file ToDo cleanup needed after k8s is migrated and there is no .env file in the project root. - self.CURRENT_DIR.parent.parent.parent.parent - / "backends" - / "advanced" - / ".env", # repo path - ] - env_data = {} - for p in candidate_env_files: - if p.exists(): - env_data.update(load_env_file(p)) - - # Use env var first, then fallback to .env file - if not neo4j_host: - neo4j_host = env_data.get("NEO4J_HOST") - - if not neo4j_host: - raise KeyError("NEO4J_HOST not found in environment or .env") + raise ValueError( + "Configuration for 'defaults.embedding' not found in config.yml" + ) + # Neo4j Connection - environment variables passed by Docker Compose + neo4j_host = os.getenv("NEO4J_HOST", "neo4j") self.neo4j_uri = f"bolt://{neo4j_host}:7687" - self.neo4j_user = os.getenv("NEO4J_USER") or env_data.get("NEO4J_USER", "neo4j") - self.neo4j_password = os.getenv("NEO4J_PASSWORD") or env_data.get("NEO4J_PASSWORD", "") + self.neo4j_user = os.getenv("NEO4J_USER", "neo4j") + self.neo4j_password = os.getenv("NEO4J_PASSWORD", "password") # Models / API - Loaded strictly from config.yml self.embedding_model = str(resolve_value(embed_config["model_name"])) - self.embedding_dimensions = int(resolve_value(embed_config["embedding_dimensions"])) + self.embedding_dimensions = int( + resolve_value(embed_config["embedding_dimensions"]) + ) self.openai_base_url = str(resolve_value(llm_config["model_url"])) self.openai_api_key = str(resolve_value(llm_config["api_key"])) - # Chunking - uses shared spaCy/text fallback utility - self.chunk_word_limit = 120 + # Semantic chunking configuration (from config.yml with defaults) + obsidian_config = config_data.get("memory", {}).get("obsidian", {}) + chunking_config = obsidian_config.get("chunking", {}) + self.semantic_buffer_size = int(chunking_config.get("buffer_size", 1)) + self.semantic_breakpoint_percentile = float( + chunking_config.get("breakpoint_percentile_threshold", 95.0) + ) + self.max_chunk_words = 
int(chunking_config.get("max_chunk_words", 300)) - self.neo4j_client = Neo4jClient(self.neo4j_uri, self.neo4j_user, self.neo4j_password) + self.neo4j_client = Neo4jClient( + self.neo4j_uri, self.neo4j_user, self.neo4j_password + ) self.read_interface = Neo4jReadInterface(self.neo4j_client) self.write_interface = Neo4jWriteInterface(self.neo4j_client) @@ -191,7 +147,9 @@ def _clean_text(text: str) -> str: """Normalize whitespace for embedding inputs.""" return re.sub(r"\s+", " ", text).strip() - def parse_obsidian_note(self, root: str, filename: str, vault_path: str) -> NoteData: + def parse_obsidian_note( + self, root: str, filename: str, vault_path: str + ) -> NoteData: """Parse an Obsidian markdown file and extract metadata. Args: @@ -225,14 +183,8 @@ def parse_obsidian_note(self, root: str, filename: str, vault_path: str) -> Note fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", raw_text, re.DOTALL) content = raw_text[fm_match.end() :] if fm_match else raw_text - """ - Pattern breakdown: - \[\[ matches [[ - ([^\]|]+) captures the link name (one or more chars except ] or |) - (?:\|[^\]]+)? optionally matches |display text - \]\] matches ]] - Matches: [[note]] and [[note|display text]] - """ + # Pattern: \[\[ matches [[, ([^\]|]+) captures link name, + # (?:\|[^\]]+)? optionally matches |display text, \]\] matches ]] links = re.findall(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]", content) tags = re.findall(r"#([a-zA-Z0-9_\-/]+)", content) @@ -247,7 +199,10 @@ def parse_obsidian_note(self, root: str, filename: str, vault_path: str) -> Note } async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload]: - """Chunk note content and generate embeddings for each chunk. + """Chunk note content semantically and generate embeddings for each chunk. + + Uses embedding-similarity-based semantic chunking to find natural topic + boundaries, then embeds the resulting chunks for vector storage. Args: note_data: Parsed note data to process. @@ -255,9 +210,21 @@ async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload Returns: List of chunk payloads with text and embedding vectors. 
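
        Chunking thresholds are read from config.yml in ``__init__``; a
        sketch of the assumed layout (keys and defaults match the code):

            memory:
              obsidian:
                chunking:
                  buffer_size: 1
                  breakpoint_percentile_threshold: 95.0
                  max_chunk_words: 300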
""" - text_chunks = chunk_text_with_spacy( + api_key = self.openai_api_key + base_url = self.openai_base_url + model = self.embedding_model + + async def embed_fn(texts: List[str]) -> List[List[float]]: + return await generate_openai_embeddings( + texts, api_key=api_key, base_url=base_url, model=model + ) + + text_chunks = await semantic_chunk_text( note_data["content"], - max_tokens=self.chunk_word_limit, + embed_fn=embed_fn, + buffer_size=self.semantic_buffer_size, + breakpoint_percentile_threshold=self.semantic_breakpoint_percentile, + max_chunk_words=self.max_chunk_words, ) logger.info( f"Processing: {note_data['path']} ({len(note_data['content'])} chars -> {len(text_chunks)} chunks)" @@ -284,7 +251,9 @@ async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload model=self.embedding_model, ) except Exception as e: - logger.exception(f"Embedding generation failed for {note_data['path']}: {e}") + logger.exception( + f"Embedding generation failed for {note_data['path']}: {e}" + ) return [] chunk_payloads: List[ChunkPayload] = [] @@ -296,7 +265,9 @@ async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload return chunk_payloads - def ingest_note_and_chunks(self, note_data: NoteData, chunks: List[ChunkPayload]) -> None: + def ingest_note_and_chunks( + self, note_data: NoteData, chunks: List[ChunkPayload] + ) -> None: """Store note and chunks in Neo4j with relationships to folders, tags, and links. Args: @@ -416,15 +387,15 @@ async def search_obsidian(self, query: str, limit: int = 5) -> ObsidianSearchRes cypher_query = """ CALL db.index.vector.queryNodes('chunk_embeddings', $limit, $vector) YIELD node AS chunk, score - + // Find the parent Note MATCH (note:Note)-[:HAS_CHUNK]->(chunk) - + // Get graph context: What tags and linked files are around this note? OPTIONAL MATCH (note)-[:HAS_TAG]->(t:Tag) OPTIONAL MATCH (note)-[:LINKS_TO]->(linked:Note) - - RETURN + + RETURN note.name AS source, chunk.text AS content, collect(DISTINCT t.name) AS tags, @@ -470,15 +441,3 @@ def get_obsidian_service() -> ObsidianService: if _obsidian_service is None: _obsidian_service = ObsidianService() return _obsidian_service - - -# Backward compatibility: module-level access uses lazy initialization -# This property-like access ensures the service is only created when first used -class _ObsidianServiceProxy: - """Proxy for lazy access to obsidian_service.""" - - def __getattr__(self, name): - return getattr(get_obsidian_service(), name) - - -obsidian_service = _ObsidianServiceProxy() diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py index 804077f4..26516443 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py @@ -28,6 +28,47 @@ logger = logging.getLogger(__name__) +def _get_plugin_keywords() -> list[str]: + """Collect ASR keyword hints from all enabled plugins. + + Returns an empty list if the plugin system is not initialised yet. 
+ """ + try: + from advanced_omi_backend.services.plugin_service import get_plugin_router + + router = get_plugin_router() + if router: + return router.get_asr_keywords() + except Exception: + pass + return [] + + +def _merge_hot_words(prompt_hot_words: str, plugin_keywords: list[str]) -> str: + """Merge prompt-registry hot words with plugin keywords (deduplicated).""" + import re + + parts: list[str] = [] + seen: set[str] = set() + + # Parse prompt registry hot words first + if prompt_hot_words and prompt_hot_words.strip(): + for word in re.split(r"[,\n]+", prompt_hot_words): + word = word.strip().lower() + if word and word not in seen: + seen.add(word) + parts.append(word) + + # Add plugin keywords + for kw in plugin_keywords: + kw = kw.strip().lower() + if kw and kw not in seen: + seen.add(kw) + parts.append(kw) + + return "\n".join(parts) if parts else "" + + def _parse_hot_words_to_keyterm(hot_words_str: str) -> str: """Convert hot words string to Deepgram keyterm format. @@ -222,7 +263,8 @@ async def transcribe( if "diarize" in query: query["diarize"] = "true" if diarize else "false" - # Use caller-provided context or fall back to LangFuse prompt store + # Use caller-provided context or fall back to LangFuse prompt store, + # then merge with plugin wake words / keywords for ASR boosting. if context_info: hot_words_str = context_info else: @@ -233,6 +275,8 @@ async def transcribe( except Exception as e: logger.debug(f"Failed to fetch asr.hot_words prompt: {e}") + hot_words_str = _merge_hot_words(hot_words_str, _get_plugin_keywords()) + # For Deepgram: inject as keyterm query param if self.model.model_provider == "deepgram" and hot_words_str.strip(): keyterm = _parse_hot_words_to_keyterm(hot_words_str) @@ -404,17 +448,20 @@ async def start_stream( if diarize and "diarize" in query_dict: query_dict["diarize"] = "true" - # Inject hot words for streaming (Deepgram only) - if self.model.model_provider == "deepgram": - try: - registry = get_prompt_registry() - hot_words_str = await registry.get_prompt("asr.hot_words") - if hot_words_str and hot_words_str.strip(): - keyterm = _parse_hot_words_to_keyterm(hot_words_str) - if keyterm: - query_dict["keyterm"] = keyterm - except Exception as e: - logger.debug(f"Failed to fetch asr.hot_words for streaming: {e}") + # Inject hot words for streaming — merge prompt registry + plugin keywords + prompt_hot_words = "" + try: + registry = get_prompt_registry() + prompt_hot_words = await registry.get_prompt("asr.hot_words") + except Exception as e: + logger.debug(f"Failed to fetch asr.hot_words for streaming: {e}") + + merged_hot_words = _merge_hot_words(prompt_hot_words, _get_plugin_keywords()) + + if self.model.model_provider == "deepgram" and merged_hot_words: + keyterm = _parse_hot_words_to_keyterm(merged_hot_words) + if keyterm: + query_dict["keyterm"] = keyterm # NOTE: PULSE/wave (smallest.ai) does NOT support keywords on WebSocket — # any `keywords` query param causes 0 responses or HTTP 400. diff --git a/backends/advanced/src/advanced_omi_backend/utils/text_chunking.py b/backends/advanced/src/advanced_omi_backend/utils/text_chunking.py new file mode 100644 index 00000000..d2f3ed6e --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/utils/text_chunking.py @@ -0,0 +1,188 @@ +"""Semantic text chunking using embedding similarity. + +Splits text into semantically coherent chunks by comparing consecutive sentence +embeddings and finding natural topic boundaries. 
Inspired by LlamaIndex's
+SemanticSplitterNodeParser:
+https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking/
+
+The algorithm:
+1. Split text into sentences (regex on sentence-ending punctuation)
+2. Create "buffered" versions by combining each sentence with its neighbors
+3. Batch-embed all buffered sentences in one API call
+4. Compute cosine distances between consecutive embeddings
+5. Find breakpoints where distance exceeds a percentile threshold
+6. Group sentences between breakpoints into chunks
+7. Apply a max-word safety valve to prevent oversized chunks
+"""
+
+import logging
+import re
+from typing import Awaitable, Callable, List, Optional
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+def split_sentences(text: str) -> List[str]:
+    """Split text into sentences using regex on .!? boundaries.
+
+    Splits only where sentence-ending punctuation is followed by whitespace,
+    so decimal numbers (e.g. 3.14) are kept intact; abbreviations followed by
+    a space (e.g. "Dr. Smith") may still be split.
+    """
+    # Split on sentence-ending punctuation followed by whitespace
+    parts = re.split(r"(?<=[.!?])\s+", text.strip())
+    return [s.strip() for s in parts if s.strip()]
+
+
+def _build_buffered_sentences(sentences: List[str], buffer_size: int = 1) -> List[str]:
+    """Combine each sentence with its neighbors for richer embedding context.
+
+    For buffer_size=1, sentence i is combined with sentences [i-1, i, i+1].
+    """
+    buffered = []
+    for i in range(len(sentences)):
+        start = max(0, i - buffer_size)
+        end = min(len(sentences), i + buffer_size + 1)
+        buffered.append(" ".join(sentences[start:end]))
+    return buffered
+
+
+def _cosine_distances(embeddings: List[List[float]]) -> List[float]:
+    """Compute cosine distances between consecutive embedding pairs.
+
+    Returns a list of length len(embeddings) - 1.
+    """
+    arr = np.array(embeddings, dtype=np.float64)
+    # Normalize rows
+    norms = np.linalg.norm(arr, axis=1, keepdims=True)
+    norms = np.where(norms == 0, 1.0, norms)
+    normed = arr / norms
+
+    # Cosine similarity between consecutive pairs, then convert to distance
+    similarities = np.sum(normed[:-1] * normed[1:], axis=1)
+    distances = 1.0 - similarities
+    return distances.tolist()
+
+
+def _find_breakpoints(distances: List[float], percentile_threshold: float) -> List[int]:
+    """Find indices where distance exceeds the given percentile.
+
+    Returns sorted list of breakpoint indices (positions in the distances list
+    where a topic transition occurs).
+    """
+    if not distances:
+        return []
+    threshold = float(np.percentile(distances, percentile_threshold))
+    return [i for i, d in enumerate(distances) if d > threshold]
+
+
+def _enforce_max_chunk_words(chunks: List[str], max_words: int) -> List[str]:
+    """Split any chunk that exceeds max_words into smaller pieces."""
+    result = []
+    for chunk in chunks:
+        words = chunk.split()
+        if len(words) <= max_words:
+            result.append(chunk)
+        else:
+            for i in range(0, len(words), max_words):
+                piece = " ".join(words[i : i + max_words])
+                if piece:
+                    result.append(piece)
+    return result
+
+
+async def semantic_chunk_text(
+    text: str,
+    embed_fn: Callable[[List[str]], Awaitable[List[List[float]]]],
+    buffer_size: int = 1,
+    breakpoint_percentile_threshold: float = 95.0,
+    max_chunk_words: int = 300,
+    sentences: Optional[List[str]] = None,
+    join_str: str = " ",
+) -> List[str]:
+    """Split text into semantically coherent chunks using embedding similarity.
+ + Uses the approach from LlamaIndex's SemanticSplitterNodeParser + (https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking/) + to detect topic transitions via cosine distance between consecutive sentence + embeddings. + + Args: + text: The text to chunk. + embed_fn: Async callable that takes a list of strings and returns + a list of embedding vectors. Keeps the chunker decoupled from + any specific embedding provider. + buffer_size: Number of neighboring sentences to include when building + the buffered context for each sentence's embedding. + breakpoint_percentile_threshold: Percentile of cosine distances above + which a topic transition is detected (higher = fewer breaks). + max_chunk_words: Maximum words per chunk. Chunks exceeding this are + split further as a safety valve. + sentences: Optional pre-split text units (e.g. dialogue turns). When + provided, the regex-based split_sentences() call is skipped and + these units are used directly as the atomic elements for embedding + and breakpoint detection. + join_str: String used to join units within a chunk. Default is ``" "`` + (space). Use ``"\\n"`` for dialogue transcripts to keep speaker + labels on separate lines. + + Returns: + List of text chunks. + """ + text = text.strip() + if not text: + return [] + + units = sentences if sentences is not None else split_sentences(text) + # Filter out empty units + units = [u for u in units if u.strip()] + if len(units) <= 2: + return _enforce_max_chunk_words([text], max_chunk_words) + + # Build buffered sentences for richer embedding context + buffered = _build_buffered_sentences(units, buffer_size) + + # Embed all buffered sentences in one batch call + try: + embeddings = await embed_fn(buffered) + except Exception: + logger.warning( + "Embedding call failed during semantic chunking; returning text as single chunk", + exc_info=True, + ) + return _enforce_max_chunk_words([text], max_chunk_words) + + if not embeddings or len(embeddings) != len(units): + logger.warning( + "Unexpected embedding count (%s vs %s units); returning single chunk", + len(embeddings) if embeddings else 0, + len(units), + ) + return _enforce_max_chunk_words([text], max_chunk_words) + + # Compute distances and find breakpoints + distances = _cosine_distances(embeddings) + breakpoints = _find_breakpoints(distances, breakpoint_percentile_threshold) + + # Group units between breakpoints + chunks: List[str] = [] + start = 0 + for bp in sorted(breakpoints): + # bp is the index in distances; the break is *after* unit bp + end = bp + 1 + chunk = join_str.join(units[start:end]) + if chunk.strip(): + chunks.append(chunk.strip()) + start = end + + # Remaining units + if start < len(units): + chunk = join_str.join(units[start:]) + if chunk.strip(): + chunks.append(chunk.strip()) + + if not chunks: + chunks = [text] + + return _enforce_max_chunk_words(chunks, max_chunk_words) diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 5f7487e5..34285062 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -9,6 +9,7 @@ import logging import os import time +from dataclasses import dataclass from datetime import datetime from typing import Any, Dict, Optional @@ -207,52 +208,83 @@ async def handle_end_of_conversation( } -@async_job(redis=True, beanie=True) -async def open_conversation_job( 
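For reference, a minimal usage sketch of the semantic_chunk_text helper added above. The deterministic stub embed function and the sample text are illustrative assumptions, not part of the patch; in the backend, embed_fn wraps generate_openai_embeddings as shown in obsidian_service.py.

import asyncio
import hashlib
from typing import List

from advanced_omi_backend.utils.text_chunking import semantic_chunk_text


async def stub_embed_fn(texts: List[str]) -> List[List[float]]:
    # Deterministic fake embeddings: hash each text into a short vector.
    return [[b / 255.0 for b in hashlib.sha256(t.encode()).digest()[:8]] for t in texts]


async def main() -> None:
    text = (
        "Cats are wonderful pets. They sleep most of the day. "
        "Rust has a borrow checker. It prevents data races at compile time."
    )
    chunks = await semantic_chunk_text(
        text,
        embed_fn=stub_embed_fn,
        buffer_size=1,
        breakpoint_percentile_threshold=95.0,
        max_chunk_words=300,
    )
    print(chunks)  # sentences grouped into chunks, split at the largest cosine distance


asyncio.run(main())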
+@dataclass +class ConversationState: + """Mutable state tracked across the conversation monitoring loop.""" + + conversation_id: str = "" + session_id: str = "" + user_id: str = "" + client_id: str = "" + start_time: float = 0.0 + last_result_count: int = 0 + timeout_triggered: bool = False + close_requested_reason: Optional[str] = None + last_meaningful_speech_time: float = 0.0 + last_word_count: int = 0 + end_reason: str = "unknown" + + +def _validate_segments(segments: list) -> list: + """Validate and filter transcription segments, correcting minor issues. + + Filters out non-dict segments and segments with no text. Corrects invalid + timing (end <= start) by estimating duration from word count. Ensures + speaker field is always a non-empty string. + """ + validated = [] + for i, seg in enumerate(segments): + if not isinstance(seg, dict): + logger.warning(f"Segment {i} is not a dict: {type(seg)}") + continue + + text = seg.get("text", "").strip() + if not text: + logger.debug(f"Segment {i} has no text, skipping") + continue + + start = seg.get("start", 0.0) + end = seg.get("end", 0.0) + if end <= start: + logger.debug( + f"Segment {i} has invalid timing (start={start}, end={end}), correcting" + ) + estimated_duration = len(text.split()) * 0.5 # ~0.5 seconds per word + seg["end"] = start + estimated_duration + + speaker = seg.get("speaker") + if speaker is None or speaker == "": + seg["speaker"] = "SPEAKER_00" + elif isinstance(speaker, (int, float)): + seg["speaker"] = f"Speaker {int(speaker)}" + + validated.append(seg) + + logger.info(f"Validated {len(validated)}/{len(segments)} segments") + return validated + + +async def _initialize_conversation( session_id: str, user_id: str, client_id: str, - speech_detected_at: float, - speech_job_id: str = None, - *, - redis_client=None, -) -> Dict[str, Any]: - """ - Long-running RQ job that creates and continuously updates conversation with transcription results. + speech_job_id: str, + current_job, + redis_client, +) -> str: + """Create or reuse a conversation for this session. - Creates conversation when speech is detected, then monitors and updates until session ends. - - Args: - session_id: Stream session ID - user_id: User ID - client_id: Client ID - speech_detected_at: Timestamp when speech was first detected - speech_job_id: Optional speech detection job ID to update with conversation_id - redis_client: Redis client (injected by decorator) + Checks for an existing placeholder conversation in Redis. If found and valid, + reuses it. Otherwise creates a new conversation. Attaches session markers, + links job metadata, and signals audio persistence to rotate files. Returns: - Dict with conversation_id, final_result_count, runtime_seconds - - Note: user_email is fetched from the database when needed. + conversation_id of the created/reused conversation. 
""" - from rq import get_current_job - from advanced_omi_backend.models.conversation import ( Conversation, create_conversation, ) - from advanced_omi_backend.services.audio_stream import ( - TranscriptionResultsAggregator, - ) - - logger.info( - f"📝 Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})" - ) - - # Get current job for meta storage - current_job = get_current_job() - current_job.meta = {} - current_job.save_meta() # Check if a placeholder conversation already exists for this session conversation_key = f"conversation:current:{session_id}" @@ -388,17 +420,34 @@ async def open_conversation_job( f"🔄 Signaled audio persistence to rotate file for conversation {conversation_id[:12]}" ) - # Use redis_client parameter - aggregator = TranscriptionResultsAggregator(redis_client) + return conversation_id - # Job control - session_key = f"audio:session:{session_id}" + +async def _monitor_conversation_loop( + state: ConversationState, + aggregator, + current_job, + redis_client, +) -> None: + """Poll transcription results and track conversation activity until exit. + + Runs the main monitoring loop that: + - Detects zombie jobs (job hash missing from Redis) + - Handles session finalize signals (disconnect, user stop) + - Handles conversation close requests (API, plugin, button) + - Polls the transcription aggregator for new results + - Validates segments and tracks speech activity + - Detects inactivity timeout and closes conversation + - Dispatches transcript.streaming plugin events + + Mutates ``state`` in place with final values for timeout_triggered, + close_requested_reason, last_result_count, and last_word_count. + """ + session_key = f"audio:session:{state.session_id}" max_runtime = ( 10740 # 3 hours - 60 seconds (single conversations shouldn't exceed 3 hours) ) - start_time = time.time() - last_result_count = 0 finalize_received = False # Inactivity timeout configuration @@ -406,17 +455,9 @@ async def open_conversation_job( os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60") ) inactivity_timeout_minutes = inactivity_timeout_seconds / 60 - last_meaningful_speech_time = ( - 0.0 # Initialize with audio time 0 (will be updated with first speech) - ) - timeout_triggered = False # Track if closure was due to timeout - close_requested_reason = ( - None # Track if closure was requested via API/plugin/button - ) last_inactivity_log_time = ( time.time() ) # Track when we last logged inactivity (wall-clock for logging) - last_word_count = 0 # Track word count to detect actual new speech # Test mode: wait for audio queue to drain before timing out # In real usage, ambient noise keeps connection alive. In tests, chunks arrive in bursts. 
@@ -434,7 +475,7 @@ async def open_conversation_job(
         # Check if job still exists in Redis (detect zombie state)
         from advanced_omi_backend.utils.job_utils import check_job_alive
 
-        if not await check_job_alive(redis_client, current_job, session_id):
+        if not await check_job_alive(redis_client, current_job, state.session_id):
             break
 
         # Check if session is finalizing (set by producer when recording stops)
@@ -455,10 +496,12 @@ async def open_conversation_job(
 
             if completion_reason_str == "websocket_disconnect":
                 logger.warning(
-                    f"🔌 WebSocket disconnected for session {session_id[:12]} - "
+                    f"🔌 WebSocket disconnected for session {state.session_id[:12]} - "
                    f"ending conversation early"
                 )
-                timeout_triggered = False  # This is a disconnect, not a timeout
+                state.timeout_triggered = (
+                    False  # This is a disconnect, not a timeout
+                )
             else:
                 logger.info(
                     f"🛑 Session finalizing (reason: {completion_reason_str}), "
@@ -473,92 +516,58 @@ async def open_conversation_job(
             )
             if close_reason:
                 await redis_client.hdel(session_key, "conversation_close_requested")
-                close_requested_reason = (
+                state.close_requested_reason = (
                     close_reason.decode()
                     if isinstance(close_reason, bytes)
                     else close_reason
                 )
                 logger.info(
-                    f"🔒 Conversation close requested: {close_requested_reason}"
+                    f"🔒 Conversation close requested: {state.close_requested_reason}"
                 )
-                timeout_triggered = True  # Session stays active (same restart behavior as inactivity timeout)
+                state.timeout_triggered = True  # Session stays active (same restart behavior as inactivity timeout)
                 finalize_received = True
                 break
 
         # Check max runtime timeout
-        if time.time() - start_time > max_runtime:
-            logger.warning(f"⏱️ Max runtime reached for {conversation_id}")
+        if time.time() - state.start_time > max_runtime:
+            logger.warning(f"⏱️ Max runtime reached for {state.conversation_id}")
             break
 
         # Get combined results from aggregator
-        combined = await aggregator.get_combined_results(session_id)
+        combined = await aggregator.get_combined_results(state.session_id)
         current_count = combined["chunk_count"]
 
         # Analyze speech content using detailed analysis
-        transcript_data = {"text": combined["text"], "words": combined.get("words", [])}
+        transcript_data = {
+            "text": combined["text"],
+            "words": combined.get("words", []),
+        }
         speech_analysis = analyze_speech(transcript_data)
 
         # Extract speaker information from segments
         segments = combined.get("segments", [])
 
-        # FIX: Validate and filter segments before processing
-        validated_segments = []
-        for i, seg in enumerate(segments):
-            # Check if segment is a dict
-            if not isinstance(seg, dict):
-                logger.warning(f"Segment {i} is not a dict: {type(seg)}")
-                continue
-
-            # Check for required text field
-            text = seg.get("text", "").strip()
-            if not text:
-                logger.debug(f"Segment {i} has no text, skipping")
-                continue
-
-            # Check for reasonable timing
-            start = seg.get("start", 0.0)
-            end = seg.get("end", 0.0)
-            if end <= start:
-                logger.debug(
-                    f"Segment {i} has invalid timing (start={start}, end={end}), correcting"
-                )
-                # Auto-correct: estimate duration from text length
-                estimated_duration = len(text.split()) * 0.5  # ~0.5 seconds per word
-                seg["end"] = start + estimated_duration
-
-            # Ensure speaker field exists and is a string
-            speaker = seg.get("speaker")
-            if speaker is None or speaker == "":
-                seg["speaker"] = "SPEAKER_00"
-            elif isinstance(speaker, (int, float)):
-                seg["speaker"] = f"Speaker {int(speaker)}"
-
-            validated_segments.append(seg)
-
-        logger.info(f"Validated {len(validated_segments)}/{len(segments)} segments")
+        # Validate and filter segments before processing
+        validated_segments = _validate_segments(segments)
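A worked example of the _validate_segments helper used above (input values are made up for illustration):

raw_segments = [
    {"text": "hello there", "start": 0.0, "end": 0.0, "speaker": 1},  # end <= start
    {"text": "", "start": 1.0, "end": 2.0},  # dropped: empty text
    "not a dict",  # dropped: wrong type
    {"text": "how are you", "start": 2.0, "end": 3.5, "speaker": None},
]
assert _validate_segments(raw_segments) == [
    {"text": "hello there", "start": 0.0, "end": 1.0, "speaker": "Speaker 1"},
    {"text": "how are you", "start": 2.0, "end": 3.5, "speaker": "SPEAKER_00"},
]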
speakers = extract_speakers_from_segments(validated_segments) # Track new speech activity (word count based) - new_speech_time, last_word_count = await track_speech_activity( + new_speech_time, state.last_word_count = await track_speech_activity( speech_analysis=speech_analysis, - last_word_count=last_word_count, - conversation_id=conversation_id, + last_word_count=state.last_word_count, + conversation_id=state.conversation_id, redis_client=redis_client, ) if new_speech_time: - last_meaningful_speech_time = new_speech_time + state.last_meaningful_speech_time = new_speech_time # Update job metadata with current progress await update_job_progress_metadata( current_job=current_job, - conversation_id=conversation_id, - session_id=session_id, - client_id=client_id, + conversation_id=state.conversation_id, + session_id=state.session_id, + client_id=state.client_id, combined=combined, speech_analysis=speech_analysis, speakers=speakers, - last_meaningful_speech_time=last_meaningful_speech_time, + last_meaningful_speech_time=state.last_meaningful_speech_time, ) # Check inactivity timeout using audio time (not wall-clock time) @@ -567,8 +576,8 @@ async def open_conversation_job( # Calculate inactivity based on audio timestamps # Only check if we have valid audio timing data - if current_audio_time > 0 and last_meaningful_speech_time > 0: - inactivity_duration = current_audio_time - last_meaningful_speech_time + if current_audio_time > 0 and state.last_meaningful_speech_time > 0: + inactivity_duration = current_audio_time - state.last_meaningful_speech_time else: # Fallback: No audio timestamps available (text-only transcription) # Can't reliably detect inactivity, so skip timeout check this iteration @@ -591,7 +600,7 @@ async def open_conversation_job( # In test mode, check if there are pending chunks before timing out if wait_for_queue_drain: # Check audio persistence queue length - persist_queue_key = f"audio:queue:{session_id}" + persist_queue_key = f"audio:queue:{state.session_id}" queue_length = await redis_client.llen(persist_queue_key) if queue_length > 0: @@ -603,24 +612,24 @@ async def open_conversation_job( continue logger.info( - f"🕐 Conversation {conversation_id} inactive for " + f"🕐 Conversation {state.conversation_id} inactive for " f"{inactivity_duration/60:.1f} minutes (threshold: {inactivity_timeout_minutes} min), " f"auto-closing conversation (session remains active for next conversation)..." 
) # DON'T set session to finalizing - just close this conversation # Session remains "active" so new conversations can be created # Only user manual stop or WebSocket disconnect should finalize the session - timeout_triggered = True + state.timeout_triggered = True finalize_received = True break # Track results progress (conversation will get transcript from transcription job) - if current_count > last_result_count: + if current_count > state.last_result_count: logger.info( - f"📊 Conversation {conversation_id} progress: " + f"📊 Conversation {state.conversation_id} progress: " f"{current_count} results, {len(combined['text'])} chars, {len(validated_segments)} segments" ) - last_result_count = current_count + state.last_result_count = current_count # Trigger transcript-level plugins on new transcript segments try: @@ -632,22 +641,22 @@ async def open_conversation_job( if transcript_text: plugin_data = { "transcript": transcript_text, - "segment_id": f"{session_id}_{current_count}", - "conversation_id": conversation_id, + "segment_id": f"{state.session_id}_{current_count}", + "conversation_id": state.conversation_id, "segments": validated_segments, "word_count": speech_analysis.get("word_count", 0), } logger.info( f"🔌 DISPATCH: transcript.streaming event " - f"(conversation={conversation_id[:12]}, segment_id={session_id}_{current_count})" + f"(conversation={state.conversation_id[:12]}, segment_id={state.session_id}_{current_count})" ) plugin_results = await plugin_router.dispatch_event( event=PluginEvent.TRANSCRIPT_STREAMING, - user_id=user_id, + user_id=state.user_id, data=plugin_data, - metadata={"client_id": client_id}, + metadata={"client_id": state.client_id}, ) logger.info( @@ -671,85 +680,363 @@ async def open_conversation_job( await asyncio.sleep(1) # Check every second for responsiveness + +async def _save_streaming_transcript( + session_id: str, + conversation_id: str, + aggregator, +) -> str: + """Retrieve final streaming transcript and save it to the conversation document. + + Gets the combined transcription results from the aggregator, converts them + to Word and SpeakerSegment model objects, creates a transcript version, and + saves to MongoDB. + + Returns: + version_id of the saved transcript version. 
+ """ + from advanced_omi_backend.models.conversation import Conversation + + logger.info( + f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}" + ) + final_transcript = await aggregator.get_combined_results(session_id) + + # Fetch conversation from database to ensure we have latest state + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + if not conversation: + logger.error(f"❌ Conversation {conversation_id} not found in database") + raise ValueError(f"Conversation {conversation_id} not found") + + # Create transcript version from streaming results + version_id = f"streaming_{session_id[:12]}" + transcript_text = final_transcript.get("text", "") + words_data = final_transcript.get("words", []) # All words from aggregator + + # Convert words to Word objects (including per-word speaker labels if present) + words = [ + Conversation.Word( + word=w.get("word", ""), + start=w.get("start", 0.0), + end=w.get("end", 0.0), + confidence=w.get("confidence"), + speaker=w.get("speaker"), + speaker_confidence=w.get("speaker_confidence"), + ) + for w in words_data + ] + + # Use provider-supplied segments if available (from streaming diarization), + # otherwise leave empty for speaker recognition service to fill later. + segments_data = final_transcript.get("segments", []) + if segments_data: + segments = [ + Conversation.SpeakerSegment( + start=s.get("start", 0.0), + end=s.get("end", 0.0), + text=s.get("text", ""), + speaker=str(s.get("speaker", "Unknown")), + words=[ + Conversation.Word( + word=sw.get("word", ""), + start=sw.get("start", 0.0), + end=sw.get("end", 0.0), + confidence=sw.get("confidence"), + speaker=sw.get("speaker"), + speaker_confidence=sw.get("speaker_confidence"), + ) + for sw in s.get("words", []) + ], + ) + for s in segments_data + ] + else: + segments = [] + + # Determine provider from streaming results + provider = final_transcript.get("provider", "deepgram") + + # Determine diarization source if provider supplied segments + diarization_source = "provider" if segments else None + + # Add streaming transcript with words at version level + version = conversation.add_transcript_version( + version_id=version_id, + transcript=transcript_text, + words=words, # Store at version level + segments=segments, # Provider segments or empty (filled by speaker service later) + provider=provider, + model=provider, # Provider name as model + processing_time_seconds=None, # Not applicable for streaming + metadata={ + "source": "streaming", + "chunk_count": final_transcript.get("chunk_count", 0), + "word_count": len(words), + "provider_capabilities": {"diarization": bool(segments)}, + }, + set_as_active=True, + ) + version.diarization_source = diarization_source + + # Update placeholder conversation if it exists + if ( + getattr(conversation, "always_persist", False) + and getattr(conversation, "processing_status", None) == "pending_transcription" + ): + # Keep placeholder status - will be updated by title_summary_job + logger.info( + f"📝 Placeholder conversation {conversation_id} has transcript, " + f"waiting for title/summary generation" + ) + + # Save conversation with streaming transcript + await conversation.save() + segment_info = ( + f"{len(segments)} provider segments (diarization_source={diarization_source})" + if segments + else "0 segments (pending speaker recognition)" + ) + logger.info( + f"✅ Saved streaming transcript: {len(transcript_text)} chars, " + f"{segment_info}, {len(words)} words " + f"for 
conversation {conversation_id[:12]}" + ) + + return version_id + + +async def _enqueue_post_processing( + conversation_id: str, + user_id: str, + client_id: str, + version_id: str, + end_reason: str, +) -> None: + """Enqueue post-conversation processing jobs (speaker, memory, title, events). + + Checks configuration for always_batch_retranscribe. If enabled, enqueues + a batch transcription job first with post-processing depending on it. + Otherwise starts post-processing immediately with the streaming transcript. + """ + from advanced_omi_backend.config_loader import get_backend_config + + transcription_cfg = get_backend_config("transcription") + batch_retranscribe = False + if transcription_cfg: + from omegaconf import OmegaConf + + cfg_dict = OmegaConf.to_container(transcription_cfg, resolve=True) + batch_retranscribe = cfg_dict.get("always_batch_retranscribe", False) + + if batch_retranscribe: + # BATCH PATH: Streaming transcript saved as preview — user sees it immediately + # Full post-processing (speaker, memory, title) waits for batch transcript + from advanced_omi_backend.config import get_transcription_job_timeout + from advanced_omi_backend.controllers.queue_controller import ( + JOB_RESULT_TTL, + transcription_queue, + ) + from advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, + ) + + batch_version_id = f"batch_{conversation_id[:12]}" + batch_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + batch_version_id, + "always_batch_retranscribe", + job_timeout=get_transcription_job_timeout(), + result_ttl=JOB_RESULT_TTL, + job_id=f"batch_retranscribe_{conversation_id[:12]}", + description=f"Batch re-transcription for {conversation_id[:8]}", + meta={"conversation_id": conversation_id, "client_id": client_id}, + ) + + logger.info( + f"🔄 Batch re-transcribe enabled: enqueued batch job {batch_job.id} " + f"(streaming transcript is preview only)" + ) + + # Run post-processing ONLY after batch completes + job_ids = start_post_conversation_jobs( + conversation_id=conversation_id, + user_id=user_id, + transcript_version_id=batch_version_id, + depends_on_job=batch_job, + client_id=client_id, + end_reason=end_reason, + ) + + logger.info( + f"📥 Pipeline: batch_retranscribe({batch_job.id}) → " + f"speaker({job_ids['speaker_recognition']}) → " + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " + f"event({job_ids['event_dispatch']})" + ) + else: + # NORMAL PATH: Process streaming transcript immediately (existing behavior) + job_ids = start_post_conversation_jobs( + conversation_id=conversation_id, + user_id=user_id, + transcript_version_id=version_id, # Pass the streaming transcript version ID + depends_on_job=None, # No dependency - streaming already succeeded + client_id=client_id, # Pass client_id for UI tracking + end_reason=end_reason, # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) 
+ ) + + logger.info( + f"📥 Pipeline: speaker({job_ids['speaker_recognition']}) → " + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " + f"event({job_ids['event_dispatch']})" + ) + + # Wait a moment to ensure jobs are registered in RQ + await asyncio.sleep(0.5) + + logger.info( + f"✅ Post-conversation pipeline started with event dispatch job (end_reason={end_reason})" + ) + + +@async_job(redis=True, beanie=True) +async def open_conversation_job( + session_id: str, + user_id: str, + client_id: str, + speech_detected_at: float, + speech_job_id: str = None, + *, + redis_client=None, +) -> Dict[str, Any]: + """ + Long-running RQ job that creates and continuously updates conversation with transcription results. + + Creates conversation when speech is detected, then monitors and updates until session ends. + + Args: + session_id: Stream session ID + user_id: User ID + client_id: Client ID + speech_detected_at: Timestamp when speech was first detected + speech_job_id: Optional speech detection job ID to update with conversation_id + redis_client: Redis client (injected by decorator) + + Returns: + Dict with conversation_id, final_result_count, runtime_seconds + + Note: user_email is fetched from the database when needed. + """ + from rq import get_current_job + + from advanced_omi_backend.services.audio_stream import ( + TranscriptionResultsAggregator, + ) + + logger.info( + f"📝 Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})" + ) + + # Phase 1: Initialize job and conversation + current_job = get_current_job() + current_job.meta = {} + current_job.save_meta() + + conversation_id = await _initialize_conversation( + session_id=session_id, + user_id=user_id, + client_id=client_id, + speech_job_id=speech_job_id, + current_job=current_job, + redis_client=redis_client, + ) + + # Phase 2: Monitor conversation (polling loop) + aggregator = TranscriptionResultsAggregator(redis_client) + state = ConversationState( + conversation_id=conversation_id, + session_id=session_id, + user_id=user_id, + client_id=client_id, + start_time=time.time(), + ) + + await _monitor_conversation_loop(state, aggregator, current_job, redis_client) + logger.info( f"✅ Conversation {conversation_id} updates complete, checking for meaningful speech..." ) - # Determine end reason based on how we exited the loop - # Check session completion_reason from Redis (set atomically with status by finalize_session) + # Phase 3: Determine end reason + session_key = f"audio:session:{session_id}" completion_reason = await redis_client.hget(session_key, "completion_reason") completion_reason_str = completion_reason.decode() if completion_reason else None - # Determine end_reason with proper precedence: - # 1. completion_reason from Redis (set by WebSocket controller: websocket_disconnect, user_stopped) - # 2. close_requested (via API, plugin, or button press) - # 3. inactivity_timeout (no speech for SPEECH_INACTIVITY_THRESHOLD_SECONDS) - # 4. max_duration (conversation exceeded max runtime) - # 5. 
user_stopped (fallback for any other exit condition) if completion_reason_str: - end_reason = completion_reason_str - logger.info(f"📊 Using completion_reason from session: {end_reason}") - elif close_requested_reason: - end_reason = "close_requested" - logger.info(f"📊 Conversation closed by request: {close_requested_reason}") - elif timeout_triggered: - end_reason = "inactivity_timeout" - elif time.time() - start_time > max_runtime: - end_reason = "max_duration" + state.end_reason = completion_reason_str + logger.info(f"📊 Using completion_reason from session: {state.end_reason}") + elif state.close_requested_reason: + state.end_reason = "close_requested" + logger.info( + f"📊 Conversation closed by request: {state.close_requested_reason}" + ) + elif state.timeout_triggered: + state.end_reason = "inactivity_timeout" + elif time.time() - state.start_time > 10740: + state.end_reason = "max_duration" else: - end_reason = "user_stopped" + state.end_reason = "user_stopped" logger.info( - f"📊 Conversation {conversation_id[:12]} end_reason determined: {end_reason}" + f"📊 Conversation {conversation_id[:12]} end_reason determined: {state.end_reason}" ) - # Wrap all post-processing in try/finally to guarantee handle_end_of_conversation() - # is always called, even if an exception occurs during transcript saving, job - # enqueuing, etc. Without this, any failure leaves the session in a zombie state - # where the WebSocket is open but no new conversation can ever start. + # Phase 4-7: Post-processing (wrapped in try/finally for guaranteed cleanup) end_of_conversation_handled = False try: - # FINAL VALIDATION: Check if conversation has meaningful speech before post-processing - # This prevents empty/noise-only conversations from being processed and saved - # NOTE: Speech was already validated during streaming, so we skip this check - # to avoid false negatives from aggregated results lacking proper word-level data logger.info( "✅ Conversation has meaningful speech (validated during streaming), proceeding with post-processing" ) - # Wait for streaming transcription consumer to complete before reading transcript - # This fixes the race condition where conversation job reads transcript before - # streaming consumer stores all final results (seen as 24+ second delay in logs) - completion_key = f"transcription:complete:{session_id}" - max_wait_streaming = 30 # seconds - waited_streaming = 0.0 - while waited_streaming < max_wait_streaming: - completion_status = await redis_client.get(completion_key) - if completion_status: - status_str = ( - completion_status.decode() - if isinstance(completion_status, bytes) - else completion_status - ) - if status_str == "error": - logger.warning( - f"⚠️ Streaming transcription ended with error for {session_id}, proceeding anyway" - ) - else: - logger.info( - f"✅ Streaming transcription confirmed complete for {session_id}" - ) - break - await asyncio.sleep(0.5) - waited_streaming += 0.5 - - if waited_streaming >= max_wait_streaming: - logger.warning( - f"⚠️ Timed out waiting for streaming completion signal for {session_id} " - f"(waited {max_wait_streaming}s), proceeding with available transcript" + # Phase 4: Wait for streaming transcription to complete + if state.close_requested_reason: + logger.info( + f"⏩ Skipping transcription:complete wait for close_requested " + f"(reason={state.close_requested_reason})" ) + else: + completion_key = f"transcription:complete:{session_id}" + max_wait_streaming = 30 # seconds + waited_streaming = 0.0 + while waited_streaming < 
max_wait_streaming: + completion_status = await redis_client.get(completion_key) + if completion_status: + status_str = ( + completion_status.decode() + if isinstance(completion_status, bytes) + else completion_status + ) + if status_str == "error": + logger.warning( + f"⚠️ Streaming transcription ended with error for {session_id}, proceeding anyway" + ) + else: + logger.info( + f"✅ Streaming transcription confirmed complete for {session_id}" + ) + break + await asyncio.sleep(0.5) + waited_streaming += 0.5 + + if waited_streaming >= max_wait_streaming: + logger.warning( + f"⚠️ Timed out waiting for streaming completion signal for {session_id} " + f"(waited {max_wait_streaming}s), proceeding with available transcript" + ) - # Wait for audio_streaming_persistence_job to complete and write MongoDB chunks + # Phase 5: Wait for audio chunks in MongoDB from advanced_omi_backend.utils.audio_chunk_utils import wait_for_audio_chunks chunks_ready = await wait_for_audio_chunks( @@ -757,235 +1044,55 @@ async def open_conversation_job( ) if not chunks_ready: - # Mark conversation as deleted - has speech but no audio chunks to process await mark_conversation_deleted( conversation_id=conversation_id, deletion_reason="audio_chunks_not_ready", ) - - # Call shared cleanup/restart logic before returning end_of_conversation_handled = True return await handle_end_of_conversation( session_id=session_id, conversation_id=conversation_id, client_id=client_id, user_id=user_id, - start_time=start_time, - last_result_count=last_result_count, - timeout_triggered=timeout_triggered, + start_time=state.start_time, + last_result_count=state.last_result_count, + timeout_triggered=state.timeout_triggered, redis_client=redis_client, - end_reason=end_reason, + end_reason=state.end_reason, ) logger.info( f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}" ) - # Get final streaming transcript and save to conversation - logger.info( - f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}" + # Phase 6: Save streaming transcript + version_id = await _save_streaming_transcript( + session_id=session_id, + conversation_id=conversation_id, + aggregator=aggregator, ) - final_transcript = await aggregator.get_combined_results(session_id) - # Fetch conversation from database to ensure we have latest state - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if not conversation: - logger.error(f"❌ Conversation {conversation_id} not found in database") - raise ValueError(f"Conversation {conversation_id} not found") - - # Create transcript version from streaming results - version_id = f"streaming_{session_id[:12]}" - transcript_text = final_transcript.get("text", "") - words_data = final_transcript.get("words", []) # All words from aggregator - - # Convert words to Word objects (including per-word speaker labels if present) - words = [ - Conversation.Word( - word=w.get("word", ""), - start=w.get("start", 0.0), - end=w.get("end", 0.0), - confidence=w.get("confidence"), - speaker=w.get("speaker"), - speaker_confidence=w.get("speaker_confidence"), - ) - for w in words_data - ] - - # Use provider-supplied segments if available (from streaming diarization), - # otherwise leave empty for speaker recognition service to fill later. 
- segments_data = final_transcript.get("segments", []) - if segments_data: - segments = [ - Conversation.SpeakerSegment( - start=s.get("start", 0.0), - end=s.get("end", 0.0), - text=s.get("text", ""), - speaker=str(s.get("speaker", "Unknown")), - words=[ - Conversation.Word( - word=sw.get("word", ""), - start=sw.get("start", 0.0), - end=sw.get("end", 0.0), - confidence=sw.get("confidence"), - speaker=sw.get("speaker"), - speaker_confidence=sw.get("speaker_confidence"), - ) - for sw in s.get("words", []) - ], - ) - for s in segments_data - ] - else: - segments = [] - - # Determine provider from streaming results - provider = final_transcript.get("provider", "deepgram") - - # Determine diarization source if provider supplied segments - diarization_source = "provider" if segments else None - - # Add streaming transcript with words at version level - version = conversation.add_transcript_version( + # Phase 7: Enqueue post-processing pipeline + await _enqueue_post_processing( + conversation_id=conversation_id, + user_id=user_id, + client_id=client_id, version_id=version_id, - transcript=transcript_text, - words=words, # Store at version level - segments=segments, # Provider segments or empty (filled by speaker service later) - provider=provider, - model=provider, # Provider name as model - processing_time_seconds=None, # Not applicable for streaming - metadata={ - "source": "streaming", - "chunk_count": final_transcript.get("chunk_count", 0), - "word_count": len(words), - "provider_capabilities": {"diarization": bool(segments)}, - }, - set_as_active=True, - ) - version.diarization_source = diarization_source - - # Update placeholder conversation if it exists - if ( - getattr(conversation, "always_persist", False) - and getattr(conversation, "processing_status", None) - == "pending_transcription" - ): - # Keep placeholder status - will be updated by title_summary_job - logger.info( - f"📝 Placeholder conversation {conversation_id} has transcript, " - f"waiting for title/summary generation" - ) - - # Save conversation with streaming transcript - await conversation.save() - segment_info = ( - f"{len(segments)} provider segments (diarization_source={diarization_source})" - if segments - else "0 segments (pending speaker recognition)" - ) - logger.info( - f"✅ Saved streaming transcript: {len(transcript_text)} chars, " - f"{segment_info}, {len(words)} words " - f"for conversation {conversation_id[:12]}" - ) - - # Enqueue post-conversation processing pipeline - client_id = conversation.client_id if conversation else None - - # Check if always_batch_retranscribe is enabled - from advanced_omi_backend.config_loader import get_backend_config - - transcription_cfg = get_backend_config("transcription") - batch_retranscribe = False - if transcription_cfg: - from omegaconf import OmegaConf - - cfg_dict = OmegaConf.to_container(transcription_cfg, resolve=True) - batch_retranscribe = cfg_dict.get("always_batch_retranscribe", False) - - if batch_retranscribe: - # BATCH PATH: Streaming transcript saved as preview — user sees it immediately - # Full post-processing (speaker, memory, title) waits for batch transcript - from advanced_omi_backend.config import get_transcription_job_timeout - from advanced_omi_backend.controllers.queue_controller import ( - JOB_RESULT_TTL, - transcription_queue, - ) - from advanced_omi_backend.workers.transcription_jobs import ( - transcribe_full_audio_job, - ) - - batch_version_id = f"batch_{conversation_id[:12]}" - batch_job = transcription_queue.enqueue( - 
transcribe_full_audio_job, - conversation_id, - batch_version_id, - "always_batch_retranscribe", - job_timeout=get_transcription_job_timeout(), - result_ttl=JOB_RESULT_TTL, - job_id=f"batch_retranscribe_{conversation_id[:12]}", - description=f"Batch re-transcription for {conversation_id[:8]}", - meta={"conversation_id": conversation_id, "client_id": client_id}, - ) - - logger.info( - f"🔄 Batch re-transcribe enabled: enqueued batch job {batch_job.id} " - f"(streaming transcript is preview only)" - ) - - # Run post-processing ONLY after batch completes - job_ids = start_post_conversation_jobs( - conversation_id=conversation_id, - user_id=user_id, - transcript_version_id=batch_version_id, - depends_on_job=batch_job, - client_id=client_id, - end_reason=end_reason, - ) - - logger.info( - f"📥 Pipeline: batch_retranscribe({batch_job.id}) → " - f"speaker({job_ids['speaker_recognition']}) → " - f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " - f"event({job_ids['event_dispatch']})" - ) - else: - # NORMAL PATH: Process streaming transcript immediately (existing behavior) - job_ids = start_post_conversation_jobs( - conversation_id=conversation_id, - user_id=user_id, - transcript_version_id=version_id, # Pass the streaming transcript version ID - depends_on_job=None, # No dependency - streaming already succeeded - client_id=client_id, # Pass client_id for UI tracking - end_reason=end_reason, # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) - ) - - logger.info( - f"📥 Pipeline: speaker({job_ids['speaker_recognition']}) → " - f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " - f"event({job_ids['event_dispatch']})" - ) - - # Wait a moment to ensure jobs are registered in RQ - await asyncio.sleep(0.5) - - logger.info( - f"✅ Post-conversation pipeline started with event dispatch job (end_reason={end_reason})" + end_reason=state.end_reason, ) - # Call shared cleanup/restart logic + # Cleanup and session restart end_of_conversation_handled = True return await handle_end_of_conversation( session_id=session_id, conversation_id=conversation_id, client_id=client_id, user_id=user_id, - start_time=start_time, - last_result_count=last_result_count, - timeout_triggered=timeout_triggered, + start_time=state.start_time, + last_result_count=state.last_result_count, + timeout_triggered=state.timeout_triggered, redis_client=redis_client, - end_reason=end_reason, + end_reason=state.end_reason, ) finally: if not end_of_conversation_handled: @@ -999,9 +1106,9 @@ async def open_conversation_job( conversation_id=conversation_id, client_id=client_id, user_id=user_id, - start_time=start_time, - last_result_count=last_result_count, - timeout_triggered=timeout_triggered, + start_time=state.start_time, + last_result_count=state.last_result_count, + timeout_triggered=state.timeout_triggered, redis_client=redis_client, end_reason="error", ) diff --git a/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py index 8c67616d..43ed4f32 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py @@ -9,7 +9,7 @@ from rq.job import Job from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.services.obsidian_service import obsidian_service +from advanced_omi_backend.services.obsidian_service import get_obsidian_service logger = logging.getLogger(__name__) @@ -26,7 
+26,7 @@ def count_markdown_files(vault_path: str) -> int: @async_job(redis=True, beanie=False) -async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=None) -> dict: # type: ignore +async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=None) -> dict: # type: ignore """ Long-running ingestion job enqueued on the default RQ queue. """ @@ -42,7 +42,7 @@ async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=N job.save_meta() try: - obsidian_service.setup_database() + get_obsidian_service().setup_database() except Exception as exc: logger.exception("Database setup failed for job %s: %s", job.id, exc) job.meta["status"] = "failed" @@ -80,16 +80,17 @@ async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=N return {"status": "canceled"} try: - note_data = obsidian_service.parse_obsidian_note(root, filename, vault_path) - chunks = await obsidian_service.chunking_and_embedding(note_data) + svc = get_obsidian_service() + note_data = svc.parse_obsidian_note(root, filename, vault_path) + chunks = await svc.chunking_and_embedding(note_data) if chunks: - obsidian_service.ingest_note_and_chunks(note_data, chunks) - + svc.ingest_note_and_chunks(note_data, chunks) + processed += 1 job.meta["processed"] = processed job.meta["last_file"] = os.path.join(root, filename) job.save_meta() - + except Exception as exc: logger.error("Processing %s failed: %s", filename, exc) errors.append(f"{filename}: {exc}") @@ -103,5 +104,5 @@ async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=N "status": "finished", "processed": processed, "total": total, - "errors": errors + "errors": errors, } diff --git a/backends/advanced/src/scripts/cleanup_state.py b/backends/advanced/src/scripts/cleanup_state.py index 49bfd332..253f3806 100644 --- a/backends/advanced/src/scripts/cleanup_state.py +++ b/backends/advanced/src/scripts/cleanup_state.py @@ -35,7 +35,13 @@ from qdrant_client.models import Distance, VectorParams from rich.console import Console from rich.panel import Panel - from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn + from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, + ) from rich.prompt import Confirm from rich.table import Table from rich.text import Text @@ -62,12 +68,18 @@ # Helpers # --------------------------------------------------------------------------- + def get_qdrant_collection_name() -> str: """Get Qdrant collection name from memory service configuration.""" try: memory_config = build_memory_config_from_env() - if hasattr(memory_config, "vector_store_config") and memory_config.vector_store_config: - return memory_config.vector_store_config.get("collection_name", "chronicle_memories") + if ( + hasattr(memory_config, "vector_store_config") + and memory_config.vector_store_config + ): + return memory_config.vector_store_config.get( + "collection_name", "chronicle_memories" + ) except Exception: pass return "chronicle_memories" @@ -93,6 +105,7 @@ def _human_size(nbytes: int) -> str: # Stats # --------------------------------------------------------------------------- + class Stats: """Track counts across the system.""" @@ -176,8 +189,10 @@ async def gather_stats( # LangFuse prompts if langfuse_client: try: - prompts_response = langfuse_client.prompts.list(limit=100) - s.langfuse_prompts = len(prompts_response.data) if hasattr(prompts_response, "data") else 0 + prompts_response = 
langfuse_client.api.prompts.list(limit=100) + s.langfuse_prompts = ( + len(prompts_response.data) if hasattr(prompts_response, "data") else 0 + ) except Exception: pass @@ -205,13 +220,21 @@ def render_stats_table(stats: Stats, title: str = "Current State") -> Table: def row(label, value, style="white"): table.add_row(label, f"[{style}]{value}[/{style}]") - row("Conversations", str(stats.conversations), "green" if stats.conversations else "dim") + row( + "Conversations", + str(stats.conversations), + "green" if stats.conversations else "dim", + ) row( " with transcripts", str(stats.conversations_with_transcript), "green" if stats.conversations_with_transcript else "dim", ) - row("Audio Chunks", str(stats.audio_chunks), "green" if stats.audio_chunks else "dim") + row( + "Audio Chunks", + str(stats.audio_chunks), + "green" if stats.audio_chunks else "dim", + ) row("Waveforms", str(stats.waveforms), "dim") row("Chat Sessions", str(stats.chat_sessions), "dim") row("Chat Messages", str(stats.chat_messages), "dim") @@ -220,7 +243,11 @@ def row(label, value, style="white"): row("Memories (Qdrant)", str(stats.memories), "yellow" if stats.memories else "dim") row("Neo4j Nodes", str(stats.neo4j_nodes), "dim") row("Neo4j Relationships", str(stats.neo4j_relationships), "dim") - row("LangFuse Prompts", str(stats.langfuse_prompts), "yellow" if stats.langfuse_prompts else "dim") + row( + "LangFuse Prompts", + str(stats.langfuse_prompts), + "yellow" if stats.langfuse_prompts else "dim", + ) table.add_section() row("Redis Jobs", str(stats.redis_jobs), "dim") row("Legacy WAV Files", str(stats.legacy_wav), "dim") @@ -234,6 +261,7 @@ def row(label, value, style="white"): # Backup # --------------------------------------------------------------------------- + class BackupResult: """Track which backup exports succeeded or failed.""" @@ -241,7 +269,13 @@ def __init__(self): self.exports: dict[str, dict] = {} # name -> {ok, path, size, sha256, error} def record(self, name: str, path: Optional[Path], ok: bool, error: str = ""): - entry = {"ok": ok, "error": error, "path": str(path) if path else None, "size": 0, "sha256": ""} + entry = { + "ok": ok, + "error": error, + "path": str(path) if path else None, + "size": 0, + "sha256": "", + } if ok and path and path.exists(): entry["size"] = path.stat().st_size entry["sha256"] = _file_sha256(path) @@ -255,10 +289,16 @@ def all_ok(self) -> bool: def critical_ok(self) -> bool: """conversations, audio_metadata, and annotations are critical.""" critical = ("conversations", "audio_metadata", "annotations") - return all(self.exports.get(n, {}).get("ok", False) for n in critical if n in self.exports) + return all( + self.exports.get(n, {}).get("ok", False) + for n in critical + if n in self.exports + ) def render_table(self) -> Table: - table = Table(title="Backup Verification", border_style="dim", title_style="bold white") + table = Table( + title="Backup Verification", border_style="dim", title_style="bold white" + ) table.add_column("Export", style="white", min_width=24) table.add_column("Status", justify="center", min_width=8) table.add_column("Size", justify="right", min_width=10) @@ -285,7 +325,14 @@ def total_size(self) -> int: class BackupManager: """Export data to a timestamped backup directory.""" - def __init__(self, backup_dir: str, export_audio: bool, mongo_db: Any, neo4j_driver: Any = None, langfuse_client: Any = None): + def __init__( + self, + backup_dir: str, + export_audio: bool, + mongo_db: Any, + neo4j_driver: Any = None, + langfuse_client: Any = None, + ): 
self.backup_dir = Path(backup_dir) self.export_audio = export_audio self.mongo_db = mongo_db @@ -323,7 +370,9 @@ async def run( steps.append(("audio_wav", self._export_audio_wav)) if qdrant_client: - steps.append(("memories", lambda r: self._export_memories(qdrant_client, r))) + steps.append( + ("memories", lambda r: self._export_memories(qdrant_client, r)) + ) if self.neo4j_driver: steps.append(("neo4j_graph", self._export_neo4j)) @@ -336,7 +385,8 @@ async def run( for name, func in steps: progress.update(task, description=f"Exporting {name}...") try: - path = await func(result) if asyncio.iscoroutinefunction(func) else func(result) + ret = func(result) + path = await ret if asyncio.iscoroutine(ret) else ret if not result.exports.get(name): # func didn't record itself - record success result.record(name, path, True) @@ -384,19 +434,21 @@ async def _export_audio_metadata(self, result: BackupResult) -> Path: cursor = collection.find({}) data = [] async for chunk in cursor: - data.append({ - "conversation_id": chunk.get("conversation_id"), - "chunk_index": chunk.get("chunk_index"), - "start_time": chunk.get("start_time"), - "end_time": chunk.get("end_time"), - "duration": chunk.get("duration"), - "original_size": chunk.get("original_size"), - "compressed_size": chunk.get("compressed_size"), - "sample_rate": chunk.get("sample_rate", 16000), - "channels": chunk.get("channels", 1), - "has_speech": chunk.get("has_speech"), - "created_at": str(chunk.get("created_at", "")), - }) + data.append( + { + "conversation_id": chunk.get("conversation_id"), + "chunk_index": chunk.get("chunk_index"), + "start_time": chunk.get("start_time"), + "end_time": chunk.get("end_time"), + "duration": chunk.get("duration"), + "original_size": chunk.get("original_size"), + "compressed_size": chunk.get("compressed_size"), + "sample_rate": chunk.get("sample_rate", 16000), + "channels": chunk.get("channels", 1), + "has_speech": chunk.get("has_speech"), + "created_at": str(chunk.get("created_at", "")), + } + ) path = self.backup_path / "audio_chunks_metadata.json" with open(path, "w") as f: json.dump(data, f, indent=2, default=str) @@ -417,14 +469,16 @@ async def _export_chat_sessions(self, result: BackupResult) -> Path: cursor = collection.find({}) data = [] async for session in cursor: - data.append({ - "session_id": session.get("session_id"), - "user_id": session.get("user_id"), - "title": session.get("title"), - "created_at": str(session.get("created_at", "")), - "updated_at": str(session.get("updated_at", "")), - "metadata": session.get("metadata", {}), - }) + data.append( + { + "session_id": session.get("session_id"), + "user_id": session.get("user_id"), + "title": session.get("title"), + "created_at": str(session.get("created_at", "")), + "updated_at": str(session.get("updated_at", "")), + "metadata": session.get("metadata", {}), + } + ) path = self.backup_path / "chat_sessions.json" with open(path, "w") as f: json.dump(data, f, indent=2, default=str) @@ -436,16 +490,18 @@ async def _export_chat_messages(self, result: BackupResult) -> Path: cursor = collection.find({}) data = [] async for msg in cursor: - data.append({ - "message_id": msg.get("message_id"), - "session_id": msg.get("session_id"), - "user_id": msg.get("user_id"), - "role": msg.get("role"), - "content": msg.get("content"), - "timestamp": str(msg.get("timestamp", "")), - "memories_used": msg.get("memories_used", []), - "metadata": msg.get("metadata", {}), - }) + data.append( + { + "message_id": msg.get("message_id"), + "session_id": 
msg.get("session_id"), + "user_id": msg.get("user_id"), + "role": msg.get("role"), + "content": msg.get("content"), + "timestamp": str(msg.get("timestamp", "")), + "memories_used": msg.get("memories_used", []), + "metadata": msg.get("metadata", {}), + } + ) path = self.backup_path / "chat_messages.json" with open(path, "w") as f: json.dump(data, f, indent=2, default=str) @@ -479,7 +535,9 @@ async def _export_audio_wav(self, result: BackupResult) -> Optional[Path]: for conv in conversations: try: - ok = await self._export_conversation_audio(conv.conversation_id, audio_dir) + ok = await self._export_conversation_audio( + conv.conversation_id, audio_dir + ) if ok: exported += 1 except Exception as e: @@ -491,11 +549,19 @@ async def _export_audio_wav(self, result: BackupResult) -> Optional[Path]: result.record("audio_wav", audio_dir, ok, error) return audio_dir - async def _export_conversation_audio(self, conversation_id: str, audio_dir: Path) -> bool: + async def _export_conversation_audio( + self, conversation_id: str, audio_dir: Path + ) -> bool: """Decode Opus chunks to WAV for a single conversation. Returns True if audio was exported.""" - chunks = await AudioChunkDocument.find( - AudioChunkDocument.conversation_id == conversation_id - ).sort("+chunk_index").to_list() + from advanced_omi_backend.utils.audio_chunk_utils import decode_opus_to_pcm + + chunks = ( + await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id + ) + .sort("+chunk_index") + .to_list() + ) if not chunks: return False @@ -506,44 +572,49 @@ async def _export_conversation_audio(self, conversation_id: str, audio_dir: Path sample_rate = chunks[0].sample_rate channels = chunks[0].channels - # Try opuslib, fall back gracefully - try: - import opuslib - - decoder = opuslib.Decoder(sample_rate, channels) - pcm_parts = [] - for chunk in chunks: - frame_size = int(sample_rate * chunk.duration / channels) - decoded = decoder.decode(bytes(chunk.audio_data), frame_size) - pcm_parts.append(decoded) - except ImportError: - logger.warning("opuslib not available, skipping audio export") - return False - except Exception as e: - logger.warning(f"Opus decode error for {conversation_id}: {e}") - return False + # Decode all chunks using FFmpeg (same path as UI playback) + pcm_buffer = bytearray() + for chunk in chunks: + try: + pcm_data = await decode_opus_to_pcm( + opus_data=bytes(chunk.audio_data), + sample_rate=sample_rate, + channels=channels, + ) + pcm_buffer.extend(pcm_data) + except Exception as e: + logger.warning( + f"Opus decode error for {conversation_id} chunk {chunk.chunk_index}: {e}" + ) + continue - all_pcm = b"".join(pcm_parts) - samples = struct.unpack(f"<{len(all_pcm) // 2}h", all_pcm) + if not pcm_buffer: + return False # Split into 1-minute WAV files - samples_per_minute = sample_rate * 60 * channels import wave + bytes_per_minute = ( + sample_rate * channels * 2 * 60 + ) # 16-bit = 2 bytes per sample + all_pcm = bytes(pcm_buffer) chunk_num = 1 - for start in range(0, len(samples), samples_per_minute): + + for start in range(0, len(all_pcm), bytes_per_minute): wav_path = conv_dir / f"chunk_{chunk_num:03d}.wav" - segment = samples[start : start + samples_per_minute] + segment_pcm = all_pcm[start : start + bytes_per_minute] with wave.open(str(wav_path), "wb") as wf: wf.setnchannels(channels) wf.setsampwidth(2) wf.setframerate(sample_rate) - wf.writeframes(struct.pack(f"<{len(segment)}h", *segment)) + wf.writeframes(segment_pcm) chunk_num += 1 return True - async def _export_memories(self, 
qdrant_client: AsyncQdrantClient, result: BackupResult) -> Path: + async def _export_memories( + self, qdrant_client: AsyncQdrantClient, result: BackupResult + ) -> Path: collection_name = get_qdrant_collection_name() collections = await qdrant_client.get_collections() exists = any(c.name == collection_name for c in collections.collections) @@ -568,7 +639,9 @@ async def _export_memories(self, qdrant_client: AsyncQdrantClient, result: Backu if not points: break for pt in points: - data.append({"id": str(pt.id), "vector": pt.vector, "payload": pt.payload}) + data.append( + {"id": str(pt.id), "vector": pt.vector, "payload": pt.payload} + ) if next_offset is None: break offset = next_offset @@ -583,7 +656,9 @@ def _export_neo4j(self, result: BackupResult) -> Path: try: with self.neo4j_driver.session() as session: nodes_data = [] - for record in session.run("MATCH (n) RETURN n, labels(n) AS labels, elementId(n) AS eid"): + for record in session.run( + "MATCH (n) RETURN n, labels(n) AS labels, elementId(n) AS eid" + ): node = dict(record["n"]) node["_labels"] = record["labels"] node["_element_id"] = record["eid"] @@ -594,15 +669,24 @@ def _export_neo4j(self, result: BackupResult) -> Path: "MATCH (a)-[r]->(b) RETURN elementId(a) AS src, type(r) AS rel_type, " "properties(r) AS props, elementId(b) AS dst" ): - rels_data.append({ - "source": record["src"], - "type": record["rel_type"], - "properties": dict(record["props"]) if record["props"] else {}, - "target": record["dst"], - }) + rels_data.append( + { + "source": record["src"], + "type": record["rel_type"], + "properties": ( + dict(record["props"]) if record["props"] else {} + ), + "target": record["dst"], + } + ) with open(path, "w") as f: - json.dump({"nodes": nodes_data, "relationships": rels_data}, f, indent=2, default=str) + json.dump( + {"nodes": nodes_data, "relationships": rels_data}, + f, + indent=2, + default=str, + ) result.record("neo4j_graph", path, True) except Exception as e: result.record("neo4j_graph", None, False, str(e)) @@ -617,7 +701,7 @@ def _export_langfuse_prompts(self, result: BackupResult) -> Path: try: # Discover all prompt names via list API prompt_names = [] - prompts_response = self.langfuse_client.prompts.list(limit=100) + prompts_response = self.langfuse_client.api.prompts.list(limit=100) if hasattr(prompts_response, "data"): for p in prompts_response.data: prompt_names.append(p.name) @@ -654,6 +738,7 @@ def _export_langfuse_prompts(self, result: BackupResult) -> Path: # Cleanup # --------------------------------------------------------------------------- + class CleanupManager: """Delete data across all services.""" @@ -768,6 +853,7 @@ def _cleanup_legacy_wav(self, stats: Stats): # Connection setup # --------------------------------------------------------------------------- + async def connect_services(): """Initialize all service connections. 
Returns (mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client).""" # MongoDB @@ -777,7 +863,13 @@ async def connect_services(): mongo_db = mongo_client[mongodb_database] await init_beanie( database=mongo_db, - document_models=[Conversation, AudioChunkDocument, WaveformData, User, Annotation], + document_models=[ + Conversation, + AudioChunkDocument, + WaveformData, + User, + Annotation, + ], ) # Redis @@ -827,6 +919,7 @@ async def connect_services(): # Display helpers # --------------------------------------------------------------------------- + def print_header(): console.print() console.print( @@ -841,11 +934,19 @@ def print_header(): def print_dry_run(stats: Stats, args): console.print() - console.print(Panel("[bold yellow]DRY-RUN MODE[/bold yellow] - no changes will be made", border_style="yellow")) + console.print( + Panel( + "[bold yellow]DRY-RUN MODE[/bold yellow] - no changes will be made", + border_style="yellow", + ) + ) console.print() if args.backup or args.backup_only: - console.print("[cyan]Would create backup at:[/cyan]", str(Path(args.backup_dir) / f"backup_...")) + console.print( + "[cyan]Would create backup at:[/cyan]", + str(Path(args.backup_dir) / f"backup_..."), + ) if args.export_audio: audio_note = f"(from {stats.conversations_with_transcript} conversations with transcripts)" console.print(f"[cyan]Would export audio WAV files[/cyan] {audio_note}") @@ -887,18 +988,26 @@ def print_confirmation(stats: Stats, args) -> bool: console.print() if args.backup or args.backup_only: - console.print(Panel( - f"[green]Backup will be created at:[/green] {args.backup_dir}\n" - + ("[green]Audio WAV export included[/green]" if args.export_audio else "[dim]Audio WAV export: off[/dim]"), - title="Backup", - border_style="green", - )) + console.print( + Panel( + f"[green]Backup will be created at:[/green] {args.backup_dir}\n" + + ( + "[green]Audio WAV export included[/green]" + if args.export_audio + else "[dim]Audio WAV export: off[/dim]" + ), + title="Backup", + border_style="green", + ) + ) elif not args.backup_only: - console.print(Panel( - "[bold red]No backup will be created![/bold red]\nData will be permanently lost.", - title="Warning", - border_style="red", - )) + console.print( + Panel( + "[bold red]No backup will be created![/bold red]\nData will be permanently lost.", + title="Warning", + border_style="red", + ) + ) if not args.backup_only: items = [ @@ -911,18 +1020,22 @@ def print_confirmation(stats: Stats, args) -> bool: f" {stats.memories} memories", ] if stats.neo4j_nodes: - items.append(f" {stats.neo4j_nodes} Neo4j nodes + {stats.neo4j_relationships} relationships") + items.append( + f" {stats.neo4j_nodes} Neo4j nodes + {stats.neo4j_relationships} relationships" + ) items.append(f" {stats.redis_jobs} Redis jobs") if args.include_wav: items.append(f" {stats.legacy_wav} legacy WAV files") if args.delete_users: items.append(f" [bold red]{stats.users} users (DANGEROUS)[/bold red]") - console.print(Panel( - "\n".join(items), - title="[bold red]Will Delete[/bold red]", - border_style="red", - )) + console.print( + Panel( + "\n".join(items), + title="[bold red]Will Delete[/bold red]", + border_style="red", + ) + ) console.print() return Confirm.ask("[bold]Proceed?[/bold]", default=False) @@ -932,6 +1045,7 @@ def print_confirmation(stats: Stats, args) -> bool: # Main # --------------------------------------------------------------------------- + async def main(): parser = argparse.ArgumentParser( description="Chronicle Cleanup & Backup Tool", @@ -947,14 +1061,37 
@@ async def main(): """, ) - parser.add_argument("--backup", action="store_true", help="Create backup before cleaning") - parser.add_argument("--backup-only", action="store_true", help="Create backup WITHOUT cleaning (safe)") - parser.add_argument("--export-audio", action="store_true", help="Include audio WAV export in backup (conversations with transcripts only)") - parser.add_argument("--include-wav", action="store_true", help="Include legacy WAV file cleanup") - parser.add_argument("--dry-run", action="store_true", help="Preview without making changes") + parser.add_argument( + "--backup", action="store_true", help="Create backup before cleaning" + ) + parser.add_argument( + "--backup-only", + action="store_true", + help="Create backup WITHOUT cleaning (safe)", + ) + parser.add_argument( + "--export-audio", + action="store_true", + help="Include audio WAV export in backup (conversations with transcripts only)", + ) + parser.add_argument( + "--include-wav", action="store_true", help="Include legacy WAV file cleanup" + ) + parser.add_argument( + "--dry-run", action="store_true", help="Preview without making changes" + ) parser.add_argument("--force", action="store_true", help="Skip confirmation prompt") - parser.add_argument("--backup-dir", type=str, default="/app/data/backups", help="Backup directory (default: /app/data/backups)") - parser.add_argument("--delete-users", action="store_true", help="DANGEROUS: Also delete user accounts") + parser.add_argument( + "--backup-dir", + type=str, + default="/app/data/backups", + help="Backup directory (default: /app/data/backups)", + ) + parser.add_argument( + "--delete-users", + action="store_true", + help="DANGEROUS: Also delete user accounts", + ) args = parser.parse_args() @@ -968,11 +1105,15 @@ async def main(): # Connect with console.status("[bold cyan]Connecting to services...", spinner="dots"): - mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client = await connect_services() + mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client = ( + await connect_services() + ) # Gather stats with console.status("[bold cyan]Gathering statistics...", spinner="dots"): - stats = await gather_stats(mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client) + stats = await gather_stats( + mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client + ) console.print() console.print(render_stats_table(stats, "Current Backend State")) @@ -993,7 +1134,9 @@ async def main(): do_backup = args.backup or args.backup_only if do_backup: console.print() - backup_mgr = BackupManager(args.backup_dir, args.export_audio, mongo_db, neo4j_driver, langfuse_client) + backup_mgr = BackupManager( + args.backup_dir, args.export_audio, mongo_db, neo4j_driver, langfuse_client + ) result = await backup_mgr.run(qdrant_client, stats) console.print() @@ -1006,44 +1149,61 @@ async def main(): if not result.critical_ok: console.print() - console.print(Panel( - "[bold red]Critical backup exports failed![/bold red]\n" - "Conversations or audio metadata could not be exported.\n" - "Cleanup will NOT proceed to protect your data.", - title="Backup Verification Failed", - border_style="red", - )) + console.print( + Panel( + "[bold red]Critical backup exports failed![/bold red]\n" + "Conversations or audio metadata could not be exported.\n" + "Cleanup will NOT proceed to protect your data.", + title="Backup Verification Failed", + border_style="red", + ) + ) sys.exit(1) if not result.all_ok: console.print() - console.print("[yellow]Some non-critical 
exports failed (see table above).[/yellow]") + console.print( + "[yellow]Some non-critical exports failed (see table above).[/yellow]" + ) # If backup-only, we're done if args.backup_only: console.print() - console.print(Panel( - "[bold green]Backup completed successfully![/bold green]\n" - "No data was deleted.", - border_style="green", - )) + console.print( + Panel( + "[bold green]Backup completed successfully![/bold green]\n" + "No data was deleted.", + border_style="green", + ) + ) return # Cleanup console.print() cleanup_mgr = CleanupManager( - mongo_db, redis_conn, qdrant_client, args.include_wav, args.delete_users, neo4j_driver + mongo_db, + redis_conn, + qdrant_client, + args.include_wav, + args.delete_users, + neo4j_driver, ) success = await cleanup_mgr.run(stats) if not success: - console.print(Panel("[bold red]Cleanup encountered errors![/bold red]", border_style="red")) + console.print( + Panel( + "[bold red]Cleanup encountered errors![/bold red]", border_style="red" + ) + ) sys.exit(1) # Verify console.print() with console.status("[bold cyan]Verifying cleanup...", spinner="dots"): - final_stats = await gather_stats(mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client) + final_stats = await gather_stats( + mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client + ) console.print(render_stats_table(final_stats, "After Cleanup")) diff --git a/backends/advanced/tests/test_obsidian_service.py b/backends/advanced/tests/test_obsidian_service.py index 0daafc1a..e9408290 100644 --- a/backends/advanced/tests/test_obsidian_service.py +++ b/backends/advanced/tests/test_obsidian_service.py @@ -1,41 +1,56 @@ -import unittest import asyncio -from unittest.mock import MagicMock, patch, AsyncMock -import sys import os +import sys +import unittest +from unittest.mock import AsyncMock, MagicMock, patch -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) from advanced_omi_backend.services.obsidian_service import ( - ObsidianService, ObsidianSearchError, + ObsidianService, ) + class TestObsidianService(unittest.TestCase): def setUp(self): # Patch load_root_config - self.config_patcher = patch('advanced_omi_backend.services.obsidian_service.load_root_config') + self.config_patcher = patch( + "advanced_omi_backend.services.obsidian_service.load_root_config" + ) self.mock_load_config = self.config_patcher.start() self.mock_load_config.return_value = { - 'defaults': {'llm': 'gpt-4', 'embedding': 'text-embedding-3-small'}, - 'models': [ - {'name': 'gpt-4', 'model_url': 'https://api.openai.com/v1', 'api_key': 'sk-test'}, - {'name': 'text-embedding-3-small', 'model_name': 'text-embedding-3-small', 'embedding_dimensions': 1536, 'model_url': 'https://api.openai.com/v1', 'api_key': 'sk-test'} - ] + "defaults": {"llm": "gpt-4", "embedding": "text-embedding-3-small"}, + "models": [ + { + "name": "gpt-4", + "model_url": "https://api.openai.com/v1", + "api_key": "sk-test", + }, + { + "name": "text-embedding-3-small", + "model_name": "text-embedding-3-small", + "embedding_dimensions": 1536, + "model_url": "https://api.openai.com/v1", + "api_key": "sk-test", + }, + ], } self.addCleanup(self.config_patcher.stop) # Patch embedding helper self.embedding_patcher = patch( - 'advanced_omi_backend.services.obsidian_service.generate_openai_embeddings', - new_callable=AsyncMock + "advanced_omi_backend.services.obsidian_service.generate_openai_embeddings", + new_callable=AsyncMock, 
) self.mock_generate_embeddings = self.embedding_patcher.start() self.addCleanup(self.embedding_patcher.stop) # Patch GraphDatabase - self.graph_db_patcher = patch('advanced_omi_backend.services.neo4j_client.GraphDatabase') + self.graph_db_patcher = patch( + "advanced_omi_backend.services.neo4j_client.GraphDatabase" + ) self.mock_graph_db = self.graph_db_patcher.start() self.mock_driver = MagicMock() self.mock_session = MagicMock() @@ -44,14 +59,17 @@ def setUp(self): self.addCleanup(self.graph_db_patcher.stop) # Patch environment variables - self.env_patcher = patch.dict(os.environ, { - "NEO4J_HOST": "localhost", - "NEO4J_USER": "neo4j", - "NEO4J_PASSWORD": "password" - }) + self.env_patcher = patch.dict( + os.environ, + { + "NEO4J_HOST": "localhost", + "NEO4J_USER": "neo4j", + "NEO4J_PASSWORD": "password", + }, + ) self.env_patcher.start() self.addCleanup(self.env_patcher.stop) - + # Initialize Service self.service = ObsidianService() @@ -59,45 +77,45 @@ def test_search_obsidian_success(self): # Setup mock embedding response mock_embedding = [0.1, 0.2, 0.3] self.mock_generate_embeddings.return_value = [mock_embedding] - + # Setup mock Neo4j results mock_record1 = { - 'source': 'Note1', - 'content': 'Content of chunk 1', - 'tags': ['tag1', 'tag2'], - 'outgoing_links': ['Note2'], - 'score': 0.95 + "source": "Note1", + "content": "Content of chunk 1", + "tags": ["tag1", "tag2"], + "outgoing_links": ["Note2"], + "score": 0.95, } mock_record2 = { - 'source': 'Note2', - 'content': 'Content of chunk 2', - 'tags': [], - 'outgoing_links': [], - 'score': 0.90 + "source": "Note2", + "content": "Content of chunk 2", + "tags": [], + "outgoing_links": [], + "score": 0.90, } - + # The session.run returns an iterable of records self.mock_session.run.return_value = [mock_record1, mock_record2] - + # Execute search response = asyncio.run(self.service.search_obsidian("test query", limit=2)) - + # Assertions # 1. Check embedding call self.mock_generate_embeddings.assert_awaited_once() - + # 2. Check Neo4j query execution self.mock_session.run.assert_called_once() args, kwargs = self.mock_session.run.call_args self.assertIn("CALL db.index.vector.queryNodes", args[0]) - self.assertEqual(kwargs['vector'], mock_embedding) - self.assertEqual(kwargs['limit'], 2) - + self.assertEqual(kwargs["vector"], mock_embedding) + self.assertEqual(kwargs["limit"], 2) + # 3. 
Check results formatting - self.assertEqual(len(response['results']), 2) - + self.assertEqual(len(response["results"]), 2) + # Check first result format - first_entry = response['results'][0] + first_entry = response["results"][0] self.assertIn("SOURCE: Note1", first_entry) self.assertIn("TAGS: tag1, tag2", first_entry) self.assertIn("RELATED NOTES: Note2", first_entry) @@ -105,19 +123,22 @@ def test_search_obsidian_success(self): def test_setup_database(self): self.service.setup_database() - + # Verify constraints and index creation calls self.assertTrue(self.mock_session.run.called) # It should run at least 3 queries: Note constraint, Chunk constraint, Vector Index self.assertGreaterEqual(self.mock_session.run.call_count, 3) - + calls = [call[0][0] for call in self.mock_session.run.call_args_list] self.assertTrue(any("CREATE CONSTRAINT note_path" in c for c in calls)) self.assertTrue(any("CREATE CONSTRAINT chunk_id" in c for c in calls)) self.assertTrue(any("CREATE VECTOR INDEX chunk_embeddings" in c for c in calls)) - @patch('advanced_omi_backend.services.obsidian_service.chunk_text_with_spacy') - def test_chunking_and_embedding_uses_shared_chunker(self, mock_chunker): + @patch( + "advanced_omi_backend.services.obsidian_service.semantic_chunk_text", + new_callable=AsyncMock, + ) + def test_chunking_and_embedding_uses_semantic_chunker(self, mock_chunker): mock_chunker.return_value = ["part1"] self.mock_generate_embeddings.return_value = [[0.1, 0.2]] note_data = { @@ -130,7 +151,18 @@ def test_chunking_and_embedding_uses_shared_chunker(self, mock_chunker): "tags": [], } chunks = asyncio.run(self.service.chunking_and_embedding(note_data)) - mock_chunker.assert_called_once_with("sample", max_tokens=self.service.chunk_word_limit) + mock_chunker.assert_awaited_once() + call_kwargs = mock_chunker.call_args + self.assertEqual( + call_kwargs[1]["buffer_size"], self.service.semantic_buffer_size + ) + self.assertEqual( + call_kwargs[1]["breakpoint_percentile_threshold"], + self.service.semantic_breakpoint_percentile, + ) + self.assertEqual( + call_kwargs[1]["max_chunk_words"], self.service.max_chunk_words + ) self.mock_generate_embeddings.assert_awaited_once() self.assertEqual(len(chunks), 1) @@ -142,21 +174,19 @@ def test_ingest_note_and_chunks(self): "content": "some content", "wordcount": 2, "links": ["OtherNote"], - "tags": ["tag1"] + "tags": ["tag1"], } - chunks = [ - {"text": "chunk1", "embedding": [0.1, 0.2]} - ] - + chunks = [{"text": "chunk1", "embedding": [0.1, 0.2]}] + self.service.ingest_note_and_chunks(note_data, chunks) - + # Verify DB calls # 1. Note + Folder merge # 2. Chunk merge # 3. Tag merge # 4. 
Link merge self.assertGreaterEqual(self.mock_session.run.call_count, 4) - + calls = [call[0][0] for call in self.mock_session.run.call_args_list] self.assertTrue(any("MERGE (f:Folder" in c for c in calls)) self.assertTrue(any("MERGE (c:Chunk" in c for c in calls)) @@ -166,10 +196,10 @@ def test_ingest_note_and_chunks(self): def test_search_obsidian_embedding_fail(self): # Mock embedding failure (raises exception) self.mock_generate_embeddings.side_effect = Exception("API Error") - + with self.assertRaises(ObsidianSearchError) as ctx: asyncio.run(self.service.search_obsidian("test query")) - + self.assertEqual(ctx.exception.stage, "embedding") self.assertIn("API Error", str(ctx.exception)) self.mock_session.run.assert_not_called() @@ -178,13 +208,13 @@ def test_search_obsidian_db_fail(self): # Setup mock embedding mock_embedding = [0.1] self.mock_generate_embeddings.return_value = [mock_embedding] - + # Mock DB failure self.mock_session.run.side_effect = Exception("DB Connection Failed") - + with self.assertRaises(ObsidianSearchError) as ctx: asyncio.run(self.service.search_obsidian("test query")) - + self.assertEqual(ctx.exception.stage, "database") self.assertIn("DB Connection Failed", str(ctx.exception)) @@ -192,13 +222,14 @@ def test_search_obsidian_empty_results(self): # Setup mock embedding mock_embedding = [0.1] self.mock_generate_embeddings.return_value = [mock_embedding] - + # Mock empty DB results self.mock_session.run.return_value = [] - + response = asyncio.run(self.service.search_obsidian("test query")) - - self.assertEqual(response['results'], []) -if __name__ == '__main__': + self.assertEqual(response["results"], []) + + +if __name__ == "__main__": unittest.main() diff --git a/backends/advanced/tests/test_text_chunking.py b/backends/advanced/tests/test_text_chunking.py new file mode 100644 index 00000000..62a32c6f --- /dev/null +++ b/backends/advanced/tests/test_text_chunking.py @@ -0,0 +1,376 @@ +"""Unit tests for semantic text chunking.""" + +import asyncio +import math +import os +import sys +import unittest +from unittest.mock import AsyncMock + +import numpy as np + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) + +from advanced_omi_backend.utils.text_chunking import ( + _build_buffered_sentences, + _cosine_distances, + _enforce_max_chunk_words, + _find_breakpoints, + semantic_chunk_text, + split_sentences, +) + + +class TestSplitSentences(unittest.TestCase): + def test_basic_splitting(self): + text = "Hello world. How are you? I am fine!" + result = split_sentences(text) + self.assertEqual(result, ["Hello world.", "How are you?", "I am fine!"]) + + def test_single_sentence(self): + self.assertEqual(split_sentences("Just one sentence."), ["Just one sentence."]) + + def test_empty_string(self): + self.assertEqual(split_sentences(""), []) + self.assertEqual(split_sentences(" "), []) + + def test_no_terminal_punctuation(self): + result = split_sentences("No punctuation here") + self.assertEqual(result, ["No punctuation here"]) + + def test_multiple_spaces(self): + result = split_sentences("First sentence. Second sentence.") + self.assertEqual(len(result), 2) + + def test_newlines_split_sentences(self): + result = split_sentences("Hello world.\nNew line here.") + # Newline after punctuation splits into separate sentences + self.assertEqual(len(result), 2) + + def test_preserves_sentence_content(self): + text = "The temperature is 3.5 degrees. It is cold." 
+ result = split_sentences(text) + self.assertEqual(len(result), 2) + + +class TestBuildBufferedSentences(unittest.TestCase): + def test_buffer_size_zero(self): + sentences = ["A.", "B.", "C."] + result = _build_buffered_sentences(sentences, buffer_size=0) + self.assertEqual(result, ["A.", "B.", "C."]) + + def test_buffer_size_one(self): + sentences = ["A.", "B.", "C.", "D."] + result = _build_buffered_sentences(sentences, buffer_size=1) + self.assertEqual(result[0], "A. B.") # [0:2] + self.assertEqual(result[1], "A. B. C.") # [0:3] + self.assertEqual(result[2], "B. C. D.") # [1:4] + self.assertEqual(result[3], "C. D.") # [2:4] + + def test_single_sentence(self): + result = _build_buffered_sentences(["Only one."], buffer_size=1) + self.assertEqual(result, ["Only one."]) + + +class TestCosineDistances(unittest.TestCase): + def test_identical_vectors(self): + embeddings = [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] + distances = _cosine_distances(embeddings) + self.assertEqual(len(distances), 2) + for d in distances: + self.assertAlmostEqual(d, 0.0, places=6) + + def test_orthogonal_vectors(self): + embeddings = [[1.0, 0.0], [0.0, 1.0]] + distances = _cosine_distances(embeddings) + self.assertAlmostEqual(distances[0], 1.0, places=6) + + def test_opposite_vectors(self): + embeddings = [[1.0, 0.0], [-1.0, 0.0]] + distances = _cosine_distances(embeddings) + self.assertAlmostEqual(distances[0], 2.0, places=6) + + def test_known_values(self): + # Two similar, then one different + embeddings = [[1.0, 0.0], [0.95, 0.05], [0.0, 1.0]] + distances = _cosine_distances(embeddings) + self.assertEqual(len(distances), 2) + # First pair should be close (small distance) + self.assertLess(distances[0], 0.1) + # Second pair should be far (large distance) + self.assertGreater(distances[1], 0.5) + + def test_zero_vector_handling(self): + embeddings = [[0.0, 0.0], [1.0, 0.0]] + distances = _cosine_distances(embeddings) + # Zero vector gets norm=1 (no division by zero) + self.assertEqual(len(distances), 1) + + +class TestFindBreakpoints(unittest.TestCase): + def test_clear_breakpoint(self): + # Low distances except one spike + distances = [0.01, 0.02, 0.01, 0.9, 0.01, 0.02] + breakpoints = _find_breakpoints(distances, 90.0) + self.assertIn(3, breakpoints) + + def test_no_breakpoints_uniform(self): + distances = [0.1, 0.1, 0.1, 0.1] + breakpoints = _find_breakpoints(distances, 95.0) + # With all equal distances, the 95th percentile = 0.1, and we need > threshold + self.assertEqual(breakpoints, []) + + def test_empty_distances(self): + self.assertEqual(_find_breakpoints([], 95.0), []) + + def test_single_distance(self): + breakpoints = _find_breakpoints([0.5], 50.0) + # 50th percentile of [0.5] = 0.5; nothing is > 0.5 + self.assertEqual(breakpoints, []) + + +class TestEnforceMaxChunkWords(unittest.TestCase): + def test_no_split_needed(self): + chunks = ["short chunk", "another one"] + result = _enforce_max_chunk_words(chunks, max_words=10) + self.assertEqual(result, chunks) + + def test_split_long_chunk(self): + long_chunk = " ".join(f"word{i}" for i in range(20)) + result = _enforce_max_chunk_words([long_chunk], max_words=10) + self.assertEqual(len(result), 2) + self.assertEqual(len(result[0].split()), 10) + self.assertEqual(len(result[1].split()), 10) + + def test_empty_chunks(self): + self.assertEqual(_enforce_max_chunk_words([], max_words=10), []) + + +class TestSemanticChunkText(unittest.TestCase): + def test_empty_text(self): + embed_fn = AsyncMock() + result = asyncio.run(semantic_chunk_text("", embed_fn)) + 
self.assertEqual(result, []) + embed_fn.assert_not_awaited() + + def test_single_sentence_returns_whole_text(self): + embed_fn = AsyncMock() + result = asyncio.run(semantic_chunk_text("Just one sentence.", embed_fn)) + self.assertEqual(result, ["Just one sentence."]) + embed_fn.assert_not_awaited() + + def test_two_sentences_returns_whole_text(self): + embed_fn = AsyncMock() + text = "First sentence. Second sentence." + result = asyncio.run(semantic_chunk_text(text, embed_fn)) + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_topic_transition_detected(self): + """Three sentences: first two similar, third different. Should split.""" + + async def mock_embed(texts): + embeddings = [] + for t in texts: + if "weather" in t.lower(): + embeddings.append([1.0, 0.0, 0.0]) + else: + embeddings.append([0.0, 0.0, 1.0]) + return embeddings + + text = ( + "The weather is nice. It is sunny today. Python is a programming language." + ) + result = asyncio.run( + semantic_chunk_text(text, mock_embed, breakpoint_percentile_threshold=50.0) + ) + # Should detect the topic transition + self.assertGreater(len(result), 1) + + def test_uniform_topic_single_chunk(self): + """All sentences on the same topic should stay together.""" + + async def mock_embed(texts): + return [[1.0, 0.0, 0.0]] * len(texts) + + text = "Dogs are great. Dogs are loyal. Dogs are friendly." + result = asyncio.run(semantic_chunk_text(text, mock_embed)) + self.assertEqual(len(result), 1) + + def test_embed_fn_failure_returns_single_chunk(self): + """If embedding fails, fall back to returning text as single chunk.""" + + async def failing_embed(texts): + raise RuntimeError("API error") + + text = "First sentence. Second sentence. Third sentence." + result = asyncio.run(semantic_chunk_text(text, failing_embed)) + self.assertEqual(result, [text]) + + def test_max_chunk_words_applied(self): + """Long uniform text should still be split by max_chunk_words.""" + words = " ".join(f"word{i}." for i in range(100)) + + async def mock_embed(texts): + return [[1.0, 0.0]] * len(texts) + + result = asyncio.run(semantic_chunk_text(words, mock_embed, max_chunk_words=30)) + for chunk in result: + self.assertLessEqual(len(chunk.split()), 30) + + def test_wrong_embedding_count_returns_single_chunk(self): + """If embed_fn returns wrong number of embeddings, fall back gracefully.""" + + async def wrong_count_embed(texts): + return [[1.0, 0.0]] # Always returns 1 regardless of input + + text = "First sentence. Second sentence. Third sentence." + result = asyncio.run(semantic_chunk_text(text, wrong_count_embed)) + self.assertEqual(result, [text]) + + +class TestSemanticChunkTextWithSentences(unittest.TestCase): + """Tests for the `sentences` and `join_str` parameters.""" + + def test_sentences_param_skips_split(self): + """Pre-split units should be used directly, not regex-split.""" + call_count = {"n": 0} + + async def mock_embed(texts): + call_count["n"] += 1 + # Return distinct embeddings so we can verify units are passed through + embeddings = [] + for i, _ in enumerate(texts): + vec = [0.0] * 3 + vec[i % 3] = 1.0 + embeddings.append(vec) + return embeddings + + # These dialogue turns have no sentence-ending punctuation — regex + # split_sentences would return them as a single unit. 
+ turns = [ + "Alice: Hey how are you", + "Bob: I'm good thanks", + "Alice: Want to grab lunch", + "Bob: Sure let's go", + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text( + text, mock_embed, sentences=turns, breakpoint_percentile_threshold=50.0 + ) + ) + # embed_fn should have been called (4 units > 2 threshold) + self.assertEqual(call_count["n"], 1) + # Result should contain all turns (possibly grouped) + joined = " ".join(result) + for turn in turns: + self.assertIn(turn, joined) + + def test_join_str_newline_preserves_dialogue(self): + """With join_str='\\n', chunks should keep speaker labels on separate lines.""" + + async def same_topic_embed(texts): + return [[1.0, 0.0, 0.0]] * len(texts) + + turns = [ + "Alice: The project is on track", + "Bob: Great to hear", + "Alice: We should ship next week", + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text(text, same_topic_embed, sentences=turns, join_str="\n") + ) + # All same topic → single chunk with newlines + self.assertEqual(len(result), 1) + self.assertIn("\n", result[0]) + # Each turn should be on its own line + lines = result[0].split("\n") + self.assertEqual(len(lines), 3) + + def test_single_turn_returns_whole_text(self): + """A single dialogue turn should return the full text.""" + embed_fn = AsyncMock() + turns = ["Alice: Hello"] + text = "Alice: Hello" + result = asyncio.run(semantic_chunk_text(text, embed_fn, sentences=turns)) + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_two_turns_returns_whole_text(self): + """Two dialogue turns should return the full text (below threshold).""" + embed_fn = AsyncMock() + turns = ["Alice: Hello", "Bob: Hi"] + text = "\n".join(turns) + result = asyncio.run(semantic_chunk_text(text, embed_fn, sentences=turns)) + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_topic_transition_with_dialogue(self): + """Dialogue that switches topics should be split into separate chunks.""" + + async def mock_embed(texts): + embeddings = [] + for t in texts: + if "weather" in t.lower() or "sunny" in t.lower(): + embeddings.append([1.0, 0.0, 0.0]) + else: + embeddings.append([0.0, 0.0, 1.0]) + return embeddings + + turns = [ + "Alice: The weather is beautiful today", + "Bob: Yes it's very sunny outside", + "Alice: By the way I started learning Python", + "Bob: Oh that's a great programming language", + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text( + text, + mock_embed, + sentences=turns, + join_str="\n", + breakpoint_percentile_threshold=50.0, + ) + ) + self.assertGreater(len(result), 1) + + def test_empty_turns_filtered(self): + """Empty strings in sentences list should be filtered out.""" + embed_fn = AsyncMock() + turns = ["Alice: Hello", "", " ", "Bob: Hi"] + text = "Alice: Hello\nBob: Hi" + result = asyncio.run(semantic_chunk_text(text, embed_fn, sentences=turns)) + # After filtering: 2 units → returns whole text + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_max_chunk_words_still_applied(self): + """The max_chunk_words safety valve should apply to dialogue chunks.""" + + async def same_topic(texts): + return [[1.0, 0.0]] * len(texts) + + # Each turn has ~10 words; 5 turns = ~50 words + turns = [ + f"Speaker: word {i} " + " ".join(f"w{j}" for j in range(8)) + for i in range(5) + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text( + text, + same_topic, + sentences=turns, + join_str="\n", + max_chunk_words=20, + ) + 
) + for chunk in result: + self.assertLessEqual(len(chunk.split()), 20) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/advanced/webui/package-lock.json b/backends/advanced/webui/package-lock.json index c3bd503e..0b11b4a7 100644 --- a/backends/advanced/webui/package-lock.json +++ b/backends/advanced/webui/package-lock.json @@ -20,6 +20,7 @@ "d3-zoom": "^3.0.0", "framer-motion": "^11.0.0", "lucide-react": "^0.294.0", + "qrcode.react": "^4.2.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.20.0" @@ -4538,6 +4539,15 @@ "node": ">=6" } }, + "node_modules/qrcode.react": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/qrcode.react/-/qrcode.react-4.2.0.tgz", + "integrity": "sha512-QpgqWi8rD9DsS9EP3z7BT+5lY5SFhsqGjpgW5DY/i3mK4M9DTBNz3ErMi8BWYEfI3L0d8GIbGmcdFAS1uIRGjA==", + "license": "ISC", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", diff --git a/backends/advanced/webui/package.json b/backends/advanced/webui/package.json index 7c497790..64696777 100644 --- a/backends/advanced/webui/package.json +++ b/backends/advanced/webui/package.json @@ -13,7 +13,6 @@ "@tanstack/react-query": "^5.90.20", "axios": "^1.6.2", "clsx": "^2.0.0", - "framer-motion": "^11.0.0", "cronstrue": "^2.50.0", "d3-array": "^3.2.4", "d3-axis": "^3.0.0", @@ -21,7 +20,9 @@ "d3-selection": "^3.0.0", "d3-time-format": "^4.1.0", "d3-zoom": "^3.0.0", + "framer-motion": "^11.0.0", "lucide-react": "^0.294.0", + "qrcode.react": "^4.2.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.20.0" diff --git a/backends/advanced/webui/src/App.tsx b/backends/advanced/webui/src/App.tsx index e660311f..cbb03ab4 100644 --- a/backends/advanced/webui/src/App.tsx +++ b/backends/advanced/webui/src/App.tsx @@ -23,6 +23,7 @@ const Queue = lazy(() => import('./pages/Queue')) const LiveRecord = lazy(() => import('./pages/LiveRecord')) const Plugins = lazy(() => import('./pages/Plugins')) const Finetuning = lazy(() => import('./pages/Finetuning')) +const ConnectApp = lazy(() => import('./pages/ConnectApp')) function PageSkeleton() { return ( @@ -159,6 +160,13 @@ function App() { } /> + + }> + + + + } /> diff --git a/backends/advanced/webui/src/components/PluginSettingsForm.tsx b/backends/advanced/webui/src/components/PluginSettingsForm.tsx index 718ade20..0f59a3c2 100644 --- a/backends/advanced/webui/src/components/PluginSettingsForm.tsx +++ b/backends/advanced/webui/src/components/PluginSettingsForm.tsx @@ -15,8 +15,9 @@ interface PluginMetadata { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } config_schema: { @@ -30,8 +31,9 @@ interface PluginConfig { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } settings: Record diff --git a/backends/advanced/webui/src/components/layout/Layout.tsx b/backends/advanced/webui/src/components/layout/Layout.tsx index 630fb0d2..a182e033 100644 --- a/backends/advanced/webui/src/components/layout/Layout.tsx +++ b/backends/advanced/webui/src/components/layout/Layout.tsx @@ -1,5 +1,5 @@ import { Link, useLocation, Outlet } from 'react-router-dom' -import { Music, MessageSquare, MessageCircle, Brain, Users, 
Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Layers, Puzzle, Zap, Activity } from 'lucide-react' +import { Music, MessageSquare, MessageCircle, Brain, Users, Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Layers, Puzzle, Zap, Activity, Smartphone } from 'lucide-react' import { useAuth } from '../../contexts/AuthContext' import { useTheme } from '../../contexts/ThemeContext' import GlobalRecordingIndicator from './GlobalRecordingIndicator' @@ -16,6 +16,7 @@ export default function Layout() { { path: '/conversations', label: 'Conversations', icon: MessageSquare }, { path: '/memories', label: 'Memories', icon: Brain }, { path: '/users', label: 'User Management', icon: Users }, + { path: '/connect-app', label: 'Connect App', icon: Smartphone }, ...(isAdmin ? [ { path: '/upload', label: 'Upload Audio', icon: Upload }, { path: '/queue', label: 'Queue & Events', icon: Layers }, diff --git a/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx b/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx index ceaf51c8..16de8088 100644 --- a/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx +++ b/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx @@ -4,8 +4,9 @@ interface OrchestrationConfig { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } @@ -41,12 +42,13 @@ export default function OrchestrationSection({ onChange({ ...config, events }) } - const handleConditionTypeChange = (type: 'always' | 'wake_word') => { + const handleConditionTypeChange = (type: 'always' | 'wake_word' | 'keyword_anywhere') => { onChange({ ...config, condition: { type, - wake_words: type === 'wake_word' ? config.condition.wake_words || [] : undefined + wake_words: type === 'wake_word' ? config.condition.wake_words || [] : undefined, + keywords: type === 'keyword_anywhere' ? config.condition.keywords || [] : undefined } }) } @@ -62,6 +64,17 @@ export default function OrchestrationSection({ }) } + const handleKeywordsChange = (value: string) => { + const keywords = value.split(',').map((w) => w.trim()).filter(Boolean) + onChange({ + ...config, + condition: { + ...config.condition, + keywords + } + }) + } + return (
{/* Section Header */} @@ -182,7 +195,7 @@ export default function OrchestrationSection({ Always

-                    Execute on every matching event
+                    Execute on every matching event, no filtering

@@ -209,10 +222,40 @@ export default function OrchestrationSection({ />
-                    Wake Word
+                    Wake Word (start of sentence)
+
+

+ Triggers when the transcript starts with the wake word +

+
+ + + @@ -239,7 +282,32 @@ export default function OrchestrationSection({ className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" />

-                    Comma-separated list of wake words (case-insensitive)
+                    Comma-separated list of wake words. The transcript must start with one of these words (case-insensitive).
+

+
+          )}
+
+          {/* Keywords Input (conditional) */}
+          {config.condition.type === 'keyword_anywhere' && (
+
+ + !disabled && handleKeywordsChange(e.target.value)} + placeholder="e.g., vivi, hey chronicle" + disabled={disabled} + className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" + /> +

+ Comma-separated list of keywords. Triggers when any keyword appears anywhere in the transcript (case-insensitive).

)} diff --git a/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx b/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx index 13c19542..a58eb023 100644 --- a/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx +++ b/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx @@ -22,8 +22,9 @@ interface PluginConfig { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } settings: Record diff --git a/backends/advanced/webui/src/pages/ConnectApp.tsx b/backends/advanced/webui/src/pages/ConnectApp.tsx new file mode 100644 index 00000000..768cdb6e --- /dev/null +++ b/backends/advanced/webui/src/pages/ConnectApp.tsx @@ -0,0 +1,120 @@ +import { useState } from 'react' +import { QRCodeSVG } from 'qrcode.react' +import { Smartphone, Copy, Check } from 'lucide-react' +import { useTheme } from '../contexts/ThemeContext' + +function getBackendHttpUrl(): string { + const { protocol, hostname, port } = window.location + + const isStandardPort = + (protocol === 'https:' && (port === '' || port === '443')) || + (protocol === 'http:' && (port === '' || port === '80')) + + const basePath = import.meta.env.BASE_URL + if (isStandardPort && basePath && basePath !== '/') { + // Caddy path-based routing — return full origin + return `${protocol}//${hostname}` + } + + if (import.meta.env.VITE_BACKEND_URL) { + const url = import.meta.env.VITE_BACKEND_URL as string + // If it's a relative URL, make it absolute + if (url.startsWith('/') || url === '') { + return `${protocol}//${hostname}${port ? `:${port}` : ''}` + } + return url + } + + if (isStandardPort) { + return `${protocol}//${hostname}` + } + + if (port === '5173') { + return `${protocol}//${hostname}:8000` + } + + return `${protocol}//${hostname}${port ? `:${port}` : ''}` +} + +export default function ConnectApp() { + const { isDark } = useTheme() + const [copied, setCopied] = useState(false) + const backendUrl = getBackendHttpUrl() + + const handleCopy = async () => { + try { + await navigator.clipboard.writeText(backendUrl) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } catch { + // Fallback for older browsers + const textArea = document.createElement('textarea') + textArea.value = backendUrl + document.body.appendChild(textArea) + textArea.select() + document.execCommand('copy') + document.body.removeChild(textArea) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } + } + + return ( +
+
+ +

+ Connect App +

+
+ +

+ Scan this QR code with the Chronicle mobile app to connect it to your backend. +

+ + {/* QR Code */} +
+
+ +
+ + {/* URL display + copy */} +
+ + {backendUrl} + + +
+
+ + {/* Instructions */} +
+

+ How to connect +

+
    +
+   1. Open the Chronicle app on your phone
+   2. Go to Settings and tap Scan QR Code
+   3. Point your camera at the QR code above
+   4. The backend URL will be configured automatically
+
+
+ ) +} diff --git a/backends/advanced/webui/src/pages/ConversationDetail.tsx b/backends/advanced/webui/src/pages/ConversationDetail.tsx index a7ec7fa4..c32465c9 100644 --- a/backends/advanced/webui/src/pages/ConversationDetail.tsx +++ b/backends/advanced/webui/src/pages/ConversationDetail.tsx @@ -3,7 +3,7 @@ import { useParams, useNavigate } from 'react-router-dom' import { useQueryClient } from '@tanstack/react-query' import { ArrowLeft, Calendar, User, Trash2, RefreshCw, MoreVertical, - RotateCcw, Zap, Play, Pause, + RotateCcw, Zap, Play, Pause, Download, Save, X, Pencil, Brain, Clock, Database, Layers, Star, BarChart3 } from 'lucide-react' import { annotationsApi, speakerApi, systemApi, BACKEND_URL } from '../services/api' @@ -333,6 +333,27 @@ export default function ConversationDetail() { } // Action handlers + const handleDownloadAudio = async () => { + if (!id) return + setOpenDropdown(false) + try { + const token = localStorage.getItem(getStorageKey('token')) || '' + const resp = await fetch(`${BACKEND_URL}/api/audio/get_audio/${id}`, { + headers: { Authorization: `Bearer ${token}` }, + }) + if (!resp.ok) throw new Error(`Download failed: ${resp.status}`) + const blob = await resp.blob() + const url = URL.createObjectURL(blob) + const a = document.createElement('a') + a.href = url + a.download = `${conversation?.title || id}.wav` + a.click() + URL.revokeObjectURL(url) + } catch (err: any) { + setActionError(`Failed to download audio: ${err.message || 'Unknown error'}`) + } + } + const handleDelete = async () => { if (!id) return const confirmed = window.confirm('Are you sure you want to delete this conversation?') @@ -658,6 +679,15 @@ export default function ConversationDetail() { {reprocessingSpeakers ? : } Reprocess Speakers + {conversation.audio_chunks_count && conversation.audio_chunks_count > 0 && ( + + )}
+ + + + {/* Raw Data (Debug) */} {readinessData && (
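Reviewer note before the next patch: the new chunking API is easiest to understand from one end-to-end call. The sketch below is assembled only from the behaviour pinned down in `test_text_chunking.py` above; `fake_embed` is a toy stand-in for the real embedding provider, and the multi-chunk outcome mirrors the topic-transition tests rather than a hard guarantee of the algorithm.

```python
# Sketch: exercising semantic_chunk_text the way the tests above do.
# fake_embed is a stand-in; the real callers pass an OpenAI-style
# async embedding function.
import asyncio

from advanced_omi_backend.utils.text_chunking import semantic_chunk_text


async def fake_embed(texts: list[str]) -> list[list[float]]:
    # Weather-related units cluster on one axis, everything else on the
    # other, so the cosine distance spikes at the topic switch.
    return [[1.0, 0.0] if "weather" in t.lower() else [0.0, 1.0] for t in texts]


async def main() -> None:
    turns = [
        "Alice: The weather is lovely today",
        "Bob: Yes, the weather could not be better",
        "Alice: By the way, the release branch is ready",
        "Bob: Great, let's ship it",
    ]
    chunks = await semantic_chunk_text(
        "\n".join(turns),
        fake_embed,
        sentences=turns,  # pre-split dialogue turns; skips the regex splitter
        join_str="\n",  # keeps each speaker label on its own line
        breakpoint_percentile_threshold=50.0,
        max_chunk_words=200,  # safety valve for long single-topic runs
    )
    # The topic switch after the second turn should yield more than one chunk.
    print(chunks)


asyncio.run(main())
```

Passing `sentences` and `join_str` is what lets dialogue without terminal punctuation chunk cleanly, as the `TestSemanticChunkTextWithSentences` cases above verify.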
From 4559eda70d79b50720543b5712f48e05ab9fb349 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:34:19 +0530 Subject: [PATCH 3/4] Update application configuration and enhance device management features - Changed app slug from "chronicle" to "friend-lite-app" in app.json for better branding. - Added "ITSAppUsesNonExemptEncryption" key to app.json for compliance with App Store requirements. - Introduced a new "reconnect_backoff" event type in ConnectionLogContext for improved connection handling. - Enhanced diagnostics with a new reconnect backoff color in diagnostics.tsx. - Updated device filtering logic in index.tsx to include "neo" devices, improving user experience. - Added device type detection in DeviceListItem component to visually differentiate between device types. - Implemented retry connection UI feedback in index.tsx to inform users during reconnection attempts. - Updated useAutoReconnect hook to manage connection retry logic and backoff timing effectively. - Added utility function for device type detection to streamline device management. --- app/app.json | 10 +- app/app/diagnostics.tsx | 1 + app/app/index.tsx | 43 +++++- app/package-lock.json | 38 ++--- app/src/components/DeviceListItem.tsx | 18 +++ app/src/contexts/ConnectionLogContext.tsx | 1 + app/src/hooks/useAudioStreamer.ts | 1 + .../hooks/useAudioStreamingOrchestrator.ts | 8 +- app/src/hooks/useAutoReconnect.ts | 139 +++++++++++++++++- app/src/utils/deviceType.ts | 8 + 10 files changed, 232 insertions(+), 35 deletions(-) create mode 100644 app/src/utils/deviceType.ts diff --git a/app/app.json b/app/app.json index d2bf04ec..237eab68 100644 --- a/app/app.json +++ b/app/app.json @@ -1,7 +1,7 @@ { "expo": { "name": "chronicle", - "slug": "chronicle", + "slug": "friend-lite-app", "version": "1.0.0", "scheme": "chronicle", "orientation": "portrait", @@ -24,7 +24,8 @@ "NSAppTransportSecurity": { "NSAllowsArbitraryLoads": true, "NSAllowsLocalNetworking": true - } + }, + "ITSAppUsesNonExemptEncryption": false } }, "android": { @@ -55,7 +56,9 @@ "enableNotifications": true, "enableBackgroundAudio": true, "enableDeviceDetection": true, - "iosBackgroundModes": { "useProcessing": true }, + "iosBackgroundModes": { + "useProcessing": true + }, "iosConfig": { "microphoneUsageDescription": "We use the mic for live audio streaming" } @@ -107,6 +110,7 @@ "expo-image-picker", "./plugins/with-ats" ], + "owner": "cupbearer5517", "extra": { "eas": { "projectId": "05d8598e-6fe7-4373-81e4-1654f3d8e181" diff --git a/app/app/diagnostics.tsx b/app/app/diagnostics.tsx index 18ade000..c8c0d59a 100644 --- a/app/app/diagnostics.tsx +++ b/app/app/diagnostics.tsx @@ -17,6 +17,7 @@ const EVENT_BADGE_COLORS: Record = { error: '#FF3B30', health_ping: '#34C759', reconnect_attempt: '#FF9500', + reconnect_backoff: '#FF9500', bt_state_change: '#5856D6', }; diff --git a/app/app/index.tsx b/app/app/index.tsx index 636e475e..1c4dc58e 100644 --- a/app/app/index.tsx +++ b/app/app/index.tsx @@ -117,16 +117,17 @@ export default function App() { const canScan = React.useMemo(() => ( permissionGranted && bluetoothState === BluetoothState.PoweredOn && - !autoReconnect.isAttemptingAutoReconnect && !deviceConnection.isConnecting && + !autoReconnect.isAttemptingAutoReconnect && !autoReconnect.isRetryingConnection && + !deviceConnection.isConnecting && !deviceConnection.connectedDeviceId && (autoReconnect.triedAutoReconnectForCurrentId || !autoReconnect.lastKnownDeviceId) - ), [permissionGranted, 
bluetoothState, autoReconnect.isAttemptingAutoReconnect, deviceConnection.isConnecting, deviceConnection.connectedDeviceId, autoReconnect.triedAutoReconnectForCurrentId, autoReconnect.lastKnownDeviceId]); + ), [permissionGranted, bluetoothState, autoReconnect.isAttemptingAutoReconnect, autoReconnect.isRetryingConnection, deviceConnection.isConnecting, deviceConnection.connectedDeviceId, autoReconnect.triedAutoReconnectForCurrentId, autoReconnect.lastKnownDeviceId]); const filteredDevices = React.useMemo(() => { if (!showOnlyOmi) return scannedDevices; return scannedDevices.filter(d => { const name = d.name?.toLowerCase() || ''; - return name.includes('omi') || name.includes('friend'); + return name.includes('omi') || name.includes('friend') || name.includes('neo'); }); }, [scannedDevices, showOnlyOmi]); @@ -190,6 +191,21 @@ export default function App() { + {autoReconnect.isRetryingConnection && ( + + + + Reconnecting in {autoReconnect.retryBackoffSeconds}s... (attempt {autoReconnect.connectionRetryCount}) + + + Cancel + + + )} + {!settings.isAuthenticated && ( Login is required for advanced backend features. Simple backend can be used without authentication. @@ -201,7 +217,7 @@ export default function App() { Found Devices - Show only OMI/Friend + Show only OMI/Friend/Neo - {showOnlyOmi ? `No OMI/Friend devices found. ${scannedDevices.length} other device(s) hidden by filter.` : 'No devices found.'} + {showOnlyOmi ? `No OMI/Friend/Neo devices found. ${scannedDevices.length} other device(s) hidden by filter.` : 'No devices found.'} )} @@ -408,6 +424,23 @@ const createStyles = (colors: ThemeColors) => StyleSheet.create({ textAlign: 'center', fontStyle: 'italic', }, + retryBanner: { + flexDirection: 'row', + alignItems: 'center', + padding: 12, + marginBottom: 15, + backgroundColor: colors.card, + borderRadius: 8, + borderWidth: 1, + borderColor: colors.warning, + }, + retryBannerText: { + flex: 1, + marginLeft: 10, + fontSize: 14, + color: colors.warning, + fontWeight: '500', + }, authWarning: { marginBottom: 20, padding: 15, diff --git a/app/package-lock.json b/app/package-lock.json index c4ceb0c8..27c99921 100644 --- a/app/package-lock.json +++ b/app/package-lock.json @@ -82,6 +82,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.4.tgz", "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -2814,6 +2815,7 @@ "integrity": "sha512-Q7UnBqOO/JsWfgmO9qZjrKgMi/0U9ih0FywXXheml8VH1hn/pBXKIeO/BvzA6g5gHIvBZ/6KyhdGoNok1R/ZJw==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "@react-native-community/cli-clean": "20.0.1", "@react-native-community/cli-config": "20.0.1", @@ -3429,6 +3431,7 @@ "resolved": "https://registry.npmjs.org/@react-navigation/native/-/native-7.1.28.tgz", "integrity": "sha512-d1QDn+KNHfHGt3UIwOZvupvdsDdiHYZBEj7+wL2yDVo3tMezamYy60H9s3EnNVE1Ae1ty0trc7F2OKqo/RmsdQ==", "license": "MIT", + "peer": true, "dependencies": { "@react-navigation/core": "^7.14.0", "escape-string-regexp": "^4.0.0", @@ -3624,6 +3627,7 @@ "integrity": "sha512-ixLZ7zG7j1fM0DijL9hDArwhwcCb4vqmePgwtV0GfnkHRSCUEv4LvzarcTdhoqgyMznUx/EhoTUv31CKZzkQlw==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -3748,6 +3752,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -4331,6 +4336,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001737", "electron-to-chromium": "^1.5.211", @@ -5286,6 +5292,7 @@ "resolved": "https://registry.npmjs.org/expo/-/expo-53.0.22.tgz", "integrity": "sha512-sJ2I4W/e5iiM4u/wYCe3qmW4D7WPCRqByPDD0hJcdYNdjc9HFFFdO4OAudZVyC/MmtoWZEIH5kTJP1cw9FjzYA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.20.0", "@expo/cli": "0.24.21", @@ -5394,6 +5401,7 @@ "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-17.1.7.tgz", "integrity": "sha512-byBjGsJ6T6FrLlhOBxw4EaiMXrZEn/MlUYIj/JAd+FS7ll5X/S4qVRbIimSJtdW47hXMq0zxPfJX6njtA56hHA==", "license": "MIT", + "peer": true, "dependencies": { "@expo/config": "~11.0.12", "@expo/env": "~1.0.7" @@ -5486,6 +5494,7 @@ "resolved": "https://registry.npmjs.org/expo-font/-/expo-font-13.3.2.tgz", "integrity": "sha512-wUlMdpqURmQ/CNKK/+BIHkDA5nGjMqNlYmW0pJFXY/KE/OG80Qcavdu2sHsL4efAIiNGvYdBS10WztuQYU4X0A==", "license": "MIT", + "peer": true, "dependencies": { "fontfaceobserver": "^2.1.0" }, @@ -5536,7 +5545,6 @@ "resolved": "https://registry.npmjs.org/expo-linking/-/expo-linking-8.0.11.tgz", "integrity": "sha512-+VSaNL5om3kOp/SSKO5qe6cFgfSIWnnQDSbA7XLs3ECkYzXRquk5unxNS3pg7eK5kNUmQ4kgLI7MhTggAEUBLA==", "license": "MIT", - "peer": true, "dependencies": { "expo-constants": "~18.0.12", "invariant": "^2.2.4" @@ -5551,7 +5559,6 @@ "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz", "integrity": "sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==", "license": "MIT", - "peer": true, "dependencies": { "@babel/highlight": "^7.10.4" } @@ -5561,7 +5568,6 @@ "resolved": "https://registry.npmjs.org/@expo/config/-/config-12.0.13.tgz", "integrity": "sha512-Cu52arBa4vSaupIWsF0h7F/Cg//N374nYb7HAxV0I4KceKA7x2UXpYaHOL7EEYYvp7tZdThBjvGpVmr8ScIvaQ==", "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "@expo/config-plugins": "~54.0.4", @@ -5583,7 +5589,6 @@ "resolved": "https://registry.npmjs.org/@expo/config-plugins/-/config-plugins-54.0.4.tgz", "integrity": "sha512-g2yXGICdoOw5i3LkQSDxl2Q5AlQCrG7oniu0pCPPO+UxGb7He4AFqSvPSy8HpRUj55io17hT62FTjYRD+d6j3Q==", "license": "MIT", - "peer": true, "dependencies": { "@expo/config-types": "^54.0.10", "@expo/json-file": "~10.0.8", @@ -5605,15 +5610,13 @@ "version": "54.0.10", "resolved": "https://registry.npmjs.org/@expo/config-types/-/config-types-54.0.10.tgz", "integrity": "sha512-/J16SC2an1LdtCZ67xhSkGXpALYUVUNyZws7v+PVsFZxClYehDSoKLqyRaGkpHlYrCc08bS0RF5E0JV6g50psA==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/expo-linking/node_modules/@expo/env": { "version": "2.0.8", "resolved": "https://registry.npmjs.org/@expo/env/-/env-2.0.8.tgz", "integrity": "sha512-5VQD6GT8HIMRaSaB5JFtOXuvfDVU80YtZIuUT/GDhUF782usIXY13Tn3IdDz1Tm/lqA9qnRZQ1BF4t7LlvdJPA==", "license": "MIT", - "peer": true, "dependencies": { "chalk": "^4.0.0", "debug": "^4.3.4", @@ -5627,7 +5630,6 @@ "resolved": "https://registry.npmjs.org/@expo/json-file/-/json-file-10.0.8.tgz", "integrity": "sha512-9LOTh1PgKizD1VXfGQ88LtDH0lRwq9lsTb4aichWTWSWqy3Ugfkhfm3BhzBIkJJfQQ5iJu3m/BoRlEIjoCGcnQ==", "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "json5": "^2.2.3" @@ -5638,7 +5640,6 @@ 
"resolved": "https://registry.npmjs.org/@expo/plist/-/plist-0.4.8.tgz", "integrity": "sha512-pfNtErGGzzRwHP+5+RqswzPDKkZrx+Cli0mzjQaus1ZWFsog5ibL+nVT3NcporW51o8ggnt7x813vtRbPiyOrQ==", "license": "MIT", - "peer": true, "dependencies": { "@xmldom/xmldom": "^0.8.8", "base64-js": "^1.2.3", @@ -5650,7 +5651,6 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", "license": "MIT", - "peer": true, "engines": { "node": "18 || 20 || >=22" } @@ -5660,7 +5660,6 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz", "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==", "license": "MIT", - "peer": true, "dependencies": { "balanced-match": "^4.0.2" }, @@ -5673,7 +5672,6 @@ "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", "license": "MIT", - "peer": true, "engines": { "node": ">= 6" } @@ -5683,7 +5681,6 @@ "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-18.0.13.tgz", "integrity": "sha512-FnZn12E1dRYKDHlAdIyNFhBurKTS3F9CrfrBDJI5m3D7U17KBHMQ6JEfYlSj7LG7t+Ulr+IKaj58L1k5gBwTcQ==", "license": "MIT", - "peer": true, "dependencies": { "@expo/config": "~12.0.13", "@expo/env": "~2.0.8" @@ -5698,7 +5695,6 @@ "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.6.tgz", "integrity": "sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==", "license": "BlueOak-1.0.0", - "peer": true, "dependencies": { "minimatch": "^10.2.2", "minipass": "^7.1.3", @@ -5716,7 +5712,6 @@ "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz", "integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==", "license": "BlueOak-1.0.0", - "peer": true, "engines": { "node": "20 || >=22" } @@ -5726,7 +5721,6 @@ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.2.tgz", "integrity": "sha512-+G4CpNBxa5MprY+04MbgOw1v7So6n5JY166pFi9KfYwT78fxScCeSNQSNzp6dpPSW2rONOps6Ocam1wFhCgoVw==", "license": "BlueOak-1.0.0", - "peer": true, "dependencies": { "brace-expansion": "^5.0.2" }, @@ -5742,7 +5736,6 @@ "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", "license": "BlueOak-1.0.0", - "peer": true, "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" @@ -5759,7 +5752,6 @@ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "license": "ISC", - "peer": true, "bin": { "semver": "bin/semver.js" }, @@ -5772,7 +5764,6 @@ "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz", "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==", "license": "MIT", - "peer": true, "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", @@ -6020,7 +6011,6 @@ "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "license": "MIT", - "peer": true, "engines": { "node": ">=12.0.0" }, @@ 
-8698,6 +8688,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-3.0.1.tgz", "integrity": "sha512-I3EurrIQMlRc9IaAZnqRR044Phh2DXY+55o7uJ0V+hYZAcQYSuFWsc9q5PvyDHUSCe1Qxn/iBz+78s86zWnGag==", "license": "MIT", + "peer": true, "engines": { "node": ">=10" }, @@ -8986,6 +8977,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz", "integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -9050,6 +9042,7 @@ "resolved": "https://registry.npmjs.org/react-native/-/react-native-0.79.6.tgz", "integrity": "sha512-kvIWSmf4QPfY41HC25TR285N7Fv0Pyn3DAEK8qRL9dA35usSaxsJkHfw+VqnonqJjXOaoKCEanwudRAJ60TBGA==", "license": "MIT", + "peer": true, "dependencies": { "@jest/create-cache-key-function": "^29.7.0", "@react-native/assets-registry": "0.79.6", @@ -9148,6 +9141,7 @@ "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-5.4.0.tgz", "integrity": "sha512-JaEThVyJcLhA+vU0NU8bZ0a1ih6GiF4faZ+ArZLqpYbL6j7R3caRqj+mE3lEtKCuHgwjLg3bCxLL1GPUJZVqUA==", "license": "MIT", + "peer": true, "peerDependencies": { "react": "*", "react-native": "*" @@ -9158,6 +9152,7 @@ "resolved": "https://registry.npmjs.org/react-native-screens/-/react-native-screens-4.11.1.tgz", "integrity": "sha512-F0zOzRVa3ptZfLpD0J8ROdo+y1fEPw+VBFq1MTY/iyDu08al7qFUO5hLMd+EYMda5VXGaTFCa8q7bOppUszhJw==", "license": "MIT", + "peer": true, "dependencies": { "react-freeze": "^1.0.0", "react-native-is-edge-to-edge": "^1.1.7", @@ -10567,7 +10562,6 @@ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", "license": "MIT", - "peer": true, "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" @@ -10584,7 +10578,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -10663,6 +10656,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/app/src/components/DeviceListItem.tsx b/app/src/components/DeviceListItem.tsx index 33409b88..573dc30f 100644 --- a/app/src/components/DeviceListItem.tsx +++ b/app/src/components/DeviceListItem.tsx @@ -3,6 +3,7 @@ import { View, Text, TouchableOpacity, StyleSheet } from 'react-native'; import { OmiDevice } from 'friend-lite-react-native'; import { useTheme, ThemeColors } from '../theme'; import SignalStrength from './SignalStrength'; +import { detectDeviceType } from '../utils/deviceType'; interface DeviceListItemProps { device: OmiDevice; @@ -23,12 +24,18 @@ export const DeviceListItem: React.FC = ({ const s = createStyles(colors); const isThisDeviceConnected = connectedDeviceId === device.id; const isAnotherDeviceConnected = connectedDeviceId !== null && connectedDeviceId !== device.id; + const deviceType = detectDeviceType(device.name); return ( {device.name || 'Unknown Device'} + {deviceType !== 'unknown' && ( + + {deviceType === 'neo' ? 
'Neo' : 'OMI'} + + )} ID: {device.id} @@ -85,6 +92,17 @@ const createStyles = (colors: ThemeColors) => StyleSheet.create({ color: colors.textSecondary, marginTop: 2, }, + deviceTypeBadge: { + marginLeft: 6, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + deviceTypeBadgeText: { + color: 'white', + fontSize: 10, + fontWeight: '700', + }, button: { backgroundColor: colors.primary, paddingVertical: 12, diff --git a/app/src/contexts/ConnectionLogContext.tsx b/app/src/contexts/ConnectionLogContext.tsx index 8493cc90..dc9314c4 100644 --- a/app/src/contexts/ConnectionLogContext.tsx +++ b/app/src/contexts/ConnectionLogContext.tsx @@ -24,6 +24,7 @@ export type ConnectionEventType = | 'error' | 'health_ping' | 'reconnect_attempt' + | 'reconnect_backoff' | 'bt_state_change'; const MAX_EVENTS = 200; diff --git a/app/src/hooks/useAudioStreamer.ts b/app/src/hooks/useAudioStreamer.ts index cfd86fd7..f22371c0 100644 --- a/app/src/hooks/useAudioStreamer.ts +++ b/app/src/hooks/useAudioStreamer.ts @@ -27,6 +27,7 @@ const AUDIO_FORMAT = { rate: 16000, width: 2, channels: 1, + mode: 'streaming', }; /** -------------------- Foreground Service helpers (NEW) -------------------- */ diff --git a/app/src/hooks/useAudioStreamingOrchestrator.ts b/app/src/hooks/useAudioStreamingOrchestrator.ts index 43e78511..bff49ee5 100644 --- a/app/src/hooks/useAudioStreamingOrchestrator.ts +++ b/app/src/hooks/useAudioStreamingOrchestrator.ts @@ -46,8 +46,14 @@ export const useAudioStreamingOrchestrator = ({ const buildWebSocketUrl = useCallback((baseUrl: string): string => { let url = baseUrl.trim(); - const isAdvanced = settings.jwtToken && settings.isAuthenticated; + url = url.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); + if (!url.includes('/ws')) url = url.replace(/\/$/, '') + '/ws'; + if (!url.includes('codec=')) { + const sep = url.includes('?') ? 
'&' : '?';
+      url = url + sep + 'codec=opus';
+    }
+    const isAdvanced = settings.jwtToken && settings.isAuthenticated;
     if (isAdvanced) {
       const params = new URLSearchParams();
       params.append('token', settings.jwtToken!);
diff --git a/app/src/hooks/useAutoReconnect.ts b/app/src/hooks/useAutoReconnect.ts
index c2581b99..c45a1b62 100644
--- a/app/src/hooks/useAutoReconnect.ts
+++ b/app/src/hooks/useAutoReconnect.ts
@@ -1,8 +1,12 @@
-import { useState, useEffect, useCallback } from 'react';
+import { useState, useEffect, useCallback, useRef } from 'react';
 import { State as BluetoothState } from 'react-native-ble-plx';
 import { saveLastConnectedDeviceId, getLastConnectedDeviceId } from '../utils/storage';
 import { useConnectionLog } from '../contexts/ConnectionLogContext';
 
+const BACKOFF_INITIAL = 10000; // 10s
+const BACKOFF_MAX = 300000; // 5 min
+const MIN_HEALTHY_DURATION = 30000; // 30s
+
 interface UseAutoReconnectParams {
   bluetoothState: BluetoothState;
   permissionGranted: boolean;
@@ -19,6 +23,9 @@ export interface AutoReconnectState {
   lastKnownDeviceId: string | null;
   isAttemptingAutoReconnect: boolean;
   triedAutoReconnectForCurrentId: boolean;
+  isRetryingConnection: boolean;
+  retryBackoffSeconds: number;
+  connectionRetryCount: number;
   setLastKnownDeviceId: (id: string | null) => void;
   setTriedAutoReconnectForCurrentId: (tried: boolean) => void;
   handleCancelAutoReconnect: () => Promise<void>;
@@ -35,6 +42,35 @@
   const [triedAutoReconnectForCurrentId, setTriedAutoReconnectForCurrentId] = useState(false);
   const { addEvent } = useConnectionLog();
 
+  // Retry / backoff state
+  const [isRetryingConnection, setIsRetryingConnection] = useState(false);
+  const [retryBackoffSeconds, setRetryBackoffSeconds] = useState(0);
+  const [connectionRetryCount, setConnectionRetryCount] = useState(0);
+  const backoffMsRef = useRef(0);
+  const connectionStartTimeRef = useRef<number | null>(null);
+  const retryTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const countdownTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
+  const prevConnectedRef = useRef<string | null>(null);
+
+  const clearRetryTimers = useCallback(() => {
+    if (retryTimerRef.current) {
+      clearTimeout(retryTimerRef.current);
+      retryTimerRef.current = null;
+    }
+    if (countdownTimerRef.current) {
+      clearInterval(countdownTimerRef.current);
+      countdownTimerRef.current = null;
+    }
+    setIsRetryingConnection(false);
+    setRetryBackoffSeconds(0);
+  }, []);
+
+  const resetBackoff = useCallback(() => {
+    backoffMsRef.current = 0;
+    setConnectionRetryCount(0);
+    clearRetryTimers();
+  }, [clearRetryTimers]);
+
   // Load last device on mount
   useEffect(() => {
     const load = async () => {
@@ -50,7 +86,88 @@
     load();
   }, []);
 
-  // Auto-reconnect effect
+  // Track connection start/end for backoff calculation
+  useEffect(() => {
+    const currentConnected = deviceConnection.connectedDeviceId;
+    const prevConnected = prevConnectedRef.current;
+    prevConnectedRef.current = currentConnected;
+
+    // Connection established
+    if (currentConnected && !prevConnected) {
+      connectionStartTimeRef.current = Date.now();
+      clearRetryTimers();
+      return;
+    }
+
+    // Connection lost (unexpected disconnect)
+    if (!currentConnected && prevConnected && lastKnownDeviceId) {
+      const startTime = connectionStartTimeRef.current;
+      connectionStartTimeRef.current = null;
+      const duration = startTime ? Date.now() - startTime : 0;
+
+      if (duration >= MIN_HEALTHY_DURATION) {
+        // Healthy connection — reset backoff
+        backoffMsRef.current = 0;
+        setConnectionRetryCount(0);
+      } else {
+        // Quick failure — increase backoff
+        if (backoffMsRef.current === 0) {
+          backoffMsRef.current = BACKOFF_INITIAL;
+        } else {
+          backoffMsRef.current = Math.min(backoffMsRef.current * 2, BACKOFF_MAX);
+        }
+      }
+
+      const delay = backoffMsRef.current;
+      const deviceId = lastKnownDeviceId;
+      // Capture the attempt number now; state read inside the timer callbacks below is stale.
+      const attempt = connectionRetryCount + 1;
+      addEvent('reconnect_backoff', `Scheduling retry in ${delay / 1000}s (device: ${deviceId})`, { deviceId });
+
+      setIsRetryingConnection(true);
+      setRetryBackoffSeconds(Math.ceil(delay / 1000));
+      setConnectionRetryCount(c => c + 1);
+
+      // Countdown timer for UI
+      const countdownEnd = Date.now() + delay;
+      countdownTimerRef.current = setInterval(() => {
+        const remaining = Math.max(0, Math.ceil((countdownEnd - Date.now()) / 1000));
+        setRetryBackoffSeconds(remaining);
+        if (remaining <= 0 && countdownTimerRef.current) {
+          clearInterval(countdownTimerRef.current);
+          countdownTimerRef.current = null;
+        }
+      }, 1000);
+
+      // Schedule reconnect
+      retryTimerRef.current = setTimeout(async () => {
+        retryTimerRef.current = null;
+        if (countdownTimerRef.current) {
+          clearInterval(countdownTimerRef.current);
+          countdownTimerRef.current = null;
+        }
+
+        if (!deviceId) {
+          setIsRetryingConnection(false);
+          return;
+        }
+
+        setIsAttemptingAutoReconnect(true);
+        setIsRetryingConnection(false);
+        setRetryBackoffSeconds(0);
+        addEvent('reconnect_attempt', `Retrying connection to ${deviceId} (attempt ${attempt})`, { deviceId });
+
+        try {
+          await deviceConnection.connectToDevice(deviceId);
+        } catch (error) {
+          console.error(`[AutoReconnect] Retry failed for ${deviceId}:`, error);
+          // Let the next disconnect cycle handle further retries
+        } finally {
+          setIsAttemptingAutoReconnect(false);
+        }
+      }, delay);
+    }
+  }, [deviceConnection.connectedDeviceId]);
+
+  // Auto-reconnect on app launch (existing behavior)
   useEffect(() => {
     if (
       bluetoothState === BluetoothState.PoweredOn &&
@@ -60,7 +177,8 @@
       !deviceConnection.isConnecting &&
       !scanning &&
       !isAttemptingAutoReconnect &&
-      !triedAutoReconnectForCurrentId
+      !triedAutoReconnectForCurrentId &&
+      !isRetryingConnection
     ) {
       const attemptAutoConnect = async () => {
         setIsAttemptingAutoReconnect(true);
@@ -83,6 +201,7 @@
     deviceConnection.connectedDeviceId, deviceConnection.isConnecting, scanning,
     deviceConnection.connectToDevice, triedAutoReconnectForCurrentId, isAttemptingAutoReconnect,
+    isRetryingConnection,
   ]);
 
   const handleCancelAutoReconnect = useCallback(async () => {
@@ -91,14 +210,26 @@
       setLastKnownDeviceId(null);
       setTriedAutoReconnectForCurrentId(true);
     }
+    resetBackoff();
     await deviceConnection.disconnectFromDevice();
     setIsAttemptingAutoReconnect(false);
-  }, [deviceConnection, lastKnownDeviceId]);
+  }, [deviceConnection, lastKnownDeviceId, resetBackoff]);
+
+  // Cleanup timers on unmount
+  useEffect(() => {
+    return () => {
+      if (retryTimerRef.current) clearTimeout(retryTimerRef.current);
+      if (countdownTimerRef.current) clearInterval(countdownTimerRef.current);
+    };
+  }, []);
 
   return {
     lastKnownDeviceId,
     isAttemptingAutoReconnect,
     triedAutoReconnectForCurrentId,
+    isRetryingConnection,
+    retryBackoffSeconds,
+    connectionRetryCount,
     setLastKnownDeviceId,
     setTriedAutoReconnectForCurrentId,
     handleCancelAutoReconnect,
diff --git a/app/src/utils/deviceType.ts b/app/src/utils/deviceType.ts
new file mode 100644
index 00000000..e0648e4c
--- /dev/null
+++ b/app/src/utils/deviceType.ts
@@ -0,0 +1,8 @@
+export type DeviceType = 'neo' | 'omi' | 'unknown';
+
+export function detectDeviceType(name: string | null): DeviceType {
+  const lower = (name || '').toLowerCase();
+  if (lower.includes('neo')) return 'neo';
+  if (lower.includes('omi') || lower.includes('friend')) return 'omi';
+  return 'unknown';
+}

From 0d0029d45d92fbc489b0ce9b8383ce45b0750a72 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Wed, 25 Feb 2026 13:14:50 +0000
Subject: [PATCH 4/4] Update dependencies and refactor OpenAI client integration

- Added new dependencies for OpenTelemetry and OpenInference instrumentation to enhance observability and tracing capabilities.
- Refactored OpenAI client creation to remove LangFuse tracing logic, simplifying the client setup.
- Updated LLM client methods to streamline API calls and improve error handling.
- Enhanced session management in controllers to ensure accurate status tracking and websocket connection handling.
- Removed deprecated LangFuse session ID parameters from various methods to clean up the codebase.
- Improved job ID handling in streaming jobs for better traceability and consistency.
---
 backends/advanced/pyproject.toml              |   6 +-
 .../controllers/queue_controller.py           | 343 ++++++----
 .../controllers/session_controller.py         | 222 +++---
 .../controllers/system_controller.py          | 635 ++++++++++--------
 .../src/advanced_omi_backend/llm_client.py    |  64 +-
 .../observability/otel_setup.py               |  93 ++-
 .../advanced_omi_backend/openai_factory.py    |  38 +-
 .../routers/modules/queue_routes.py           |   8 +
 .../services/memory/base.py                   |  14 +-
 .../services/memory/providers/chronicle.py    |  76 ++-
 .../memory/providers/llm_providers.py         |  76 +--
 .../utils/conversation_utils.py               |  18 +-
 .../workers/conversation_jobs.py              | 225 +++----
 .../workers/transcription_jobs.py             | 136 +---
 .../advanced/webui/src/pages/ConnectApp.tsx   | 120 ----
 backends/advanced/webui/src/pages/Queue.tsx   |  27 +-
 backends/advanced/webui/src/services/api.ts   |   3 +
 17 files changed, 1078 insertions(+), 1026 deletions(-)
 delete mode 100644 backends/advanced/webui/src/pages/ConnectApp.tsx

diff --git a/backends/advanced/pyproject.toml b/backends/advanced/pyproject.toml
index e23f3a34..db93628f 100644
--- a/backends/advanced/pyproject.toml
+++ b/backends/advanced/pyproject.toml
@@ -24,6 +24,9 @@ dependencies = [
     "ruamel-yaml>=0.18.0",
     "omegaconf>=2.3.0",
     "langfuse>=3.13.0,<4.0",
+    "opentelemetry-api>=1.20",
+    "opentelemetry-sdk>=1.20",
+    "openinference-instrumentation-openai>=0.1",
     "spacy>=3.8.2",
     "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
     "redis>=5.0.0",
@@ -47,10 +50,7 @@ local-audio = [
 ]
 galileo = [
     "galileo>=1.0",
-    "opentelemetry-api>=1.20",
-    "opentelemetry-sdk>=1.20",
     "opentelemetry-exporter-otlp>=1.20",
-    "openinference-instrumentation-openai>=0.1",
 ]
 
 [build-system]
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
index d2cfc7df..6973dffd 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
@@ -73,6 +73,7 @@ def get_job_status_from_rq(job: Job) -> str:
 
     return status_str
 
+
 # Queue name constants
 TRANSCRIPTION_QUEUE = "transcription"
MEMORY_QUEUE = "memory" @@ -86,9 +87,13 @@ def get_job_status_from_rq(job: Job) -> str: JOB_RESULT_TTL = int(os.getenv("RQ_RESULT_TTL", 86400)) # 24 hour default # Create queues with custom result TTL -transcription_queue = Queue(TRANSCRIPTION_QUEUE, connection=redis_conn, default_timeout=86400) # 24 hours for streaming jobs +transcription_queue = Queue( + TRANSCRIPTION_QUEUE, connection=redis_conn, default_timeout=86400 +) # 24 hours for streaming jobs memory_queue = Queue(MEMORY_QUEUE, connection=redis_conn, default_timeout=300) -audio_queue = Queue(AUDIO_QUEUE, connection=redis_conn, default_timeout=86400) # 24 hours for all-day sessions +audio_queue = Queue( + AUDIO_QUEUE, connection=redis_conn, default_timeout=86400 +) # 24 hours for all-day sessions default_queue = Queue(DEFAULT_QUEUE, connection=redis_conn, default_timeout=300) @@ -123,7 +128,9 @@ def get_job_stats() -> Dict[str, Any]: canceled_jobs += len(queue.canceled_job_registry) deferred_jobs += len(queue.deferred_job_registry) - total_jobs = queued_jobs + started_jobs + finished_jobs + failed_jobs + canceled_jobs + deferred_jobs + total_jobs = ( + queued_jobs + started_jobs + finished_jobs + failed_jobs + canceled_jobs + deferred_jobs + ) return { "total_jobs": total_jobs, @@ -133,7 +140,7 @@ def get_job_stats() -> Dict[str, Any]: "failed_jobs": failed_jobs, "canceled_jobs": canceled_jobs, "deferred_jobs": deferred_jobs, - "timestamp": datetime.utcnow().isoformat() + "timestamp": datetime.utcnow().isoformat(), } @@ -142,7 +149,7 @@ def get_jobs( offset: int = 0, queue_name: str = None, job_type: str = None, - client_id: str = None + client_id: str = None, ) -> Dict[str, Any]: """ Get jobs from a specific queue or all queues with optional filtering. @@ -157,7 +164,9 @@ def get_jobs( Returns: Dict with jobs list and pagination metadata matching frontend expectations """ - logger.info(f"🔍 DEBUG get_jobs: Filtering - queue_name={queue_name}, job_type={job_type}, client_id={client_id}") + logger.info( + f"🔍 DEBUG get_jobs: Filtering - queue_name={queue_name}, job_type={job_type}, client_id={client_id}" + ) all_jobs = [] seen_job_ids = set() # Track which job IDs we've already processed to avoid duplicates @@ -173,7 +182,10 @@ def get_jobs( (queue.started_job_registry.get_job_ids(), "started"), # RQ standard, not "processing" (queue.finished_job_registry.get_job_ids(), "finished"), # RQ standard, not "completed" (queue.failed_job_registry.get_job_ids(), "failed"), - (queue.deferred_job_registry.get_job_ids(), "deferred"), # Jobs waiting for dependencies + ( + queue.deferred_job_registry.get_job_ids(), + "deferred", + ), # Jobs waiting for dependencies ] for job_ids, status in registries: @@ -190,46 +202,58 @@ def get_jobs( user_id = job.kwargs.get("user_id", "") if job.kwargs else "" # Extract just the function name (e.g., "listen_for_speech_job" from "module.listen_for_speech_job") - func_name = job.func_name.split('.')[-1] if job.func_name else "unknown" + func_name = job.func_name.split(".")[-1] if job.func_name else "unknown" # Debug: Log job details before filtering - logger.debug(f"🔍 DEBUG get_jobs: Job {job_id} - func_name={func_name}, full_func_name={job.func_name}, meta_client_id={job.meta.get('client_id', '') if job.meta else ''}, status={status}") + logger.debug( + f"🔍 DEBUG get_jobs: Job {job_id} - func_name={func_name}, full_func_name={job.func_name}, meta_client_id={job.meta.get('client_id', '') if job.meta else ''}, status={status}" + ) # Apply job_type filter if job_type and job_type not in func_name: - 
logger.debug(f"🔍 DEBUG get_jobs: Filtered out {job_id} - job_type '{job_type}' not in func_name '{func_name}'") + logger.debug( + f"🔍 DEBUG get_jobs: Filtered out {job_id} - job_type '{job_type}' not in func_name '{func_name}'" + ) continue # Apply client_id filter (partial match in meta) if client_id: job_client_id = job.meta.get("client_id", "") if job.meta else "" if client_id not in job_client_id: - logger.debug(f"🔍 DEBUG get_jobs: Filtered out {job_id} - client_id '{client_id}' not in job_client_id '{job_client_id}'") + logger.debug( + f"🔍 DEBUG get_jobs: Filtered out {job_id} - client_id '{client_id}' not in job_client_id '{job_client_id}'" + ) continue logger.debug(f"🔍 DEBUG get_jobs: Including job {job_id} in results") - all_jobs.append({ - "job_id": job.id, - "job_type": func_name, - "user_id": user_id, - "status": status, - "priority": "normal", # RQ doesn't track priority in metadata - "data": { - "description": job.description or "", - "queue": qname, - }, - "result": job.result if hasattr(job, 'result') else None, - "meta": job.meta if job.meta else {}, # Include job metadata - "error_message": str(job.exc_info) if job.exc_info else None, - "created_at": job.created_at.isoformat() if job.created_at else None, - "started_at": job.started_at.isoformat() if job.started_at else None, - "completed_at": job.ended_at.isoformat() if job.ended_at else None, - "retry_count": job.retries_left if hasattr(job, 'retries_left') else 0, - "max_retries": 3, # Default max retries - "progress_percent": (job.meta or {}).get("batch_progress", {}).get("percent", 0), - "progress_message": (job.meta or {}).get("batch_progress", {}).get("message", ""), - }) + all_jobs.append( + { + "job_id": job.id, + "job_type": func_name, + "user_id": user_id, + "status": status, + "priority": "normal", # RQ doesn't track priority in metadata + "data": { + "description": job.description or "", + "queue": qname, + }, + "result": job.result if hasattr(job, "result") else None, + "meta": job.meta if job.meta else {}, # Include job metadata + "error_message": str(job.exc_info) if job.exc_info else None, + "created_at": job.created_at.isoformat() if job.created_at else None, + "started_at": job.started_at.isoformat() if job.started_at else None, + "completed_at": job.ended_at.isoformat() if job.ended_at else None, + "retry_count": job.retries_left if hasattr(job, "retries_left") else 0, + "max_retries": 3, # Default max retries + "progress_percent": (job.meta or {}) + .get("batch_progress", {}) + .get("percent", 0), + "progress_message": (job.meta or {}) + .get("batch_progress", {}) + .get("message", ""), + } + ) except Exception as e: logger.error(f"Error fetching job {job_id}: {e}") @@ -238,10 +262,12 @@ def get_jobs( # Paginate total_jobs = len(all_jobs) - paginated_jobs = all_jobs[offset:offset + limit] + paginated_jobs = all_jobs[offset : offset + limit] has_more = (offset + limit) < total_jobs - logger.info(f"🔍 DEBUG get_jobs: Found {total_jobs} matching jobs (returning {len(paginated_jobs)} after pagination)") + logger.info( + f"🔍 DEBUG get_jobs: Found {total_jobs} matching jobs (returning {len(paginated_jobs)} after pagination)" + ) return { "jobs": paginated_jobs, @@ -250,7 +276,7 @@ def get_jobs( "limit": limit, "offset": offset, "has_more": has_more, - } + }, } @@ -281,7 +307,7 @@ def is_job_complete(job): return False # Check dependent jobs - for dep_id in (job.dependent_ids or []): + for dep_id in job.dependent_ids or []: try: dep_job = Job.fetch(dep_id, connection=redis_conn) if not 
is_job_complete(dep_job): @@ -310,7 +336,7 @@ def is_job_complete(job): job = Job.fetch(job_id, connection=redis_conn) # Only check jobs with client_id in meta - if job.meta and job.meta.get('client_id') == client_id: + if job.meta and job.meta.get("client_id") == client_id: if not is_job_complete(job): return False except Exception as e: @@ -319,11 +345,7 @@ def is_job_complete(job): return True -def start_streaming_jobs( - session_id: str, - user_id: str, - client_id: str -) -> Dict[str, str]: +def start_streaming_jobs(session_id: str, user_id: str, client_id: str) -> Dict[str, str]: """ Enqueue jobs for streaming audio session (initial session setup). @@ -351,7 +373,7 @@ def start_streaming_jobs( # Read always_persist from global config NOW (backend process has fresh config) misc_settings = get_misc_settings() - always_persist = misc_settings.get('always_persist_enabled', False) + always_persist = misc_settings.get("always_persist_enabled", False) # Enqueue speech detection job speech_job = transcription_queue.enqueue( @@ -363,9 +385,9 @@ def start_streaming_jobs( ttl=None, # No pre-run expiry (job can wait indefinitely in queue) result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion failure_ttl=86400, # Cleanup failed jobs after 24h - job_id=f"speech-detect_{session_id[:12]}", + job_id=f"speech-detect_{session_id}", description=f"Listening for speech...", - meta={'client_id': client_id, 'session_level': True} + meta={"client_id": client_id, "session_level": True}, ) # Log job enqueue with TTL information for debugging actual_ttl = redis_conn.ttl(f"rq:job:{speech_job.id}") @@ -397,9 +419,9 @@ def start_streaming_jobs( ttl=None, # No pre-run expiry (job can wait indefinitely in queue) result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion failure_ttl=86400, # Cleanup failed jobs after 24h - job_id=f"audio-persist_{session_id[:12]}", - description=f"Audio persistence for session {session_id[:12]}", - meta={'client_id': client_id, 'session_level': True} # Mark as session-level job + job_id=f"audio-persist_{session_id}", + description=f"Audio persistence for session {session_id}", + meta={"client_id": client_id, "session_level": True}, # Mark as session-level job ) # Log job enqueue with TTL information for debugging actual_ttl = redis_conn.ttl(f"rq:job:{audio_job.id}") @@ -411,19 +433,16 @@ def start_streaming_jobs( f"queue_length={audio_queue.count}, client_id={client_id}" ) - return { - 'speech_detection': speech_job.id, - 'audio_persistence': audio_job.id - } + return {"speech_detection": speech_job.id, "audio_persistence": audio_job.id} def start_post_conversation_jobs( conversation_id: str, user_id: str, transcript_version_id: Optional[str] = None, - depends_on_job = None, + depends_on_job=None, client_id: Optional[str] = None, - end_reason: str = "file_upload" + end_reason: str = "file_upload", ) -> Dict[str, str]: """ Start post-conversation processing jobs after conversation is created. 
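The hunks below only reformat this chaining logic, so its shape is easy to lose in the noise: each stage is enqueued with RQ's depends_on pointing at the previous stage, and the final event job takes a list of dependencies. A minimal standalone sketch of the pattern, assuming a local Redis and placeholder stage functions (the real handlers live in the workers modules and must be importable by the RQ worker):

from redis import Redis
from rq import Queue

redis_conn = Redis()  # assumes Redis on localhost:6379
transcription_queue = Queue("transcription", connection=redis_conn)
memory_queue = Queue("memory", connection=redis_conn)
default_queue = Queue("default", connection=redis_conn)

# Placeholder stage functions, standing in for the real worker jobs.
def recognise_speakers_job(conversation_id: str) -> None: ...
def process_memory_job(conversation_id: str) -> None: ...
def generate_title_summary_job(conversation_id: str) -> None: ...
def dispatch_complete_event_job(conversation_id: str) -> None: ...

conversation_id = "abc123"
meta = {"conversation_id": conversation_id}

speaker = transcription_queue.enqueue(
    recognise_speakers_job, conversation_id,
    job_id=f"speaker_{conversation_id}", meta=meta,
)
memory = memory_queue.enqueue(
    process_memory_job, conversation_id,
    depends_on=speaker,  # runs only after speaker recognition succeeds
    job_id=f"memory_{conversation_id}", meta=meta,
)
title = default_queue.enqueue(
    generate_title_summary_job, conversation_id,
    depends_on=memory,  # serialized so the two jobs never write the conversation doc at once
    job_id=f"title_summary_{conversation_id}", meta=meta,
)
event = default_queue.enqueue(
    dispatch_complete_event_job, conversation_id,
    depends_on=[memory, title],  # a list waits for every listed job
    job_id=f"event_complete_{conversation_id}", meta=meta,
)

RQ parks a dependent job in the deferred registry until its dependencies finish, which is why deferred counts appear in the queue stats elsewhere in this patch; by default a failed dependency keeps its dependents from running at all.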
@@ -458,21 +477,27 @@ def start_post_conversation_jobs( version_id = transcript_version_id or str(uuid.uuid4()) # Build job metadata (include client_id if provided for UI tracking) - job_meta = {'conversation_id': conversation_id} + job_meta = {"conversation_id": conversation_id} if client_id: - job_meta['client_id'] = client_id + job_meta["client_id"] = client_id # Check if speaker recognition is enabled - speaker_config = get_service_config('speaker_recognition') - speaker_enabled = speaker_config.get('enabled', True) # Default to True for backward compatibility + speaker_config = get_service_config("speaker_recognition") + speaker_enabled = speaker_config.get( + "enabled", True + ) # Default to True for backward compatibility # Step 1: Speaker recognition job (conditional - only if enabled) - speaker_dependency = depends_on_job # Start with upstream dependency (transcription if file upload) + speaker_dependency = ( + depends_on_job # Start with upstream dependency (transcription if file upload) + ) speaker_job = None if speaker_enabled: speaker_job_id = f"speaker_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}" + ) speaker_job = transcription_queue.enqueue( recognise_speakers_job, @@ -483,26 +508,36 @@ def start_post_conversation_jobs( depends_on=speaker_dependency, job_id=speaker_job_id, description=f"Speaker recognition for conversation {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) speaker_dependency = speaker_job # Chain for next jobs if depends_on_job: - logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {depends_on_job.id})") + logger.info( + f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {depends_on_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (no dependencies, starts immediately)" + ) else: - logger.info(f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}") + logger.info( + f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}" + ) # Step 2: Memory extraction job (conditional - only if enabled) # Check if memory extraction is enabled - memory_config = get_service_config('memory.extraction') - memory_enabled = memory_config.get('enabled', True) # Default to True for backward compatibility + memory_config = get_service_config("memory.extraction") + memory_enabled = memory_config.get( + "enabled", True + ) # Default to True for backward compatibility memory_job = None if memory_enabled: # Depends on speaker job if it was created, otherwise depends on upstream (transcription or nothing) memory_job_id = f"memory_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}" + ) memory_job = memory_queue.enqueue( process_memory_job, @@ -512,23 +547,33 @@ def start_post_conversation_jobs( depends_on=speaker_dependency, # Either speaker_job or upstream dependency job_id=memory_job_id, 
description=f"Memory extraction for conversation {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) if speaker_job: - logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on speaker job {speaker_job.id})") + logger.info( + f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on speaker job {speaker_job.id})" + ) elif depends_on_job: - logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {depends_on_job.id})") + logger.info( + f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {depends_on_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (no dependencies, starts immediately)" + ) else: - logger.info(f"⏭️ Memory extraction disabled, skipping memory job for conversation {conversation_id[:8]}") + logger.info( + f"⏭️ Memory extraction disabled, skipping memory job for conversation {conversation_id[:8]}" + ) # Step 3: Title/summary generation job # Depends on memory job to avoid race condition (both jobs save the conversation document) # and to ensure fresh memories are available for context-enriched summaries title_dependency = memory_job if memory_job else speaker_dependency title_job_id = f"title_summary_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}" + ) title_summary_job = default_queue.enqueue( generate_title_summary_job, @@ -538,21 +583,31 @@ def start_post_conversation_jobs( depends_on=title_dependency, job_id=title_job_id, description=f"Generate title and summary for conversation {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) if memory_job: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on memory job {memory_job.id})") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on memory job {memory_job.id})" + ) elif speaker_job: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on speaker job {speaker_job.id})") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on speaker job {speaker_job.id})" + ) elif depends_on_job: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {depends_on_job.id})") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {depends_on_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (no dependencies, starts immediately)" + ) # Step 5: Dispatch conversation.complete event (runs after both memory and title/summary complete) # This ensures plugins receive the event after all processing is done event_job_id = f"event_complete_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating 
conversation complete event job with job_id={event_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating conversation complete event job with job_id={event_job_id}, conversation_id={conversation_id[:12]}" + ) # Event job depends on memory and title/summary jobs that were actually enqueued # Build dependency list excluding None values @@ -571,29 +626,33 @@ def start_post_conversation_jobs( end_reason, # Use the end_reason parameter (defaults to 'file_upload' for backward compatibility) job_timeout=120, # 2 minutes result_ttl=JOB_RESULT_TTL, - depends_on=event_dependencies if event_dependencies else None, # Wait for jobs that were enqueued + depends_on=( + event_dependencies if event_dependencies else None + ), # Wait for jobs that were enqueued job_id=event_job_id, description=f"Dispatch conversation complete event ({end_reason}) for {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) # Log event dispatch dependencies if event_dependencies: dep_ids = [job.id for job in event_dependencies] - logger.info(f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (depends on {', '.join(dep_ids)})") + logger.info( + f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (depends on {', '.join(dep_ids)})" + ) else: - logger.info(f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (no dependencies, starts immediately)" + ) return { - 'speaker_recognition': speaker_job.id if speaker_job else None, - 'memory': memory_job.id if memory_job else None, - 'title_summary': title_summary_job.id, - 'event_dispatch': event_dispatch_job.id + "speaker_recognition": speaker_job.id if speaker_job else None, + "memory": memory_job.id if memory_job else None, + "title_summary": title_summary_job.id, + "event_dispatch": event_dispatch_job.id, } - - def get_queue_health() -> Dict[str, Any]: """Get health status of all queues and workers.""" health = { @@ -637,15 +696,18 @@ def get_queue_health() -> Dict[str, Any]: else: health["idle_workers"] += 1 - health["workers"].append({ - "name": worker.name, - "state": state, - "queues": [q.name for q in worker.queues], - "current_job": current_job, - }) + health["workers"].append( + { + "name": worker.name, + "state": state, + "queues": [q.name for q in worker.queues], + "current_job": current_job, + } + ) return health + # needs tidying but works for now async def cleanup_stuck_stream_workers(request): """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" @@ -660,7 +722,7 @@ async def cleanup_stuck_stream_workers(request): if not redis_client: return JSONResponse( status_code=503, - content={"error": "Redis client for audio streaming not initialized"} + content={"error": "Redis client for audio streaming not initialized"}, ) cleanup_results = {} @@ -677,13 +739,17 @@ async def cleanup_stuck_stream_workers(request): try: # First check stream age - delete old streams (>1 hour) immediately - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + stream_info = await redis_client.execute_command("XINFO", "STREAM", stream_name) # Parse stream info info_dict = {} for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], 
bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] + key_name = ( + stream_info[i].decode() + if isinstance(stream_info[i], bytes) + else str(stream_info[i]) + ) + info_dict[key_name] = stream_info[i + 1] stream_length = int(info_dict.get("length", 0)) last_entry = info_dict.get("last-entry") @@ -700,7 +766,7 @@ async def cleanup_stuck_stream_workers(request): last_id = last_entry[0] if isinstance(last_id, bytes): last_id = last_id.decode() - last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_ms = int(last_id.split("-")[0]) last_timestamp_s = last_timestamp_ms / 1000 stream_age = current_time - last_timestamp_s @@ -718,15 +784,19 @@ async def cleanup_stuck_stream_workers(request): "cleaned": 0, "deleted_consumers": 0, "deleted_stream": True, - "stream_age": stream_age + "stream_age": stream_age, } continue # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + groups = await redis_client.execute_command("XINFO", "GROUPS", stream_name) if not groups: - cleanup_results[stream_name] = {"message": "No consumer groups found", "cleaned": 0, "deleted_stream": False} + cleanup_results[stream_name] = { + "message": "No consumer groups found", + "cleaned": 0, + "deleted_stream": False, + } continue # Parse first group @@ -734,7 +804,7 @@ async def cleanup_stuck_stream_workers(request): group = groups[0] for i in range(0, len(group), 2): key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] + value = group[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -749,7 +819,9 @@ async def cleanup_stuck_stream_workers(request): pending_count = int(group_dict.get("pending", 0)) # Get consumers for this group to check per-consumer pending - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + consumers = await redis_client.execute_command( + "XINFO", "CONSUMERS", stream_name, group_name + ) cleaned_count = 0 total_consumer_pending = 0 @@ -759,8 +831,12 @@ async def cleanup_stuck_stream_workers(request): for consumer in consumers: consumer_dict = {} for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] + key = ( + consumer[i].decode() + if isinstance(consumer[i], bytes) + else str(consumer[i]) + ) + value = consumer[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -780,12 +856,20 @@ async def cleanup_stuck_stream_workers(request): is_dead = consumer_idle_ms > 300000 if consumer_pending > 0: - logger.info(f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + logger.info( + f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)" + ) # Get pending messages for this specific consumer try: pending_messages = await redis_client.execute_command( - 'XPENDING', stream_name, group_name, '-', '+', str(consumer_pending), consumer_name + "XPENDING", + stream_name, + group_name, + "-", + "+", + str(consumer_pending), + consumer_name, ) # XPENDING returns flat list: [msg_id, consumer, idle_ms, delivery_count, msg_id, ...] 
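The raw execute_command('XPENDING', ...) / XCLAIM / XACK sequence that the next hunk reformats has a more readable equivalent in redis-py's high-level stream helpers. A minimal synchronous sketch of the same recover-then-ack flow; the stream and group names are illustrative, and the actual handler runs against an async client:

from redis import Redis

r = Redis(decode_responses=True)  # assumes Redis on localhost:6379
stream, group = "audio:stream:client-1", "audio-group"  # illustrative names

for consumer in r.xinfo_consumers(stream, group):
    name, pending, idle_ms = consumer["name"], consumer["pending"], consumer["idle"]
    if pending > 0:
        # Claim each stuck message for a cleanup consumer, then ack it so the
        # group's pending entries list shrinks.
        for msg in r.xpending_range(stream, group, min="-", max="+",
                                    count=pending, consumername=name):
            r.xclaim(stream, group, "cleanup-worker",
                     min_idle_time=0, message_ids=[msg["message_id"]])
            r.xack(stream, group, msg["message_id"])
    elif idle_ms > 300000:
        # No pending messages and idle for over 5 minutes: treat as dead.
        r.xgroup_delconsumer(stream, group, name)

Passing min_idle_time=0 to xclaim reassigns the message unconditionally, so it can be acknowledged even though the original consumer never finished it.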
@@ -799,31 +883,49 @@ async def cleanup_stuck_stream_workers(request): # Claim the message to a cleanup worker try: await redis_client.execute_command( - 'XCLAIM', stream_name, group_name, 'cleanup-worker', '0', msg_id + "XCLAIM", + stream_name, + group_name, + "cleanup-worker", + "0", + msg_id, ) # Acknowledge it immediately await redis_client.xack(stream_name, group_name, msg_id) cleaned_count += 1 except Exception as claim_error: - logger.warning(f"Failed to claim/ack message {msg_id}: {claim_error}") + logger.warning( + f"Failed to claim/ack message {msg_id}: {claim_error}" + ) except Exception as consumer_error: - logger.error(f"Error processing consumer {consumer_name}: {consumer_error}") + logger.error( + f"Error processing consumer {consumer_name}: {consumer_error}" + ) # Delete dead consumers (idle > 5 minutes with no pending messages) if is_dead and consumer_pending == 0: try: await redis_client.execute_command( - 'XGROUP', 'DELCONSUMER', stream_name, group_name, consumer_name + "XGROUP", "DELCONSUMER", stream_name, group_name, consumer_name ) deleted_consumers += 1 - logger.info(f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + logger.info( + f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)" + ) except Exception as delete_error: - logger.warning(f"Failed to delete consumer {consumer_name}: {delete_error}") + logger.warning( + f"Failed to delete consumer {consumer_name}: {delete_error}" + ) if total_consumer_pending == 0 and deleted_consumers == 0: - cleanup_results[stream_name] = {"message": "No pending messages or dead consumers", "cleaned": 0, "deleted_consumers": 0, "deleted_stream": False} + cleanup_results[stream_name] = { + "message": "No pending messages or dead consumers", + "cleaned": 0, + "deleted_consumers": 0, + "deleted_stream": False, + } continue total_cleaned += cleaned_count @@ -833,14 +935,11 @@ async def cleanup_stuck_stream_workers(request): "cleaned": cleaned_count, "deleted_consumers": deleted_consumers, "deleted_stream": False, - "original_pending": pending_count + "original_pending": pending_count, } except Exception as e: - cleanup_results[stream_name] = { - "error": str(e), - "cleaned": 0 - } + cleanup_results[stream_name] = {"error": str(e), "cleaned": 0} return { "success": True, @@ -849,7 +948,7 @@ async def cleanup_stuck_stream_workers(request): "total_deleted_streams": total_deleted_streams, "streams": cleanup_results, # New key for per-stream results "providers": cleanup_results, # Keep for backward compatibility with frontend - "timestamp": time.time() + "timestamp": time.time(), } except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py index 9b3a2de9..6a96883b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -24,7 +24,7 @@ async def mark_session_complete( "user_stopped", "inactivity_timeout", "max_duration", - "all_jobs_complete" + "all_jobs_complete", ], ) -> None: """ @@ -57,11 +57,10 @@ async def mark_session_complete( """ session_key = f"audio:session:{session_id}" mark_time = time.time() - await redis_client.hset(session_key, mapping={ - "status": "finished", - "completed_at": str(mark_time), - "completion_reason": reason - }) + await redis_client.hset( + session_key, + mapping={"status": "finished", "completed_at": str(mark_time), 
"completion_reason": reason}, + ) logger.info(f"✅ Session {session_id[:12]} marked finished: {reason} [TIME: {mark_time:.3f}]") @@ -117,7 +116,9 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: # Get conversation count for this session conversation_count_key = f"session:conversation_count:{session_id}" conversation_count_bytes = await redis_client.get(conversation_count_key) - conversation_count = int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + conversation_count = ( + int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + ) started_at = float(session_data.get(b"started_at", b"0")) last_chunk_at = float(session_data.get(b"last_chunk_at", b"0")) @@ -129,6 +130,9 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: "provider": session_data.get(b"provider", b"").decode(), "mode": session_data.get(b"mode", b"").decode(), "status": session_data.get(b"status", b"").decode(), + "websocket_connected": session_data.get(b"websocket_connected", b"false").decode() + == "true", + "completion_reason": session_data.get(b"completion_reason", b"").decode(), "chunks_published": int(session_data.get(b"chunks_published", b"0")), "started_at": started_at, "last_chunk_at": last_chunk_at, @@ -139,7 +143,7 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: "last_event": session_data.get(b"last_event", b"").decode(), "speech_detected_at": session_data.get(b"speech_detected_at", b"").decode(), "speaker_check_status": session_data.get(b"speaker_check_status", b"").decode(), - "identified_speakers": session_data.get(b"identified_speakers", b"").decode() + "identified_speakers": session_data.get(b"identified_speakers", b"").decode(), } except Exception as e: @@ -163,10 +167,8 @@ async def get_all_sessions(redis_client, limit: int = 100) -> List[Dict]: session_keys = [] cursor = b"0" while cursor and len(session_keys) < limit: - cursor, keys = await redis_client.scan( - cursor, match="audio:session:*", count=limit - ) - session_keys.extend(keys[:limit - len(session_keys)]) + cursor, keys = await redis_client.scan(cursor, match="audio:session:*", count=limit) + session_keys.extend(keys[: limit - len(session_keys)]) # Get info for each session sessions = [] @@ -241,7 +243,7 @@ async def get_streaming_status(request): if not redis_client: return JSONResponse( status_code=503, - content={"error": "Redis client for audio streaming not initialized"} + content={"error": "Redis client for audio streaming not initialized"}, ) # Get all sessions (both active and completed) @@ -261,40 +263,56 @@ async def get_streaming_status(request): # Separate active and completed sessions # Check if all jobs are complete (including failed jobs) - # Note: session_id == client_id in streaming context, but using client_id explicitly all_jobs_done = all_jobs_complete_for_client(session_obj.get("client_id")) - # Session is finished if: - # 1. Redis status says finished AND all jobs done, OR - # 2. 
All jobs are done (even if status isn't finished yet) - # This ensures sessions with failed jobs move to finished - if status == "finished" or all_jobs_done: - if all_jobs_done: - # All jobs finished - this is truly a finished session - # Update Redis status if it wasn't already marked finished - if status != "finished": - await mark_session_complete(redis_client, session_id, "all_jobs_complete") - - # Get additional session data for completed sessions - session_key = f"audio:session:{session_id}" - session_data = await redis_client.hgetall(session_key) - - completed_sessions_from_redis.append({ + # Session is completed ONLY when: + # 1. Status was already set to "finished" by an authoritative source + # (WebSocket disconnect handler or job handler), AND + # 2. All RQ jobs are in terminal state + # + # IMPORTANT: Do NOT mark sessions as finished here. Between conversations + # (after open_conversation_job finishes, before speech detection restarts), + # all jobs are briefly terminal. Writing "finished" during this gap kills + # the session permanently. + if status == "finished" and all_jobs_done: + # Get additional session data for completed sessions + session_key = f"audio:session:{session_id}" + session_data = await redis_client.hgetall(session_key) + + completed_sessions_from_redis.append( + { "session_id": session_id, "client_id": session_obj.get("client_id", ""), - "conversation_id": session_data.get(b"conversation_id", b"").decode() if session_data and b"conversation_id" in session_data else None, - "has_conversation": bool(session_data and session_data.get(b"conversation_id", b"")), - "action": session_data.get(b"action", b"finished").decode() if session_data and b"action" in session_data else "finished", - "reason": session_data.get(b"reason", b"").decode() if session_data and b"reason" in session_data else "", + "conversation_id": ( + session_data.get(b"conversation_id", b"").decode() + if session_data and b"conversation_id" in session_data + else None + ), + "has_conversation": bool( + session_data and session_data.get(b"conversation_id", b"") + ), + "action": ( + session_data.get(b"action", b"finished").decode() + if session_data and b"action" in session_data + else "finished" + ), + "reason": ( + session_data.get(b"reason", b"").decode() + if session_data and b"reason" in session_data + else "" + ), "completed_at": session_obj.get("last_chunk_at", 0), - "audio_file": session_data.get(b"audio_file", b"").decode() if session_data and b"audio_file" in session_data else "", - "conversation_count": session_obj.get("conversation_count", 0) - }) - else: - # Status says complete but jobs still processing - keep in active - active_sessions.append(session_obj) + "audio_file": ( + session_data.get(b"audio_file", b"").decode() + if session_data and b"audio_file" in session_data + else "" + ), + "conversation_count": session_obj.get("conversation_count", 0), + } + ) else: - # This is an active session + # Active session (including inter-conversation gaps where all jobs + # are temporarily terminal but status is still "active") active_sessions.append(session_obj) # Get stream health for all streams (per-client streams) @@ -317,13 +335,17 @@ async def get_streaming_status(request): stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key try: # Check if stream exists - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + stream_info = await redis_client.execute_command("XINFO", "STREAM", stream_name) # Parse stream info (returns flat 
list of key-value pairs) info_dict = {} for i in range(0, len(stream_info), 2): - key = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - value = stream_info[i+1] + key = ( + stream_info[i].decode() + if isinstance(stream_info[i], bytes) + else str(stream_info[i]) + ) + value = stream_info[i + 1] # Skip complex binary structures like first-entry and last-entry # which contain message data that can't be JSON serialized @@ -351,7 +373,7 @@ async def get_streaming_status(request): if last_entry_id: try: # Redis Stream IDs format: "milliseconds-sequence" - last_timestamp_ms = int(last_entry_id.split('-')[0]) + last_timestamp_ms = int(last_entry_id.split("-")[0]) last_timestamp_s = last_timestamp_ms / 1000 stream_age_seconds = current_time - last_timestamp_s except (ValueError, IndexError, AttributeError): @@ -369,7 +391,7 @@ async def get_streaming_status(request): session_idle_seconds = session_data.get("idle_seconds", 0) # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + groups = await redis_client.execute_command("XINFO", "GROUPS", stream_name) stream_data = { "stream_length": info_dict.get("length", 0), @@ -378,19 +400,19 @@ async def get_streaming_status(request): "session_age_seconds": session_age_seconds, # Age since session started "session_idle_seconds": session_idle_seconds, # Time since last audio chunk "client_id": client_id, # Include client_id for reference - "consumer_groups": [] + "consumer_groups": [], } # Track if stream has any active consumers has_active_consumer = False - min_consumer_idle_ms = float('inf') + min_consumer_idle_ms = float("inf") # Parse consumer groups for group in groups: group_dict = {} for i in range(0, len(group), 2): key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] + value = group[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -403,15 +425,21 @@ async def get_streaming_status(request): group_name = group_name.decode() # Get consumers for this group - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + consumers = await redis_client.execute_command( + "XINFO", "CONSUMERS", stream_name, group_name + ) consumer_list = [] consumer_pending_total = 0 for consumer in consumers: consumer_dict = {} for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] + key = ( + consumer[i].decode() + if isinstance(consumer[i], bytes) + else str(consumer[i]) + ) + value = consumer[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -434,11 +462,13 @@ async def get_streaming_status(request): if consumer_idle_ms < 300000: has_active_consumer = True - consumer_list.append({ - "name": consumer_name, - "pending": consumer_pending, - "idle_ms": consumer_idle_ms - }) + consumer_list.append( + { + "name": consumer_name, + "pending": consumer_pending, + "idle_ms": consumer_idle_ms, + } + ) # Get group-level pending count (may be 0 even if consumers have pending) try: @@ -451,20 +481,22 @@ async def get_streaming_status(request): # (Sometimes group pending is 0 but consumers still have pending messages) effective_pending = max(group_pending_count, consumer_pending_total) - stream_data["consumer_groups"].append({ - "name": str(group_name), - "consumers": consumer_list, - "pending": int(effective_pending) - }) + stream_data["consumer_groups"].append( + { + "name": str(group_name), + 
"consumers": consumer_list, + "pending": int(effective_pending), + } + ) # Determine if stream is active or completed # Active: has active consumers OR pending messages OR recent activity (< 5 min) # Completed: no active consumers and idle > 5 minutes but < 1 hour total_pending = sum(group["pending"] for group in stream_data["consumer_groups"]) is_active = ( - has_active_consumer or - total_pending > 0 or - stream_age_seconds < 300 # Less than 5 minutes old + has_active_consumer + or total_pending > 0 + or stream_age_seconds < 300 # Less than 5 minutes old ) if is_active: @@ -487,7 +519,7 @@ async def get_streaming_status(request): "finished": len(transcription_queue.finished_job_registry), "failed": len(transcription_queue.failed_job_registry), "canceled": len(transcription_queue.canceled_job_registry), - "deferred": len(transcription_queue.deferred_job_registry) + "deferred": len(transcription_queue.deferred_job_registry), }, "memory_queue": { "queued": memory_queue.count, @@ -495,7 +527,7 @@ async def get_streaming_status(request): "finished": len(memory_queue.finished_job_registry), "failed": len(memory_queue.failed_job_registry), "canceled": len(memory_queue.canceled_job_registry), - "deferred": len(memory_queue.deferred_job_registry) + "deferred": len(memory_queue.deferred_job_registry), }, "default_queue": { "queued": default_queue.count, @@ -503,8 +535,8 @@ async def get_streaming_status(request): "finished": len(default_queue.finished_job_registry), "failed": len(default_queue.failed_job_registry), "canceled": len(default_queue.canceled_job_registry), - "deferred": len(default_queue.deferred_job_registry) - } + "deferred": len(default_queue.deferred_job_registry), + }, } return { @@ -514,14 +546,13 @@ async def get_streaming_status(request): "completed_streams": completed_streams, "stream_health": active_streams, # Backward compatibility - use active_streams "rq_queues": rq_stats, - "timestamp": time.time() + "timestamp": time.time(), } except Exception as e: logger.error(f"Error getting streaming status: {e}", exc_info=True) return JSONResponse( - status_code=500, - content={"error": f"Failed to get streaming status: {str(e)}"} + status_code=500, content={"error": f"Failed to get streaming status: {str(e)}"} ) @@ -538,7 +569,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): if not redis_client: return JSONResponse( status_code=503, - content={"error": "Redis client for audio streaming not initialized"} + content={"error": "Redis client for audio streaming not initialized"}, ) # Get all session keys @@ -560,17 +591,14 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): age_seconds = current_time - started_at # Clean up sessions older than max_age or stuck in "finalizing" - should_clean = ( - age_seconds > max_age_seconds or - (status == "finalizing" and age_seconds > 300) # Finalizing for more than 5 minutes - ) + should_clean = age_seconds > max_age_seconds or ( + status == "finalizing" and age_seconds > 300 + ) # Finalizing for more than 5 minutes if should_clean: - old_sessions.append({ - "session_id": session_id, - "age_seconds": age_seconds, - "status": status - }) + old_sessions.append( + {"session_id": session_id, "age_seconds": age_seconds, "status": status} + ) await redis_client.delete(key) cleaned_sessions += 1 @@ -584,13 +612,17 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): try: # Check stream info to get last activity - stream_info = await redis_client.execute_command('XINFO', 'STREAM', 
stream_name) + stream_info = await redis_client.execute_command("XINFO", "STREAM", stream_name) # Parse stream info info_dict = {} for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] + key_name = ( + stream_info[i].decode() + if isinstance(stream_info[i], bytes) + else str(stream_info[i]) + ) + info_dict[key_name] = stream_info[i + 1] stream_length = int(info_dict.get("length", 0)) last_entry = info_dict.get("last-entry") @@ -611,7 +643,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): # Redis Stream IDs format: "milliseconds-sequence" try: - last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_ms = int(last_id.split("-")[0]) last_timestamp_s = last_timestamp_ms / 1000 age_seconds = current_time - last_timestamp_s @@ -627,7 +659,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): first_id = first_entry[0] if isinstance(first_id, bytes): first_id = first_id.decode() - first_timestamp_ms = int(first_id.split('-')[0]) + first_timestamp_ms = int(first_id.split("-")[0]) first_timestamp_s = first_timestamp_ms / 1000 age_seconds = current_time - first_timestamp_s @@ -640,12 +672,14 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): if should_delete: await redis_client.delete(stream_name) cleaned_streams += 1 - old_streams.append({ - "stream_name": stream_name, - "reason": reason, - "age_seconds": age_seconds, - "length": stream_length - }) + old_streams.append( + { + "stream_name": stream_name, + "reason": reason, + "age_seconds": age_seconds, + "length": stream_length, + } + ) except Exception as e: logger.debug(f"Error checking stream {stream_name}: {e}") @@ -657,7 +691,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): "cleaned_streams": cleaned_streams, "cleaned_session_details": old_sessions, "cleaned_stream_details": old_streams, - "timestamp": time.time() + "timestamp": time.time(), } except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index bf3ce1b1..274861c8 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -7,30 +7,29 @@ import logging import os import re -import signal import shutil +import signal import time import warnings from datetime import UTC, datetime +from io import StringIO from pathlib import Path from typing import Optional -from io import StringIO - -from ruamel.yaml import YAML from fastapi import HTTPException +from ruamel.yaml import YAML from advanced_omi_backend.config import ( get_diarization_settings as load_diarization_settings, ) from advanced_omi_backend.config import get_misc_settings as load_misc_settings -from advanced_omi_backend.config import ( - save_diarization_settings, - save_misc_settings, +from advanced_omi_backend.config import save_diarization_settings, save_misc_settings +from advanced_omi_backend.config_loader import get_plugins_yml_path, save_config_section +from advanced_omi_backend.model_registry import ( + _find_config_path, + get_models_registry, + load_models_config, ) -from advanced_omi_backend.config_loader import get_plugins_yml_path -from advanced_omi_backend.config_loader import save_config_section -from advanced_omi_backend.model_registry import 
_find_config_path, get_models_registry, load_models_config from advanced_omi_backend.models.user import User logger = logging.getLogger(__name__) @@ -43,7 +42,7 @@ async def get_config_diagnostics(): """ Get comprehensive configuration diagnostics. - + Returns warnings, errors, and status for all configuration components. """ diagnostics = { @@ -52,9 +51,9 @@ async def get_config_diagnostics(): "issues": [], "warnings": [], "info": [], - "components": {} + "components": {}, } - + # Test OmegaConf configuration loading try: from advanced_omi_backend.config_loader import load_config @@ -63,7 +62,7 @@ async def get_config_diagnostics(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") config = load_config(force_reload=True) - + # Check for OmegaConf warnings for warning in w: warning_msg = str(warning.message) @@ -71,148 +70,168 @@ async def get_config_diagnostics(): # Extract the variable name from warning if "variable '" in warning_msg.lower(): var_name = warning_msg.split("'")[1] - diagnostics["warnings"].append({ - "component": "OmegaConf", - "severity": "warning", - "message": f"Environment variable '{var_name}' not set (using empty default)", - "resolution": f"Set {var_name} in .env file if needed" - }) - + diagnostics["warnings"].append( + { + "component": "OmegaConf", + "severity": "warning", + "message": f"Environment variable '{var_name}' not set (using empty default)", + "resolution": f"Set {var_name} in .env file if needed", + } + ) + diagnostics["components"]["omegaconf"] = { "status": "healthy", - "message": "Configuration loaded successfully" + "message": "Configuration loaded successfully", } except Exception as e: diagnostics["overall_status"] = "unhealthy" - diagnostics["issues"].append({ - "component": "OmegaConf", - "severity": "error", - "message": f"Failed to load configuration: {str(e)}", - "resolution": "Check config/defaults.yml and config/config.yml syntax" - }) - diagnostics["components"]["omegaconf"] = { - "status": "unhealthy", - "message": str(e) - } - + diagnostics["issues"].append( + { + "component": "OmegaConf", + "severity": "error", + "message": f"Failed to load configuration: {str(e)}", + "resolution": "Check config/defaults.yml and config/config.yml syntax", + } + ) + diagnostics["components"]["omegaconf"] = {"status": "unhealthy", "message": str(e)} + # Test model registry try: from advanced_omi_backend.model_registry import get_models_registry - + with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") registry = get_models_registry() - + # Capture model loading warnings for warning in w: warning_msg = str(warning.message) - diagnostics["warnings"].append({ - "component": "Model Registry", - "severity": "warning", - "message": warning_msg, - "resolution": "Check model definitions in config/defaults.yml" - }) - + diagnostics["warnings"].append( + { + "component": "Model Registry", + "severity": "warning", + "message": warning_msg, + "resolution": "Check model definitions in config/defaults.yml", + } + ) + if registry: diagnostics["components"]["model_registry"] = { "status": "healthy", "message": f"Loaded {len(registry.models)} models", "details": { "total_models": len(registry.models), - "defaults": dict(registry.defaults) if registry.defaults else {} - } + "defaults": dict(registry.defaults) if registry.defaults else {}, + }, } - + # Check critical models stt = registry.get_default("stt") stt_stream = registry.get_default("stt_stream") llm = registry.get_default("llm") - + # STT check if stt: if 
stt.api_key: - diagnostics["info"].append({ - "component": "STT (Batch)", - "message": f"Configured: {stt.name} ({stt.model_provider}) - API key present" - }) + diagnostics["info"].append( + { + "component": "STT (Batch)", + "message": f"Configured: {stt.name} ({stt.model_provider}) - API key present", + } + ) else: - diagnostics["warnings"].append({ - "component": "STT (Batch)", - "severity": "warning", - "message": f"{stt.name} ({stt.model_provider}) - No API key configured", - "resolution": "Transcription can fail without API key" - }) + diagnostics["warnings"].append( + { + "component": "STT (Batch)", + "severity": "warning", + "message": f"{stt.name} ({stt.model_provider}) - No API key configured", + "resolution": "Transcription can fail without API key", + } + ) else: - diagnostics["issues"].append({ - "component": "STT (Batch)", - "severity": "error", - "message": "No batch STT model configured", - "resolution": "Set defaults.stt in config.yml" - }) + diagnostics["issues"].append( + { + "component": "STT (Batch)", + "severity": "error", + "message": "No batch STT model configured", + "resolution": "Set defaults.stt in config.yml", + } + ) diagnostics["overall_status"] = "partial" - + # Streaming STT check if stt_stream: if stt_stream.api_key: - diagnostics["info"].append({ - "component": "STT (Streaming)", - "message": f"Configured: {stt_stream.name} ({stt_stream.model_provider}) - API key present" - }) + diagnostics["info"].append( + { + "component": "STT (Streaming)", + "message": f"Configured: {stt_stream.name} ({stt_stream.model_provider}) - API key present", + } + ) else: - diagnostics["warnings"].append({ + diagnostics["warnings"].append( + { + "component": "STT (Streaming)", + "severity": "warning", + "message": f"{stt_stream.name} ({stt_stream.model_provider}) - No API key configured", + "resolution": "Real-time transcription can fail without API key", + } + ) + else: + diagnostics["warnings"].append( + { "component": "STT (Streaming)", "severity": "warning", - "message": f"{stt_stream.name} ({stt_stream.model_provider}) - No API key configured", - "resolution": "Real-time transcription can fail without API key" - }) - else: - diagnostics["warnings"].append({ - "component": "STT (Streaming)", - "severity": "warning", - "message": "No streaming STT model configured - streaming worker disabled", - "resolution": "Set defaults.stt_stream in config.yml for WebSocket transcription" - }) - + "message": "No streaming STT model configured - streaming worker disabled", + "resolution": "Set defaults.stt_stream in config.yml for WebSocket transcription", + } + ) + # LLM check if llm: if llm.api_key: - diagnostics["info"].append({ - "component": "LLM", - "message": f"Configured: {llm.name} ({llm.model_provider}) - API key present" - }) + diagnostics["info"].append( + { + "component": "LLM", + "message": f"Configured: {llm.name} ({llm.model_provider}) - API key present", + } + ) else: - diagnostics["warnings"].append({ - "component": "LLM", - "severity": "warning", - "message": f"{llm.name} ({llm.model_provider}) - No API key configured", - "resolution": "Memory extraction can fail without API key" - }) - + diagnostics["warnings"].append( + { + "component": "LLM", + "severity": "warning", + "message": f"{llm.name} ({llm.model_provider}) - No API key configured", + "resolution": "Memory extraction can fail without API key", + } + ) + else: diagnostics["overall_status"] = "unhealthy" - diagnostics["issues"].append({ - "component": "Model Registry", - "severity": "error", - "message": 
"Failed to load model registry", - "resolution": "Check config/defaults.yml for syntax errors" - }) + diagnostics["issues"].append( + { + "component": "Model Registry", + "severity": "error", + "message": "Failed to load model registry", + "resolution": "Check config/defaults.yml for syntax errors", + } + ) diagnostics["components"]["model_registry"] = { "status": "unhealthy", - "message": "Registry failed to load" + "message": "Registry failed to load", } except Exception as e: diagnostics["overall_status"] = "partial" - diagnostics["issues"].append({ - "component": "Model Registry", - "severity": "error", - "message": f"Error loading registry: {str(e)}", - "resolution": "Check logs for detailed error information" - }) - diagnostics["components"]["model_registry"] = { - "status": "unhealthy", - "message": str(e) - } - + diagnostics["issues"].append( + { + "component": "Model Registry", + "severity": "error", + "message": f"Error loading registry: {str(e)}", + "resolution": "Check logs for detailed error information", + } + ) + diagnostics["components"]["model_registry"] = {"status": "unhealthy", "message": str(e)} + # Check environment variables (only warn about keys relevant to configured providers) env_checks = [ ("AUTH_SECRET_KEY", "Required for authentication"), @@ -235,18 +254,22 @@ async def get_config_diagnostics(): if provider == "deepgram": env_checks.append(("DEEPGRAM_API_KEY", "Required for Deepgram transcription")) elif provider == "smallest": - env_checks.append(("SMALLEST_API_KEY", "Required for Smallest.ai Pulse transcription")) - + env_checks.append( + ("SMALLEST_API_KEY", "Required for Smallest.ai Pulse transcription") + ) + for env_var, description in env_checks: value = os.getenv(env_var) if not value or value == "": - diagnostics["warnings"].append({ - "component": "Environment Variables", - "severity": "warning", - "message": f"{env_var} not set - {description}", - "resolution": f"Set {env_var} in .env file" - }) - + diagnostics["warnings"].append( + { + "component": "Environment Variables", + "severity": "warning", + "message": f"{env_var} not set - {description}", + "resolution": f"Set {env_var} in .env file", + } + ) + return diagnostics @@ -288,7 +311,7 @@ async def get_observability_config(): Returns non-secret data only (enabled status and browser URL). 
""" - from advanced_omi_backend.openai_factory import is_langfuse_enabled + from advanced_omi_backend.observability.otel_setup import is_langfuse_enabled enabled = is_langfuse_enabled() session_base_url = None @@ -321,10 +344,7 @@ async def get_diarization_settings(): try: # Get settings using OmegaConf settings = load_diarization_settings() - return { - "settings": settings, - "status": "success" - } + return {"settings": settings, "status": "success"} except Exception as e: logger.exception("Error getting diarization settings") raise e @@ -335,8 +355,13 @@ async def save_diarization_settings_controller(settings: dict): try: # Validate settings valid_keys = { - "diarization_source", "similarity_threshold", "min_duration", "collar", - "min_duration_off", "min_speakers", "max_speakers" + "diarization_source", + "similarity_threshold", + "min_duration", + "collar", + "min_duration_off", + "min_speakers", + "max_speakers", } # Filter to only valid keys (allow round-trip GET→POST) @@ -348,13 +373,20 @@ async def save_diarization_settings_controller(settings: dict): # Type validation for known keys only if key in ["min_speakers", "max_speakers"]: if not isinstance(value, int) or value < 1 or value > 20: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be integer 1-20") + raise HTTPException( + status_code=400, detail=f"Invalid value for {key}: must be integer 1-20" + ) elif key == "diarization_source": if not isinstance(value, str) or value not in ["pyannote", "deepgram"]: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be 'pyannote' or 'deepgram'") + raise HTTPException( + status_code=400, + detail=f"Invalid value for {key}: must be 'pyannote' or 'deepgram'", + ) else: if not isinstance(value, (int, float)) or value < 0: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be positive number") + raise HTTPException( + status_code=400, detail=f"Invalid value for {key}: must be positive number" + ) filtered_settings[key] = value @@ -373,14 +405,14 @@ async def save_diarization_settings_controller(settings: dict): return { "message": "Diarization settings saved successfully", "settings": current_settings, - "status": "success" + "status": "success", } else: logger.warning("Settings save failed") return { "message": "Settings save failed", "settings": current_settings, - "status": "error" + "status": "error", } except Exception as e: @@ -393,10 +425,7 @@ async def get_misc_settings(): try: # Get settings using OmegaConf settings = load_misc_settings() - return { - "settings": settings, - "status": "success" - } + return {"settings": settings, "status": "success"} except Exception as e: logger.exception("Error getting misc settings") raise e @@ -406,7 +435,12 @@ async def save_misc_settings_controller(settings: dict): """Save miscellaneous settings.""" try: # Validate settings - boolean_keys = {"always_persist_enabled", "use_provider_segments", "per_segment_speaker_id", "always_batch_retranscribe"} + boolean_keys = { + "always_persist_enabled", + "use_provider_segments", + "per_segment_speaker_id", + "always_batch_retranscribe", + } integer_keys = {"transcription_job_timeout_seconds"} valid_keys = boolean_keys | integer_keys @@ -419,10 +453,15 @@ async def save_misc_settings_controller(settings: dict): # Type validation if key in boolean_keys: if not isinstance(value, bool): - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be boolean") + raise HTTPException( + status_code=400, 
detail=f"Invalid value for {key}: must be boolean" + ) elif key == "transcription_job_timeout_seconds": if not isinstance(value, int) or value < 60 or value > 7200: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be integer between 60 and 7200") + raise HTTPException( + status_code=400, + detail=f"Invalid value for {key}: must be integer between 60 and 7200", + ) filtered_settings[key] = value @@ -439,14 +478,14 @@ async def save_misc_settings_controller(settings: dict): return { "message": "Miscellaneous settings saved successfully", "settings": updated_settings, - "status": "success" + "status": "success", } else: logger.warning("Settings save failed") return { "message": "Settings save failed", "settings": load_misc_settings(), - "status": "error" + "status": "error", } except HTTPException: @@ -472,9 +511,7 @@ async def get_cleanup_settings_controller(user: User) -> dict: async def save_cleanup_settings_controller( - auto_cleanup_enabled: bool, - retention_days: int, - user: User + auto_cleanup_enabled: bool, retention_days: int, user: User ) -> dict: """ Save cleanup settings (admin only). @@ -504,19 +541,20 @@ async def save_cleanup_settings_controller( # Create settings object settings = CleanupSettings( - auto_cleanup_enabled=auto_cleanup_enabled, - retention_days=retention_days + auto_cleanup_enabled=auto_cleanup_enabled, retention_days=retention_days ) # Save using OmegaConf save_cleanup_settings(settings) - logger.info(f"Admin {user.email} updated cleanup settings: auto_cleanup={auto_cleanup_enabled}, retention={retention_days}d") + logger.info( + f"Admin {user.email} updated cleanup settings: auto_cleanup={auto_cleanup_enabled}, retention={retention_days}d" + ) return { "auto_cleanup_enabled": settings.auto_cleanup_enabled, "retention_days": settings.retention_days, - "message": "Cleanup settings saved successfully" + "message": "Cleanup settings saved successfully", } @@ -526,7 +564,7 @@ async def get_speaker_configuration(user: User): return { "primary_speakers": user.primary_speakers, "user_id": user.user_id, - "status": "success" + "status": "success", } except Exception as e: logger.exception(f"Error getting speaker configuration for user {user.user_id}") @@ -540,30 +578,32 @@ async def update_speaker_configuration(user: User, primary_speakers: list[dict]) for speaker in primary_speakers: if not isinstance(speaker, dict): raise ValueError("Each speaker must be a dictionary") - + required_fields = ["speaker_id", "name", "user_id"] for field in required_fields: if field not in speaker: raise ValueError(f"Missing required field: {field}") - + # Enforce server-side user_id and add timestamp to each speaker for speaker in primary_speakers: speaker["user_id"] = user.user_id # Override client-supplied user_id speaker["selected_at"] = datetime.now(UTC).isoformat() - + # Update user model user.primary_speakers = primary_speakers await user.save() - - logger.info(f"Updated primary speakers configuration for user {user.user_id}: {len(primary_speakers)} speakers") - + + logger.info( + f"Updated primary speakers configuration for user {user.user_id}: {len(primary_speakers)} speakers" + ) + return { "message": "Primary speakers configuration updated successfully", "primary_speakers": primary_speakers, "count": len(primary_speakers), - "status": "success" + "status": "success", } - + except Exception as e: logger.exception(f"Error updating speaker configuration for user {user.user_id}") raise e @@ -578,25 +618,25 @@ async def get_enrolled_speakers(user: 
User): # Initialize speaker recognition client speaker_client = SpeakerRecognitionClient() - + if not speaker_client.enabled: return { "speakers": [], "service_available": False, "message": "Speaker recognition service is not configured or disabled", - "status": "success" + "status": "success", } - + # Get enrolled speakers - using hardcoded user_id=1 for now (as noted in speaker_recognition_client.py) speakers = await speaker_client.get_enrolled_speakers(user_id="1") - + return { "speakers": speakers.get("speakers", []) if speakers else [], "service_available": True, "message": "Successfully retrieved enrolled speakers", - "status": "success" + "status": "success", } - + except Exception as e: logger.exception(f"Error getting enrolled speakers for user {user.user_id}") raise e @@ -611,25 +651,25 @@ async def get_speaker_service_status(): # Initialize speaker recognition client speaker_client = SpeakerRecognitionClient() - + if not speaker_client.enabled: return { "service_available": False, "healthy": False, "message": "Speaker recognition service is not configured or disabled", - "status": "disabled" + "status": "disabled", } - + # Perform health check health_result = await speaker_client.health_check() - + if health_result: return { "service_available": True, "healthy": True, "message": "Speaker recognition service is healthy", "service_url": speaker_client.service_url, - "status": "healthy" + "status": "healthy", } else: return { @@ -637,17 +677,17 @@ async def get_speaker_service_status(): "healthy": False, "message": "Speaker recognition service is not responding", "service_url": speaker_client.service_url, - "status": "unhealthy" + "status": "unhealthy", } - + except Exception as e: logger.exception("Error checking speaker service status") raise e - # Memory Configuration Management Functions + async def get_memory_config_raw(): """Get current memory configuration (memory section of config.yml) as YAML.""" try: @@ -655,7 +695,7 @@ async def get_memory_config_raw(): if not os.path.exists(cfg_path): raise FileNotFoundError(f"Config file not found: {cfg_path}") - with open(cfg_path, 'r') as f: + with open(cfg_path, "r") as f: data = _yaml.load(f) or {} memory_section = data.get("memory", {}) stream = StringIO() @@ -691,10 +731,10 @@ async def update_memory_config_raw(config_yaml: str): shutil.copy2(cfg_path, backup_path) # Update memory section and write file - with open(cfg_path, 'r') as f: + with open(cfg_path, "r") as f: data = _yaml.load(f) or {} data["memory"] = new_mem - with open(cfg_path, 'w') as f: + with open(cfg_path, "w") as f: _yaml.dump(data, f) # Reload registry @@ -736,7 +776,11 @@ async def reload_memory_config(): try: cfg_path = _find_config_path() load_models_config(force_reload=True) - return {"message": "Configuration reloaded", "config_path": str(cfg_path), "status": "success"} + return { + "message": "Configuration reloaded", + "config_path": str(cfg_path), + "status": "success", + } except Exception as e: logger.exception("Error reloading config") raise e @@ -758,7 +802,7 @@ async def delete_all_user_memories(user: User): "message": f"Successfully deleted {deleted_count} memories", "deleted_count": deleted_count, "user_id": user.user_id, - "status": "success" + "status": "success", } except Exception as e: @@ -768,6 +812,7 @@ async def delete_all_user_memories(user: User): # Memory Provider Configuration Functions + async def get_memory_provider(): """Get current memory provider configuration.""" try: @@ -782,7 +827,7 @@ async def get_memory_provider(): return { 
"current_provider": current_provider, "available_providers": available_providers, - "status": "success" + "status": "success", } except Exception as e: @@ -798,7 +843,9 @@ async def set_memory_provider(provider: str): valid_providers = ["chronicle", "openmemory_mcp"] if provider not in valid_providers: - raise ValueError(f"Invalid provider '{provider}'. Valid providers: {', '.join(valid_providers)}") + raise ValueError( + f"Invalid provider '{provider}'. Valid providers: {', '.join(valid_providers)}" + ) # Path to .env file (assuming we're running from backends/advanced/) env_path = os.path.join(os.getcwd(), ".env") @@ -807,7 +854,7 @@ async def set_memory_provider(provider: str): raise FileNotFoundError(f".env file not found at {env_path}") # Read current .env file - with open(env_path, 'r') as file: + with open(env_path, "r") as file: lines = file.readlines() # Update or add MEMORY_PROVIDER line @@ -831,7 +878,7 @@ async def set_memory_provider(provider: str): logger.info(f"Created .env backup at {backup_path}") # Write updated .env file - with open(env_path, 'w') as file: + with open(env_path, "w") as file: file.writelines(updated_lines) # Update environment variable for current process @@ -845,7 +892,7 @@ async def set_memory_provider(provider: str): "env_path": env_path, "backup_created": True, "requires_restart": True, - "status": "success" + "status": "success", } except Exception as e: @@ -855,6 +902,7 @@ async def set_memory_provider(provider: str): # LLM Operations Configuration Functions + async def get_llm_operations(): """Get LLM operation configurations and available models.""" try: @@ -906,25 +954,36 @@ async def save_llm_operations(operations: dict): extra_keys = set(op_value.keys()) - valid_keys if extra_keys: - raise HTTPException(status_code=400, detail=f"Invalid keys for '{op_name}': {extra_keys}") + raise HTTPException( + status_code=400, detail=f"Invalid keys for '{op_name}': {extra_keys}" + ) if "temperature" in op_value and op_value["temperature"] is not None: t = op_value["temperature"] if not isinstance(t, (int, float)) or t < 0 or t > 2: - raise HTTPException(status_code=400, detail=f"Invalid temperature for '{op_name}': must be 0-2") + raise HTTPException( + status_code=400, detail=f"Invalid temperature for '{op_name}': must be 0-2" + ) if "max_tokens" in op_value and op_value["max_tokens"] is not None: mt = op_value["max_tokens"] if not isinstance(mt, int) or mt <= 0: - raise HTTPException(status_code=400, detail=f"Invalid max_tokens for '{op_name}': must be positive int") + raise HTTPException( + status_code=400, + detail=f"Invalid max_tokens for '{op_name}': must be positive int", + ) if "model" in op_value and op_value["model"] is not None: if not registry.get_by_name(op_value["model"]): - raise HTTPException(status_code=400, detail=f"Model '{op_value['model']}' not found in registry") + raise HTTPException( + status_code=400, detail=f"Model '{op_value['model']}' not found in registry" + ) if "response_format" in op_value and op_value["response_format"] is not None: if op_value["response_format"] != "json": - raise HTTPException(status_code=400, detail=f"response_format must be 'json' or null") + raise HTTPException( + status_code=400, detail=f"response_format must be 'json' or null" + ) if save_config_section("llm_operations", operations): load_models_config(force_reload=True) @@ -958,11 +1017,21 @@ async def test_llm_model(model_name: Optional[str]): if model_name: model_def = registry.get_by_name(model_name) if not model_def: - return {"success": False, 
"model_name": model_name, "error": f"Model '{model_name}' not found", "status": "error"} + return { + "success": False, + "model_name": model_name, + "error": f"Model '{model_name}' not found", + "status": "error", + } else: model_def = registry.get_default("llm") if not model_def: - return {"success": False, "model_name": None, "error": "No default LLM configured", "status": "error"} + return { + "success": False, + "model_name": None, + "error": "No default LLM configured", + "status": "error", + } client = create_openai_client( api_key=model_def.api_key or "", @@ -998,6 +1067,7 @@ async def test_llm_model(model_name: Optional[str]): # Chat Configuration Management Functions + async def get_chat_config_yaml() -> str: """Get chat system prompt as plain text.""" try: @@ -1012,11 +1082,11 @@ async def get_chat_config_yaml() -> str: if not os.path.exists(config_path): return default_prompt - with open(config_path, 'r') as f: + with open(config_path, "r") as f: full_config = _yaml.load(f) or {} - chat_config = full_config.get('chat', {}) - system_prompt = chat_config.get('system_prompt', default_prompt) + chat_config = full_config.get("chat", {}) + system_prompt = chat_config.get("system_prompt", default_prompt) # Return just the prompt text, not the YAML structure return system_prompt @@ -1042,26 +1112,26 @@ async def save_chat_config_yaml(prompt_text: str) -> dict: raise ValueError("Prompt too long (maximum 10000 characters)") # Create chat config dict - chat_config = {'system_prompt': prompt_text} + chat_config = {"system_prompt": prompt_text} # Load full config if os.path.exists(config_path): - with open(config_path, 'r') as f: + with open(config_path, "r") as f: full_config = _yaml.load(f) or {} else: full_config = {} # Backup existing config if os.path.exists(config_path): - backup_path = str(config_path) + '.backup' + backup_path = str(config_path) + ".backup" shutil.copy2(config_path, backup_path) logger.info(f"Created config backup at {backup_path}") # Update chat section - full_config['chat'] = chat_config + full_config["chat"] = chat_config # Save - with open(config_path, 'w') as f: + with open(config_path, "w") as f: _yaml.dump(full_config, f) # Reload config in memory (hot-reload) @@ -1098,6 +1168,7 @@ async def validate_chat_config_yaml(prompt_text: str) -> dict: # Plugin Configuration Management Functions + async def get_plugins_config_yaml() -> str: """Get plugins configuration as YAML text.""" try: @@ -1120,7 +1191,7 @@ async def get_plugins_config_yaml() -> str: if not plugins_yml_path.exists(): return default_config - with open(plugins_yml_path, 'r') as f: + with open(plugins_yml_path, "r") as f: yaml_content = f.read() return yaml_content @@ -1142,7 +1213,7 @@ async def save_plugins_config_yaml(yaml_content: str) -> dict: raise ValueError("Configuration must be a YAML dictionary") # Validate has 'plugins' key - if 'plugins' not in parsed_config: + if "plugins" not in parsed_config: raise ValueError("Configuration must contain 'plugins' key") except ValueError: @@ -1155,12 +1226,12 @@ async def save_plugins_config_yaml(yaml_content: str) -> dict: # Backup existing config if plugins_yml_path.exists(): - backup_path = str(plugins_yml_path) + '.backup' + backup_path = str(plugins_yml_path) + ".backup" shutil.copy2(plugins_yml_path, backup_path) logger.info(f"Created plugins config backup at {backup_path}") # Save new config - with open(plugins_yml_path, 'w') as f: + with open(plugins_yml_path, "w") as f: f.write(yaml_content) # Hot-reload plugins and signal worker restart @@ 
-1201,35 +1272,50 @@ async def validate_plugins_config_yaml(yaml_content: str) -> dict: if not isinstance(parsed_config, dict): return {"valid": False, "error": "Configuration must be a YAML dictionary"} - if 'plugins' not in parsed_config: + if "plugins" not in parsed_config: return {"valid": False, "error": "Configuration must contain 'plugins' key"} - plugins = parsed_config['plugins'] + plugins = parsed_config["plugins"] if not isinstance(plugins, dict): return {"valid": False, "error": "'plugins' must be a dictionary"} # Validate each plugin - valid_access_levels = ['transcript', 'conversation', 'memory'] - valid_trigger_types = ['wake_word', 'always', 'conditional'] + valid_access_levels = ["transcript", "conversation", "memory"] + valid_trigger_types = ["wake_word", "always", "conditional"] for plugin_id, plugin_config in plugins.items(): if not isinstance(plugin_config, dict): - return {"valid": False, "error": f"Plugin '{plugin_id}' config must be a dictionary"} + return { + "valid": False, + "error": f"Plugin '{plugin_id}' config must be a dictionary", + } # Check required fields - if 'enabled' in plugin_config and not isinstance(plugin_config['enabled'], bool): + if "enabled" in plugin_config and not isinstance(plugin_config["enabled"], bool): return {"valid": False, "error": f"Plugin '{plugin_id}': 'enabled' must be boolean"} - if 'access_level' in plugin_config and plugin_config['access_level'] not in valid_access_levels: - return {"valid": False, "error": f"Plugin '{plugin_id}': invalid access_level (must be one of {valid_access_levels})"} + if ( + "access_level" in plugin_config + and plugin_config["access_level"] not in valid_access_levels + ): + return { + "valid": False, + "error": f"Plugin '{plugin_id}': invalid access_level (must be one of {valid_access_levels})", + } - if 'trigger' in plugin_config: - trigger = plugin_config['trigger'] + if "trigger" in plugin_config: + trigger = plugin_config["trigger"] if not isinstance(trigger, dict): - return {"valid": False, "error": f"Plugin '{plugin_id}': 'trigger' must be a dictionary"} + return { + "valid": False, + "error": f"Plugin '{plugin_id}': 'trigger' must be a dictionary", + } - if 'type' in trigger and trigger['type'] not in valid_trigger_types: - return {"valid": False, "error": f"Plugin '{plugin_id}': invalid trigger type (must be one of {valid_trigger_types})"} + if "type" in trigger and trigger["type"] not in valid_trigger_types: + return { + "valid": False, + "error": f"Plugin '{plugin_id}': invalid trigger type (must be one of {valid_trigger_types})", + } return {"valid": True, "message": "Configuration is valid"} @@ -1314,9 +1400,11 @@ async def reload_plugins_controller(app=None) -> dict: return { "success": reload_result.get("success", False), - "message": "Plugins reloaded and worker restart signaled" - if worker_signal_sent - else "Plugins reloaded but worker restart signal failed", + "message": ( + "Plugins reloaded and worker restart signaled" + if worker_signal_sent + else "Plugins reloaded but worker restart signal failed" + ), "reload": reload_result, "worker_signal_sent": worker_signal_sent, } @@ -1324,6 +1412,7 @@ async def reload_plugins_controller(app=None) -> dict: # Structured Plugin Configuration Management Functions (Form-based UI) + async def get_plugins_metadata() -> dict: """Get plugin metadata for form-based configuration UI. 
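
Note on the validation hunk above: a minimal sketch of the plugins.yml structure it accepts. This uses PyYAML for brevity (the backend itself loads config via ruamel.yaml), and the plugin id and values are illustrative only:

    import yaml  # sketch only; PyYAML assumed, not the backend's ruamel.yaml

    VALID_ACCESS_LEVELS = {"transcript", "conversation", "memory"}
    VALID_TRIGGER_TYPES = {"wake_word", "always", "conditional"}

    config_text = """
    plugins:
      my_plugin:            # hypothetical plugin id
        enabled: true
        access_level: conversation
        trigger:
          type: wake_word
    """

    parsed = yaml.safe_load(config_text)
    assert isinstance(parsed, dict) and "plugins" in parsed
    for plugin_id, cfg in parsed["plugins"].items():
        assert isinstance(cfg, dict)
        # Mirrors the checks in validate_plugins_config_yaml above
        assert isinstance(cfg.get("enabled", False), bool)
        assert cfg.get("access_level", "transcript") in VALID_ACCESS_LEVELS
        trigger = cfg.get("trigger", {})
        assert isinstance(trigger, dict)
        assert trigger.get("type", "always") in VALID_TRIGGER_TYPES
    print("plugins.yml structure is valid")

Unlike this sketch's asserts, the endpoint itself never raises on bad input: each failed check returns a {"valid": False, "error": ...} dict so the form UI can surface the message.
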
@@ -1350,19 +1439,17 @@ async def get_plugins_metadata() -> dict: orchestration_configs = {} if plugins_yml_path.exists(): - with open(plugins_yml_path, 'r') as f: + with open(plugins_yml_path, "r") as f: plugins_data = _yaml.load(f) or {} - orchestration_configs = plugins_data.get('plugins', {}) + orchestration_configs = plugins_data.get("plugins", {}) # Build metadata for each plugin plugins_metadata = [] for plugin_id, plugin_class in discovered_plugins.items(): # Get orchestration config (or empty dict if not configured) - orchestration_config = orchestration_configs.get(plugin_id, { - 'enabled': False, - 'events': [], - 'condition': {'type': 'always'} - }) + orchestration_config = orchestration_configs.get( + plugin_id, {"enabled": False, "events": [], "condition": {"type": "always"}} + ) # Get complete metadata including schema metadata = get_plugin_metadata(plugin_id, plugin_class, orchestration_config) @@ -1370,10 +1457,7 @@ async def get_plugins_metadata() -> dict: logger.info(f"Retrieved metadata for {len(plugins_metadata)} plugins") - return { - "plugins": plugins_metadata, - "status": "success" - } + return {"plugins": plugins_metadata, "status": "success"} except Exception as e: logger.exception("Error getting plugins metadata") @@ -1396,7 +1480,10 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: Success message with list of updated files """ try: - from advanced_omi_backend.services.plugin_service import _get_plugins_dir, discover_plugins + from advanced_omi_backend.services.plugin_service import ( + _get_plugins_dir, + discover_plugins, + ) # Validate plugin exists discovered_plugins = discover_plugins() @@ -1406,84 +1493,83 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: updated_files = [] # 1. Update config/plugins.yml (orchestration) - if 'orchestration' in config: + if "orchestration" in config: plugins_yml_path = get_plugins_yml_path() # Load current plugins.yml if plugins_yml_path.exists(): - with open(plugins_yml_path, 'r') as f: + with open(plugins_yml_path, "r") as f: plugins_data = _yaml.load(f) or {} else: plugins_data = {} - if 'plugins' not in plugins_data: - plugins_data['plugins'] = {} + if "plugins" not in plugins_data: + plugins_data["plugins"] = {} # Update orchestration config - orchestration = config['orchestration'] - plugins_data['plugins'][plugin_id] = { - 'enabled': orchestration.get('enabled', False), - 'events': orchestration.get('events', []), - 'condition': orchestration.get('condition', {'type': 'always'}) + orchestration = config["orchestration"] + plugins_data["plugins"][plugin_id] = { + "enabled": orchestration.get("enabled", False), + "events": orchestration.get("events", []), + "condition": orchestration.get("condition", {"type": "always"}), } # Create backup if plugins_yml_path.exists(): - backup_path = str(plugins_yml_path) + '.backup' + backup_path = str(plugins_yml_path) + ".backup" shutil.copy2(plugins_yml_path, backup_path) # Create config directory if needed plugins_yml_path.parent.mkdir(parents=True, exist_ok=True) # Write updated plugins.yml - with open(plugins_yml_path, 'w') as f: + with open(plugins_yml_path, "w") as f: _yaml.dump(plugins_data, f) updated_files.append(str(plugins_yml_path)) logger.info(f"Updated orchestration config for '{plugin_id}' in {plugins_yml_path}") # 2. 
Update plugins/{plugin_id}/config.yml (settings with env var references) - if 'settings' in config: + if "settings" in config: plugins_dir = _get_plugins_dir() plugin_config_path = plugins_dir / plugin_id / "config.yml" # Load current config.yml if plugin_config_path.exists(): - with open(plugin_config_path, 'r') as f: + with open(plugin_config_path, "r") as f: plugin_config_data = _yaml.load(f) or {} else: plugin_config_data = {} # Update settings (preserve ${ENV_VAR} references) - settings = config['settings'] + settings = config["settings"] plugin_config_data.update(settings) # Create backup if plugin_config_path.exists(): - backup_path = str(plugin_config_path) + '.backup' + backup_path = str(plugin_config_path) + ".backup" shutil.copy2(plugin_config_path, backup_path) # Write updated config.yml - with open(plugin_config_path, 'w') as f: + with open(plugin_config_path, "w") as f: _yaml.dump(plugin_config_data, f) updated_files.append(str(plugin_config_path)) logger.info(f"Updated settings for '{plugin_id}' in {plugin_config_path}") # 3. Update per-plugin .env (only changed env vars) - if 'env_vars' in config and config['env_vars']: + if "env_vars" in config and config["env_vars"]: from advanced_omi_backend.services.plugin_service import save_plugin_env # Filter out masked values (unchanged secrets) - changed_vars = { - k: v for k, v in config['env_vars'].items() - if v != '••••••••••••' - } + changed_vars = {k: v for k, v in config["env_vars"].items() if v != "••••••••••••"} if changed_vars: env_path = save_plugin_env(plugin_id, changed_vars) updated_files.append(str(env_path)) - logger.info(f"Saved {len(changed_vars)} env var(s) to per-plugin .env for '{plugin_id}'") + logger.info( + f"Saved {len(changed_vars)} env var(s) to per-plugin .env for '{plugin_id}'" + ) # Update os.environ so hot-reload picks up changes immediately for k, v in changed_vars.items(): @@ -1505,7 +1591,7 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: "message": message, "updated_files": updated_files, "reload": reload_result, - "status": "success" + "status": "success", } except Exception as e: @@ -1541,29 +1627,29 @@ async def test_plugin_connection(plugin_id: str, config: dict) -> dict: plugin_class = discovered_plugins[plugin_id] # Check if plugin supports testing - if not hasattr(plugin_class, 'test_connection'): + if not hasattr(plugin_class, "test_connection"): return { "success": False, "message": f"Plugin '{plugin_id}' does not support connection testing", - "status": "unsupported" + "status": "unsupported", } # Build complete config from provided data test_config = {} # Merge settings - if 'settings' in config: - test_config.update(config['settings']) + if "settings" in config: + test_config.update(config["settings"]) # Load per-plugin env for resolving masked values plugin_env = load_plugin_env(plugin_id) # Add env vars (expand any ${ENV_VAR} references with test values) - if 'env_vars' in config: - for key, value in config['env_vars'].items(): + if "env_vars" in config: + for key, value in config["env_vars"].items(): # For masked values, resolve from per-plugin .env then os.environ - if value == '••••••••••••': - value = plugin_env.get(key) or os.getenv(key, '') + if value == "••••••••••••": + value = plugin_env.get(key) or os.getenv(key, "") test_config[key.lower()] = value # Expand any remaining env var references @@ -1578,15 +1664,12 @@ async def test_plugin_connection(plugin_id: str, config: dict) -> dict: except Exception as e: logger.exception(f"Error testing 
connection for plugin '{plugin_id}'") - return { - "success": False, - "message": f"Connection test failed: {str(e)}", - "status": "error" - } + return {"success": False, "message": f"Connection test failed: {str(e)}", "status": "error"} # Plugin Lifecycle Management Functions (create / write-code / delete) + def _snake_to_pascal(snake_str: str) -> str: """Convert snake_case to PascalCase.""" return "".join(word.capitalize() for word in snake_str.split("_")) @@ -1615,14 +1698,20 @@ async def create_plugin( Returns: Success dict with plugin_id and created_files list """ - from advanced_omi_backend.services.plugin_service import _get_plugins_dir, discover_plugins + from advanced_omi_backend.services.plugin_service import ( + _get_plugins_dir, + discover_plugins, + ) # Validate name if not plugin_name.replace("_", "").isalnum(): return {"success": False, "error": "Plugin name must be alphanumeric with underscores only"} if not re.match(r"^[a-z][a-z0-9_]*$", plugin_name): - return {"success": False, "error": "Plugin name must be lowercase snake_case starting with a letter"} + return { + "success": False, + "error": "Plugin name must be lowercase snake_case starting with a letter", + } plugins_dir = _get_plugins_dir() plugin_dir = plugins_dir / plugin_name @@ -1650,8 +1739,12 @@ async def create_plugin( (plugin_dir / "plugin.py").write_text(plugin_code, encoding="utf-8") else: # Write standard boilerplate - events_str = ", ".join(f'"{e}"' for e in events) if events else '"conversation.complete"' - boilerplate = inspect.cleandoc(f''' + events_str = ( + ", ".join(f'"{e}"' for e in events) if events else '"conversation.complete"' + ) + boilerplate = ( + inspect.cleandoc( + f''' """ {class_name} implementation. @@ -1688,7 +1781,10 @@ async def cleanup(self): async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: logger.info(f"Processing conversation for user: {{context.user_id}}") return PluginResult(success=True, message="OK") - ''') + "\n" + ''' + ) + + "\n" + ) (plugin_dir / "plugin.py").write_text(boilerplate, encoding="utf-8") created_files.append("plugin.py") @@ -1699,7 +1795,7 @@ async def on_conversation_complete(self, context: PluginContext) -> Optional[Plu # config.yml config_yml = {"description": description} - with open(plugin_dir / "config.yml", 'w', encoding="utf-8") as f: + with open(plugin_dir / "config.yml", "w", encoding="utf-8") as f: _yaml.dump(config_yml, f) created_files.append("config.yml") @@ -1848,7 +1944,10 @@ async def delete_plugin(plugin_id: str, remove_files: bool = False) -> dict: logger.info(f"Removed plugin directory: {plugin_dir}") if not removed_from_yml and not files_removed: - return {"success": False, "error": f"Plugin '{plugin_id}' not found in plugins.yml or on disk"} + return { + "success": False, + "error": f"Plugin '{plugin_id}' not found in plugins.yml or on disk", + } logger.info(f"Deleted plugin '{plugin_id}' (yml={removed_from_yml}, files={files_removed})") return { diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index 96ccc77b..8b5f2d43 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -11,7 +11,7 @@ from typing import Any, Dict, Optional from advanced_omi_backend.model_registry import get_models_registry -from advanced_omi_backend.openai_factory import create_openai_client, is_langfuse_enabled +from advanced_omi_backend.openai_factory import 
create_openai_client from advanced_omi_backend.services.memory.config import ( load_config_yml as _load_root_config, ) @@ -62,7 +62,9 @@ def __init__( self.base_url = base_url self.model = model if not self.api_key or not self.base_url or not self.model: - raise ValueError(f"LLM configuration incomplete: api_key={'set' if self.api_key else 'MISSING'}, base_url={'set' if self.base_url else 'MISSING'}, model={'set' if self.model else 'MISSING'}") + raise ValueError( + f"LLM configuration incomplete: api_key={'set' if self.api_key else 'MISSING'}, base_url={'set' if self.base_url else 'MISSING'}, model={'set' if self.model else 'MISSING'}" + ) # Initialize OpenAI client with optional Langfuse tracing try: @@ -78,31 +80,32 @@ def __init__( raise def generate( - self, prompt: str, model: str | None = None, temperature: float | None = None, - **langfuse_kwargs, + self, + prompt: str, + model: str | None = None, + temperature: float | None = None, ) -> str: """Generate text completion using OpenAI-compatible API.""" try: model_name = model or self.model temp = temperature if temperature is not None else self.temperature - params = { - "model": model_name, - "messages": [{"role": "user", "content": prompt}], - "temperature": temp, - } - if is_langfuse_enabled(): - params.update(langfuse_kwargs) - - response = self.client.chat.completions.create(**params) + response = self.client.chat.completions.create( + model=model_name, + messages=[{"role": "user", "content": prompt}], + temperature=temp, + ) return response.choices[0].message.content.strip() except Exception as e: self.logger.error(f"Error generating completion: {e}") raise def chat_with_tools( - self, messages: list, tools: list | None = None, model: str | None = None, - temperature: float | None = None, **langfuse_kwargs, + self, + messages: list, + tools: list | None = None, + model: str | None = None, + temperature: float | None = None, ): """Chat completion with tool/function calling support. 
Returns raw response object.""" model_name = model or self.model @@ -113,8 +116,6 @@ def chat_with_tools( } if tools: params["tools"] = tools - if is_langfuse_enabled(): - params.update(langfuse_kwargs) return self.client.chat.completions.create(**params) def health_check(self) -> Dict: @@ -157,11 +158,13 @@ class LLMClientFactory: def create_client() -> LLMClient: """Create an LLM client based on model registry configuration (config.yml).""" registry = get_models_registry() - + if registry: llm_def = registry.get_default("llm") if llm_def: - logger.info(f"Creating LLM client from registry: {llm_def.name} ({llm_def.model_provider})") + logger.info( + f"Creating LLM client from registry: {llm_def.name} ({llm_def.model_provider})" + ) params = llm_def.model_params or {} return OpenAILLMClient( api_key=llm_def.api_key, @@ -169,7 +172,7 @@ def create_client() -> LLMClient: model=llm_def.model_name, temperature=params.get("temperature", 0.1), ) - + raise ValueError("No default LLM defined in config.yml") @staticmethod @@ -196,20 +199,12 @@ def reset_llm_client(): _llm_client = None -def _langfuse_metadata(session_id: str | None) -> dict: - """Return metadata dict with langfuse_session_id if Langfuse is enabled.""" - if session_id and is_langfuse_enabled(): - return {"langfuse_session_id": session_id} - return {} - - # Async wrapper for blocking LLM operations async def async_generate( prompt: str, model: str | None = None, temperature: float | None = None, operation: str | None = None, - langfuse_session_id: str | None = None, ) -> str: """Async wrapper for LLM text generation. @@ -218,9 +213,8 @@ async def async_generate( The resolved config determines model, temperature, max_tokens, etc. Explicit ``model``/``temperature`` kwargs still override the resolved values. - When ``langfuse_session_id`` is provided and Langfuse is enabled, - the session ID is set on the current Langfuse trace to group all - LLM calls for a conversation. + Tracing is handled automatically by the OTEL instrumentor; use + ``set_otel_session()`` at job boundaries to group calls by session. """ if operation: registry = get_models_registry() @@ -233,16 +227,13 @@ async def async_generate( if model is not None: api_params["model"] = model api_params["messages"] = [{"role": "user", "content": prompt}] - api_params["metadata"] = _langfuse_metadata(langfuse_session_id) response = await client.chat.completions.create(**api_params) return response.choices[0].message.content.strip() # Fallback: use singleton client client = get_llm_client() loop = asyncio.get_running_loop() - return await loop.run_in_executor( - None, lambda: client.generate(prompt, model, temperature) - ) + return await loop.run_in_executor(None, lambda: client.generate(prompt, model, temperature)) async def async_chat_with_tools( @@ -251,11 +242,11 @@ async def async_chat_with_tools( model: str | None = None, temperature: float | None = None, operation: str | None = None, - langfuse_session_id: str | None = None, ): """Async wrapper for chat completion with tool calling. When ``operation`` is provided, parameters are resolved from config. + Tracing is handled automatically by the OTEL instrumentor. 
""" if operation: registry = get_models_registry() @@ -270,7 +261,6 @@ async def async_chat_with_tools( api_params["messages"] = messages if tools: api_params["tools"] = tools - api_params["metadata"] = _langfuse_metadata(langfuse_session_id) return await client.chat.completions.create(**api_params) # Fallback: use singleton client diff --git a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py index 488dcb0d..dffa4f1e 100644 --- a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py +++ b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py @@ -26,6 +26,16 @@ def is_galileo_enabled() -> bool: return bool(os.getenv("GALILEO_API_KEY")) +@lru_cache(maxsize=1) +def is_langfuse_enabled() -> bool: + """Check if Langfuse OTEL is configured.""" + return bool( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) + + def is_otel_enabled() -> bool: """Check if any OTel exporter has been initialised.""" return _otel_initialised @@ -66,38 +76,83 @@ def clear_otel_session() -> None: def init_otel() -> None: - """Initialize OTEL with Galileo exporter and OpenAI instrumentor. + """Initialize OTEL with configured exporters and OpenAI instrumentor. - Call once at app startup. Safe to call if Galileo is not configured (no-op). + Supports multiple backends simultaneously: + - Galileo: if GALILEO_API_KEY is set + - Langfuse: if LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST are set + + Call once at app startup. No-op if no backends are configured. """ - if not is_galileo_enabled(): - logger.info("Galileo not configured, skipping OTEL initialization") + galileo = is_galileo_enabled() + langfuse = is_langfuse_enabled() + + if not galileo and not langfuse: + logger.info("No OTEL backends configured (Galileo/Langfuse), skipping initialization") return try: - from galileo import otel - from openinference.instrumentation.openai import OpenAIInstrumentor from opentelemetry.sdk import trace as trace_sdk - project = os.getenv("GALILEO_PROJECT", "chronicle") - logstream = os.getenv("GALILEO_LOG_STREAM", "default") - tracer_provider = trace_sdk.TracerProvider() - galileo_processor = otel.GalileoSpanProcessor( - project=project, logstream=logstream - ) - tracer_provider.add_span_processor(galileo_processor) - - # Auto-instrument all OpenAI SDK calls - OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) + backends = [] + + # --- Galileo backend --- + if galileo: + try: + from galileo import otel + + project = os.getenv("GALILEO_PROJECT", "chronicle") + logstream = os.getenv("GALILEO_LOG_STREAM", "default") + galileo_processor = otel.GalileoSpanProcessor(project=project, logstream=logstream) + tracer_provider.add_span_processor(galileo_processor) + backends.append("Galileo") + except ImportError: + logger.warning( + "Galileo packages not installed. " "Install with: uv pip install '.[galileo]'" + ) + except Exception as e: + logger.error(f"Failed to add Galileo span processor: {e}") + + # --- Langfuse backend --- + if langfuse: + try: + from langfuse.opentelemetry import LangfuseSpanProcessor + + langfuse_processor = LangfuseSpanProcessor() + tracer_provider.add_span_processor(langfuse_processor) + backends.append("Langfuse") + except ImportError: + logger.warning( + "Langfuse OTEL packages not installed. " "Ensure langfuse>=3.13.0 is installed." 
+ ) + except Exception as e: + logger.error(f"Failed to add Langfuse span processor: {e}") + + if not backends: + logger.warning("No OTEL span processors were successfully added") + return + + # Auto-instrument all OpenAI SDK calls (backend-agnostic) + try: + from openinference.instrumentation.openai import OpenAIInstrumentor + + OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) + except ImportError: + logger.warning( + "OpenAI OTEL instrumentor not installed. " + "Install with: uv pip install '.[galileo]'" + ) + return global _otel_initialised _otel_initialised = True - logger.info("OTEL initialized with Galileo exporter + OpenAI instrumentor") + logger.info( + f"OTEL initialized with {' + '.join(backends)} exporter(s) + OpenAI instrumentor" + ) except ImportError: logger.warning( - "Galileo/OTEL packages not installed. " - "Install with: uv pip install '.[galileo]'" + "OTEL SDK packages not installed. " "Install opentelemetry-api and opentelemetry-sdk." ) except Exception as e: logger.error(f"Failed to initialize OTEL: {e}") diff --git a/backends/advanced/src/advanced_omi_backend/openai_factory.py b/backends/advanced/src/advanced_omi_backend/openai_factory.py index 17f6eba1..b10f72f0 100644 --- a/backends/advanced/src/advanced_omi_backend/openai_factory.py +++ b/backends/advanced/src/advanced_omi_backend/openai_factory.py @@ -1,29 +1,22 @@ -"""Centralized OpenAI client factory with optional LangFuse tracing. +"""Centralized OpenAI client factory. Single source of truth for creating OpenAI/AsyncOpenAI clients. All other modules that need an OpenAI client should use this factory instead of -duplicating LangFuse detection logic. +creating clients directly. + +Tracing is handled by the OTEL instrumentor (see observability/otel_setup.py), +which auto-instruments all OpenAI calls at startup. No per-client wrapping needed. """ import logging -import os -from functools import lru_cache - -logger = logging.getLogger(__name__) +import openai -@lru_cache(maxsize=1) -def is_langfuse_enabled() -> bool: - """Check if LangFuse is properly configured (cached).""" - return bool( - os.getenv("LANGFUSE_PUBLIC_KEY") - and os.getenv("LANGFUSE_SECRET_KEY") - and os.getenv("LANGFUSE_HOST") - ) +logger = logging.getLogger(__name__) def create_openai_client(api_key: str, base_url: str, is_async: bool = False): - """Create an OpenAI client with optional LangFuse tracing. + """Create an OpenAI client. 
Args: api_key: OpenAI API key @@ -31,18 +24,9 @@ def create_openai_client(api_key: str, base_url: str, is_async: bool = False): is_async: Whether to return AsyncOpenAI or sync OpenAI client Returns: - OpenAI or AsyncOpenAI client instance (with or without LangFuse wrapping) + OpenAI or AsyncOpenAI client instance """ - if is_langfuse_enabled(): - import langfuse.openai as openai_module - - logger.debug("Creating OpenAI client with LangFuse tracing") - else: - import openai as openai_module - - logger.debug("Creating OpenAI client without tracing") - if is_async: - return openai_module.AsyncOpenAI(api_key=api_key, base_url=base_url) + return openai.AsyncOpenAI(api_key=api_key, base_url=base_url) else: - return openai_module.OpenAI(api_key=api_key, base_url=base_url) + return openai.OpenAI(api_key=api_key, base_url=base_url) diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 29719566..cb0f7137 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -382,6 +382,14 @@ async def clear_jobs( for job_id in job_ids: try: job = Job.fetch(job_id, connection=redis_conn) + # Skip jobs that are currently running (their ID may have been + # reused by a new session's job with the same ID) + if job.get_status() in ("started", "queued", "deferred"): + logger.debug( + f"Skipping {registry_name} job {job_id}: currently {job.get_status()}" + ) + registry.remove(job_id) # Remove stale registry entry only + continue job.delete() total_removed += 1 except Exception: diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/base.py b/backends/advanced/src/advanced_omi_backend/services/memory/base.py index bae18e56..9eddddbc 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/base.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/base.py @@ -341,8 +341,10 @@ class LLMProviderBase(ABC): @abstractmethod async def extract_memories( - self, text: str, prompt: str, user_id: Optional[str] = None, - langfuse_session_id: Optional[str] = None, + self, + text: str, + prompt: str, + user_id: Optional[str] = None, ) -> List[str]: """Extract meaningful fact memories from text using an LLM. @@ -350,7 +352,6 @@ async def extract_memories( text: Input text to extract memories from prompt: System prompt to guide the extraction process user_id: Optional user ID for per-user prompt override resolution - langfuse_session_id: Optional session ID for Langfuse trace grouping Returns: List of extracted fact memory strings @@ -358,7 +359,10 @@ async def extract_memories( pass @abstractmethod - async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: + async def generate_embeddings( + self, + texts: List[str], + ) -> List[List[float]]: """Generate vector embeddings for the given texts. Args: @@ -375,7 +379,6 @@ async def propose_memory_actions( retrieved_old_memory: List[Dict[str, str]], new_facts: List[str], custom_prompt: Optional[str] = None, - langfuse_session_id: Optional[str] = None, ) -> Dict[str, Any]: """Propose memory management actions based on existing and new information. 
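
To illustrate the interface change in the hunks above (the langfuse_session_id parameter is removed from the abstract methods), here is a minimal sketch of a provider conforming to the updated signatures; the class and its bodies are hypothetical, not part of the patch:

    import asyncio
    from typing import List, Optional

    class EchoLLMProvider:  # stands in for an LLMProviderBase subclass
        async def extract_memories(
            self, text: str, prompt: str, user_id: Optional[str] = None
        ) -> List[str]:
            # No session id is threaded through call sites any more; the
            # OTEL instrumentor traces the underlying OpenAI calls itself.
            return [line.strip() for line in text.splitlines() if line.strip()]

        async def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
            # Fixed-size zero vectors, just to satisfy the interface shape.
            return [[0.0] * 8 for _ in texts]

    facts = asyncio.run(EchoLLMProvider().extract_memories("a\nb", "extract facts"))

Concrete providers such as the Chronicle one below now pass only text, prompt, and user_id; grouping calls by conversation happens via set_otel_session() at job boundaries instead of per-call kwargs.
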
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/base.py b/backends/advanced/src/advanced_omi_backend/services/memory/base.py
index bae18e56..9eddddbc 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/base.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/base.py
@@ -341,8 +341,10 @@ class LLMProviderBase(ABC):
 
     @abstractmethod
     async def extract_memories(
-        self, text: str, prompt: str, user_id: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
+        self,
+        text: str,
+        prompt: str,
+        user_id: Optional[str] = None,
     ) -> List[str]:
         """Extract meaningful fact memories from text using an LLM.
 
@@ -350,7 +352,6 @@ class LLMProviderBase(ABC):
             text: Input text to extract memories from
             prompt: System prompt to guide the extraction process
             user_id: Optional user ID for per-user prompt override resolution
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             List of extracted fact memory strings
@@ -358,7 +359,10 @@ class LLMProviderBase(ABC):
         pass
 
     @abstractmethod
-    async def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
+    async def generate_embeddings(
+        self,
+        texts: List[str],
+    ) -> List[List[float]]:
         """Generate vector embeddings for the given texts.
 
         Args:
@@ -375,7 +379,6 @@ class LLMProviderBase(ABC):
     async def propose_memory_actions(
         self,
         retrieved_old_memory: List[Dict[str, str]],
         new_facts: List[str],
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Propose memory management actions based on existing and new information.
 
@@ -401,7 +404,6 @@ class LLMProviderBase(ABC):
     async def propose_reprocess_actions(
         self,
         diff_context: str,
         new_transcript: str,
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Propose memory updates after transcript reprocessing (e.g., speaker changes).
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py
index d1f51775..2363e5a8 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py
@@ -155,8 +155,9 @@ async def add_memory(
         if self.config.extraction_enabled and self.config.extraction_prompt:
             fact_memories_text = await asyncio.wait_for(
                 self.llm_provider.extract_memories(
-                    transcript, self.config.extraction_prompt, user_id=user_id,
-                    langfuse_session_id=source_id,
+                    transcript,
+                    self.config.extraction_prompt,
+                    user_id=user_id,
                 ),
                 timeout=self.config.timeout_seconds,
             )
@@ -194,8 +195,12 @@ async def add_memory(
         if allow_update and fact_memories_text:
             memory_logger.info(f"🔍 Allowing update for {source_id}")
             created_ids = await self._process_memory_updates(
-                fact_memories_text, embeddings, user_id, client_id, source_id, user_email,
-                langfuse_session_id=source_id,
+                fact_memories_text,
+                embeddings,
+                user_id,
+                client_id,
+                source_id,
+                user_email,
             )
         else:
             memory_logger.info(f"🔍 Not allowing update for {source_id}")
@@ -533,9 +538,7 @@ async def reprocess_memory(
         try:
             # 1. Get existing memories for this conversation
-            existing_memories = await self.vector_store.get_memories_by_source(
-                user_id, source_id
-            )
+            existing_memories = await self.vector_store.get_memories_by_source(user_id, source_id)
 
             # 2. If no existing memories, fall back to normal extraction
             if not existing_memories:
@@ -544,7 +547,11 @@ async def reprocess_memory(
                     f"falling back to normal extraction"
                 )
                 return await self.add_memory(
-                    transcript, client_id, source_id, user_id, user_email,
+                    transcript,
+                    client_id,
+                    source_id,
+                    user_id,
+                    user_email,
                     allow_update=True,
                 )
 
@@ -555,7 +562,11 @@ async def reprocess_memory(
                     f"falling back to normal extraction"
                 )
                 return await self.add_memory(
-                    transcript, client_id, source_id, user_id, user_email,
+                    transcript,
+                    client_id,
+                    source_id,
+                    user_id,
+                    user_email,
                     allow_update=True,
                 )
 
@@ -580,24 +591,29 @@ async def reprocess_memory(
                 existing_memories=existing_memory_dicts,
                 diff_context=diff_text,
                 new_transcript=transcript,
-                langfuse_session_id=source_id,
-            )
-            memory_logger.info(
-                f"🔄 Reprocess LLM returned actions: {actions_obj}"
             )
+            memory_logger.info(f"🔄 Reprocess LLM returned actions: {actions_obj}")
         except NotImplementedError:
             memory_logger.warning(
                 "LLM provider does not support propose_reprocess_actions, "
                 "falling back to normal extraction"
             )
             return await self.add_memory(
-                transcript, client_id, source_id, user_id, user_email,
+                transcript,
+                client_id,
+                source_id,
+                user_id,
+                user_email,
                 allow_update=True,
             )
         except Exception as e:
             memory_logger.error(f"Reprocess LLM call failed: {e}")
             return await self.add_memory(
-                transcript, client_id, source_id, user_id, user_email,
+                transcript,
+                client_id,
+                source_id,
+                user_id,
+                user_email,
                 allow_update=True,
             )
 
@@ -619,13 +635,9 @@ async def reprocess_memory(
                     self.llm_provider.generate_embeddings(texts_needing_embeddings),
                     timeout=self.config.timeout_seconds,
                 )
-                text_to_embedding = dict(
-                    zip(texts_needing_embeddings, embeddings, strict=True)
-                )
+                text_to_embedding = dict(zip(texts_needing_embeddings, embeddings, strict=True))
             except Exception as e:
-                memory_logger.warning(
-                    f"Batch embedding generation failed for reprocess: {e}"
-                )
+                memory_logger.warning(f"Batch embedding generation failed for reprocess: {e}")
 
             # 8. Apply the actions (reuses existing infrastructure)
             created_ids = await self._apply_memory_actions(
@@ -639,21 +651,20 @@ async def reprocess_memory(
             )
 
             memory_logger.info(
-                f"✅ Reprocess complete for {source_id}: "
-                f"{len(created_ids)} memories affected"
+                f"✅ Reprocess complete for {source_id}: " f"{len(created_ids)} memories affected"
             )
             return True, created_ids
 
         except Exception as e:
-            memory_logger.error(
-                f"❌ Reprocess memory failed for {source_id}: {e}"
-            )
+            memory_logger.error(f"❌ Reprocess memory failed for {source_id}: {e}")
             # Fall back to normal extraction on any unexpected error
-            memory_logger.info(
-                f"🔄 Falling back to normal extraction after reprocess error"
-            )
+            memory_logger.info(f"🔄 Falling back to normal extraction after reprocess error")
             return await self.add_memory(
-                transcript, client_id, source_id, user_id, user_email,
+                transcript,
+                client_id,
+                source_id,
+                user_id,
+                user_email,
                 allow_update=True,
             )
 
@@ -688,8 +699,7 @@ def _format_speaker_diff(transcript_diff: list) -> str:
             )
         elif change_type == "new_segment":
             lines.append(
-                f"- New segment: {change.get('speaker', '?')}: "
-                f"\"{change.get('text', '')}\""
+                f"- New segment: {change.get('speaker', '?')}: " f"\"{change.get('text', '')}\""
             )
 
     return "\n".join(lines)
@@ -789,7 +799,6 @@ async def _process_memory_updates(
         client_id: str,
         source_id: str,
         user_email: str,
-        langfuse_session_id: Optional[str] = None,
     ) -> List[str]:
         """Process memory updates using LLM-driven action proposals.
 
@@ -852,7 +861,6 @@ async def _process_memory_updates(
                 retrieved_old_memory=retrieved_old_memory,
                 new_facts=memories_text,
                 custom_prompt=None,
-                langfuse_session_id=langfuse_session_id,
             )
             memory_logger.info(f"📝 UpdateMemory LLM returned: {type(actions_obj)} - {actions_obj}")
         except Exception as e_actions:
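The reprocess path above is a "propose targeted actions, fall back to full
re-extraction" pattern: every failure mode funnels back into add_memory with
allow_update=True. A condensed sketch of that shape (names and signatures are
hypothetical, not the provider's real API):

    async def reprocess(provider, store, transcript, user_id, source_id, add_memory):
        existing = await store.get_memories_by_source(user_id, source_id)
        if not existing:
            # Nothing to diff against - treat it as a fresh extraction.
            return await add_memory(transcript, allow_update=True)
        try:
            actions = await provider.propose_reprocess_actions(
                existing_memories=existing,
                diff_context="<speaker diff>",  # placeholder for the formatted diff
                new_transcript=transcript,
            )
        except Exception:
            # NotImplementedError or any LLM failure: degrade gracefully.
            return await add_memory(transcript, allow_update=True)
        return actions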
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py
index 3a81b53e..4d440fba 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py
@@ -15,10 +15,7 @@
 from typing import Any, Dict, List, Optional
 
 from advanced_omi_backend.model_registry import ModelDef, get_models_registry
-from advanced_omi_backend.openai_factory import (
-    create_openai_client,
-    is_langfuse_enabled,
-)
+from advanced_omi_backend.openai_factory import create_openai_client
 from advanced_omi_backend.prompt_registry import get_prompt_registry
 from advanced_omi_backend.utils.text_chunking import semantic_chunk_text
 
@@ -42,13 +39,6 @@
 memory_logger = logging.getLogger("memory_service")
 
 
-def _langfuse_metadata(session_id: str | None) -> dict:
-    """Return metadata dict with langfuse_session_id if Langfuse is enabled."""
-    if session_id and is_langfuse_enabled():
-        return {"langfuse_session_id": session_id}
-    return {}
-
-
 def _get_openai_client(api_key: str, base_url: str, is_async: bool = False):
     """Get OpenAI client with optional Langfuse tracing.
 
@@ -75,10 +65,7 @@ async def generate_openai_embeddings(
         base_url=base_url,
         is_async=True,
     )
-    response = await client.embeddings.create(
-        model=model,
-        input=texts,
-    )
+    response = await client.embeddings.create(model=model, input=texts)
     return [data.embedding for data in response.data]
 
 
@@ -160,9 +147,7 @@ def __init__(self, config: Dict[str, Any]):
         # Ignore provider-specific envs; use registry as single source of truth
         registry = get_models_registry()
         if not registry:
-            raise RuntimeError(
-                "config.yml not found or invalid; cannot initialize model registry"
-            )
+            raise RuntimeError("config.yml not found or invalid; cannot initialize model registry")
         self._registry = registry
 
@@ -182,12 +167,8 @@ def __init__(self, config: Dict[str, Any]):
         self.embedding_model = (
             self.embed_def.model_name if self.embed_def else self.llm_def.model_name
         )
-        self.embedding_api_key = (
-            self.embed_def.api_key if self.embed_def else self.api_key
-        )
-        self.embedding_base_url = (
-            self.embed_def.model_url if self.embed_def else self.base_url
-        )
+        self.embedding_api_key = self.embed_def.api_key if self.embed_def else self.api_key
+        self.embedding_base_url = self.embed_def.model_url if self.embed_def else self.base_url
 
         # CRITICAL: Validate API keys are present - fail fast instead of hanging
         if not self.api_key or self.api_key.strip() == "":
@@ -197,9 +178,7 @@ def __init__(self, config: Dict[str, Any]):
                 f"Cannot proceed without valid API credentials."
             )
 
-        if self.embed_def and (
-            not self.embedding_api_key or self.embedding_api_key.strip() == ""
-        ):
+        if self.embed_def and (not self.embedding_api_key or self.embedding_api_key.strip() == ""):
             raise RuntimeError(
                 f"API key is missing or empty for embedding provider '{self.embed_def.model_provider}' (model: {self.embedding_model}). "
                 f"Please set the API key in config.yml or environment variables."
@@ -213,7 +192,6 @@ async def extract_memories(
         text: str,
         prompt: str,
         user_id: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> List[str]:
         """Extract memories using OpenAI API with the enhanced fact retrieval prompt.
 
@@ -221,7 +199,6 @@ async def extract_memories(
             text: Input text to extract memories from
             prompt: System prompt to guide extraction (uses default if empty)
             user_id: Optional user ID for per-user prompt override resolution
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             List of extracted memory strings
@@ -248,9 +225,7 @@ async def extract_memories(
                 model=self.embedding_model,
             )
 
-        chunking_config = self._registry.memory.get("extraction", {}).get(
-            "chunking", {}
-        )
+        chunking_config = self._registry.memory.get("extraction", {}).get("chunking", {})
         dialogue_turns = [line for line in text.split("\n") if line.strip()]
         text_chunks = await semantic_chunk_text(
             text,
@@ -266,9 +241,7 @@ async def extract_memories(
 
         # Process all chunks in sequence, not concurrently
         results = [
-            await self._process_chunk(
-                system_prompt, chunk, i, langfuse_session_id=langfuse_session_id
-            )
+            await self._process_chunk(system_prompt, chunk, i)
             for i, chunk in enumerate(text_chunks)
         ]
 
@@ -289,7 +262,6 @@ async def _process_chunk(
         system_prompt: str,
         chunk: str,
         index: int,
-        langfuse_session_id: Optional[str] = None,
     ) -> List[str]:
         """Process a single text chunk to extract memories using OpenAI API.
 
@@ -301,7 +273,6 @@ async def _process_chunk(
             system_prompt: System prompt that guides the memory extraction behavior
             chunk: Individual text chunk to process for memory extraction
             index: Index of the chunk for logging and error tracking purposes
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             List of extracted memory fact strings from the chunk. Returns empty list
@@ -320,7 +291,6 @@ async def _process_chunk(
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": chunk},
                 ],
-                metadata=_langfuse_metadata(langfuse_session_id),
             )
             facts = (response.choices[0].message.content or "").strip()
             if not facts:
@@ -332,7 +302,10 @@ async def _process_chunk(
             memory_logger.error(f"Error processing chunk {index}: {e}")
             return []
 
-    async def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
+    async def generate_embeddings(
+        self,
+        texts: List[str],
+    ) -> List[List[float]]:
         """Generate embeddings using OpenAI API.
 
         Args:
@@ -381,7 +354,6 @@ async def propose_memory_actions(
         retrieved_old_memory: List[Dict[str, str]] | List[str],
         new_facts: List[str],
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Use OpenAI chat completion with enhanced prompt to propose memory actions.
 
@@ -389,7 +361,6 @@ async def propose_memory_actions(
             retrieved_old_memory: List of existing memories for context
             new_facts: List of new facts to process
             custom_prompt: Optional custom prompt to override default
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             Dictionary containing proposed memory actions
@@ -409,7 +380,6 @@ async def propose_memory_actions(
             response = await client.chat.completions.create(
                 **op.to_api_params(),
                 messages=update_memory_messages,
-                metadata=_langfuse_metadata(langfuse_session_id),
             )
             content = (response.choices[0].message.content or "").strip()
             if not content:
@@ -434,7 +404,6 @@ async def propose_reprocess_actions(
         diff_context: str,
         new_transcript: str,
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Propose memory updates after speaker re-identification.
 
@@ -464,9 +433,7 @@ async def propose_reprocess_actions(
         else:
             try:
                 registry = get_prompt_registry()
-                system_prompt = await registry.get_prompt(
-                    "memory.reprocess_speaker_update"
-                )
+                system_prompt = await registry.get_prompt("memory.reprocess_speaker_update")
             except Exception as e:
                 memory_logger.debug(
                     f"Registry prompt fetch failed for "
@@ -497,7 +464,6 @@ async def propose_reprocess_actions(
             response = await client.chat.completions.create(
                 **op.to_api_params(),
                 messages=messages,
-                metadata=_langfuse_metadata(langfuse_session_id),
             )
 
             content = (response.choices[0].message.content or "").strip()
@@ -553,16 +519,12 @@ def _parse_memories_content(content: str) -> List[str]:
             for key in ("facts", "preferences"):
                 value = parsed.get(key)
                 if isinstance(value, list):
-                    collected.extend(
-                        [str(item).strip() for item in value if str(item).strip()]
-                    )
+                    collected.extend([str(item).strip() for item in value if str(item).strip()])
             # If the dict didn't contain expected keys, try to flatten any list values
             if not collected:
                 for value in parsed.values():
                     if isinstance(value, list):
-                        collected.extend(
-                            [str(item).strip() for item in value if str(item).strip()]
-                        )
+                        collected.extend([str(item).strip() for item in value if str(item).strip()])
             if collected:
                 return collected
         except Exception:
@@ -597,17 +559,13 @@ def _try_parse_list_or_object(text: str) -> List[str] | None:
             for key in ("facts", "preferences"):
                 value = data.get(key)
                 if isinstance(value, list):
-                    collected.extend(
-                        [str(item).strip() for item in value if str(item).strip()]
-                    )
+                    collected.extend([str(item).strip() for item in value if str(item).strip()])
             if collected:
                 return collected
             # As a last attempt, flatten any list values
             for value in data.values():
                 if isinstance(value, list):
-                    collected.extend(
-                        [str(item).strip() for item in value if str(item).strip()]
-                    )
+                    collected.extend([str(item).strip() for item in value if str(item).strip()])
             return collected if collected else None
         except Exception:
             return None
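extract_memories above splits long transcripts with semantic_chunk_text and processes
the chunks sequentially. A simplified sketch of that flow (the helper names here are
hypothetical stand-ins; the real chunker lives in utils/text_chunking.py):

    async def extract_all(llm, transcript: str, system_prompt: str) -> list[str]:
        chunks = await chunk_transcript(transcript)  # hypothetical chunking helper
        results = []
        for i, chunk in enumerate(chunks):
            # Sequential on purpose: avoids hammering the LLM API with parallel calls.
            results.append(await llm.process_chunk(system_prompt, chunk, i))
        # Flatten per-chunk fact lists into one list.
        return [fact for chunk_facts in results for fact in chunk_facts]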
diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
index 63036ce1..96c52f57 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
@@ -118,7 +118,9 @@ def analyze_speech(transcript_data: dict) -> dict:
     else:
         # Check minimum duration threshold when we have timing data
         min_duration = settings.get("min_duration", 10.0)
-        logger.info(f"📏 Comparing duration {speech_duration:.1f}s vs threshold {min_duration:.1f}s")
+        logger.info(
+            f"📏 Comparing duration {speech_duration:.1f}s vs threshold {min_duration:.1f}s"
+        )
         if speech_duration < min_duration:
             return {
                 "has_speech": False,
@@ -164,7 +166,6 @@ async def generate_title_and_summary(
     text: str,
     segments: Optional[list] = None,
     user_id: Optional[str] = None,
-    langfuse_session_id: Optional[str] = None,
 ) -> tuple[str, str]:
     """
     Generate title and short summary in a single LLM call using full conversation context.
@@ -222,7 +223,7 @@ async def generate_title_and_summary(
     "{conversation_text}"
     """
 
-        response = await async_generate(prompt, operation="title_summary", langfuse_session_id=langfuse_session_id)
+        response = await async_generate(prompt, operation="title_summary")
 
         # Parse response for Title: and Summary: lines
         title = None
@@ -249,12 +250,10 @@ async def generate_title_and_summary(
         return fallback_title or "Conversation", fallback_summary or "No content"
 
 
-
 async def generate_detailed_summary(
     text: str,
     segments: Optional[list] = None,
     memory_context: Optional[str] = None,
-    langfuse_session_id: Optional[str] = None,
 ) -> str:
     """
     Generate a comprehensive, detailed summary of the conversation.
@@ -330,7 +329,7 @@ async def generate_detailed_summary(
     "{conversation_text}"
     """
 
-        summary = await async_generate(prompt, operation="detailed_summary", langfuse_session_id=langfuse_session_id)
+        summary = await async_generate(prompt, operation="detailed_summary")
 
         return summary.strip().strip('"').strip("'") or "No meaningful content to summarize"
 
     except Exception as e:
@@ -350,7 +349,6 @@ async def generate_detailed_summary(
 # ============================================================================
 
 
-
 def extract_speakers_from_segments(segments: list) -> List[str]:
     """
     Extract unique speaker names from segments.
@@ -364,7 +362,11 @@ def extract_speakers_from_segments(segments: list) -> List[str]:
     speakers = []
     if segments:
         for seg in segments:
-            speaker = seg.get("speaker", "Unknown") if isinstance(seg, dict) else (seg.speaker or "Unknown")
+            speaker = (
+                seg.get("speaker", "Unknown")
+                if isinstance(seg, dict)
+                else (seg.speaker or "Unknown")
+            )
             if speaker and speaker != "Unknown" and speaker not in speakers:
                 speakers.append(speaker)
     return speakers
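generate_title_and_summary asks for both values in one completion and then parses
labeled lines out of the response. A minimal sketch of that parsing, with the same
fallbacks (the example response text is illustrative):

    def parse_title_summary(response: str) -> tuple[str, str]:
        title, summary = None, None
        for line in response.splitlines():
            lowered = line.lower()
            if lowered.startswith("title:"):
                title = line.split(":", 1)[1].strip()
            elif lowered.startswith("summary:"):
                summary = line.split(":", 1)[1].strip()
        # Fall back to safe defaults if the model ignored the format.
        return title or "Conversation", summary or "No content"

    print(parse_title_summary("Title: Planning dinner\nSummary: Two speakers plan a menu."))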
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index 34285062..ba2a4ee0 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -109,9 +109,7 @@ async def handle_end_of_conversation(
 
     from advanced_omi_backend.models.conversation import Conversation
 
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if conversation:
         # Convert string to enum
         try:
@@ -126,9 +124,7 @@ async def handle_end_of_conversation(
             f"💾 Saved conversation {conversation_id[:12]} end_reason: {conversation.end_reason}"
         )
     else:
-        logger.warning(
-            f"⚠️ Conversation {conversation_id} not found for end reason tracking"
-        )
+        logger.warning(f"⚠️ Conversation {conversation_id} not found for end reason tracking")
 
     # Increment conversation count for this session
     conversation_count_key = f"session:conversation_count:{session_id}"
@@ -138,15 +134,36 @@ async def handle_end_of_conversation(
 
     # Check if session is still active (user still recording) and restart listening jobs
     session_key = f"audio:session:{session_id}"
-    session_status = await redis_client.hget(session_key, "status")
-    if session_status:
-        status_str = (
-            session_status.decode()
-            if isinstance(session_status, bytes)
-            else session_status
-        )
+    # Fetch both status and websocket_connected in one Redis call
+    status_raw, ws_connected_raw = await redis_client.hmget(
+        session_key, "status", "websocket_connected"
+    )
+    if status_raw:
+        status_str = status_raw.decode() if isinstance(status_raw, bytes) else status_raw
+        ws_connected = (
+            ws_connected_raw.decode()
+            if isinstance(ws_connected_raw, bytes)
+            else (ws_connected_raw or "false")
+        ) == "true"
+
+        # Determine if we should restart speech detection
+        should_restart = False
         if status_str == "active":
+            should_restart = True
+        elif ws_connected:
+            # Race condition recovery: WebSocket is still connected but status got
+            # corrupted (e.g., status endpoint polling set "finished" during the
+            # inter-conversation gap). Reset status and restart anyway.
+            logger.warning(
+                f"⚠️ Race condition recovery for session {session_id[:12]}: "
+                f"status={status_str} but websocket_connected=true. "
+                f"Resetting status to 'active' and restarting speech detection."
+            )
+            await redis_client.hset(session_key, "status", "active")
+            should_restart = True
+
+        if should_restart:
             # Session still active - enqueue new speech detection for next conversation
             logger.info(
                 f"🔄 Enqueueing new speech detection (conversation #{conversation_count + 1})"
@@ -175,7 +192,7 @@ async def handle_end_of_conversation(
                 client_id,
                 job_timeout=86400,  # 24 hours to match max_runtime in stream_speech_detection_job
                 result_ttl=JOB_RESULT_TTL,
-                job_id=f"speech-detect_{session_id[:12]}_{conversation_count}",
+                job_id=f"speech-detect_{session_id}_{conversation_count}",
                 description=f"Listening for speech (conversation #{conversation_count + 1})",
                 meta={"client_id": client_id, "session_level": True},
             )
@@ -192,7 +209,8 @@ async def handle_end_of_conversation(
             logger.info(f"✅ Enqueued speech detection job {speech_job.id}")
         else:
             logger.info(
-                f"Session {session_id} status={status_str}, not restarting (user stopped recording)"
+                f"Session {session_id} status={status_str}, ws_connected={ws_connected}, "
+                f"not restarting (user stopped recording)"
             )
     else:
         logger.info(f"Session {session_id} not found, not restarting (session ended)")
@@ -246,9 +264,7 @@ def _validate_segments(segments: list) -> list:
         start = seg.get("start", 0.0)
         end = seg.get("end", 0.0)
         if end <= start:
-            logger.debug(
-                f"Segment {i} has invalid timing (start={start}, end={end}), correcting"
-            )
+            logger.debug(f"Segment {i} has invalid timing (start={start}, end={end}), correcting")
             estimated_duration = len(text.split()) * 0.5  # ~0.5 seconds per word
             seg["end"] = start + estimated_duration
 
@@ -297,9 +313,7 @@ async def _initialize_conversation(
     conversation = None
     if existing_conversation_id_bytes:
         existing_conversation_id = existing_conversation_id_bytes.decode()
-        logger.info(
-            f"🔍 Found Redis key with conversation_id={existing_conversation_id}"
-        )
+        logger.info(f"🔍 Found Redis key with conversation_id={existing_conversation_id}")
 
         # Try to fetch the existing conversation by conversation_id
         conversation = await Conversation.find_one(
@@ -314,16 +328,13 @@ async def _initialize_conversation(
                 f"processing_status={processing_status}"
             )
         else:
-            logger.warning(
-                f"⚠️ Conversation {existing_conversation_id} not found in database!"
-            )
+            logger.warning(f"⚠️ Conversation {existing_conversation_id} not found in database!")
 
         # Verify it's a placeholder conversation (always_persist=True, processing_status='pending_transcription')
         if (
             conversation
             and getattr(conversation, "always_persist", False)
-            and getattr(conversation, "processing_status", None)
-            == "pending_transcription"
+            and getattr(conversation, "processing_status", None) == "pending_transcription"
         ):
             logger.info(
                 f"🔄 Reusing placeholder conversation {conversation.conversation_id} for session {session_id}"
@@ -342,9 +353,7 @@ async def _initialize_conversation(
             )
             conversation = None
     else:
-        logger.info(
-            f"🔍 No Redis key found for {conversation_key}, creating new conversation"
-        )
+        logger.info(f"🔍 No Redis key found for {conversation_key}, creating new conversation")
 
     # If no valid placeholder found, create new conversation
     if not conversation:
@@ -356,18 +365,14 @@ async def _initialize_conversation(
         )
         await conversation.insert()
         conversation_id = conversation.conversation_id
-        logger.info(
-            f"✅ Created streaming conversation {conversation_id} for session {session_id}"
-        )
+        logger.info(f"✅ Created streaming conversation {conversation_id} for session {session_id}")
 
     # Attach markers from Redis session (e.g., button events captured during streaming)
     session_key = f"audio:session:{session_id}"
     markers_json = await redis_client.hget(session_key, "markers")
     if markers_json:
         try:
-            markers_data = (
-                markers_json if isinstance(markers_json, str) else markers_json.decode()
-            )
+            markers_data = markers_json if isinstance(markers_json, str) else markers_json.decode()
             conversation.markers = json.loads(markers_data)
             await conversation.save()
             logger.info(
@@ -387,9 +392,7 @@ async def _initialize_conversation(
         speaker_check_job_id = speech_job.meta.get("speaker_check_job_id")
         if speaker_check_job_id:
             try:
-                speaker_check_job = Job.fetch(
-                    speaker_check_job_id, connection=redis_conn
-                )
+                speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn)
                 speaker_check_job.meta["conversation_id"] = conversation_id
                 speaker_check_job.save_meta()
             except Exception as e:
@@ -413,9 +416,7 @@ async def _initialize_conversation(
 
     # Signal audio persistence job to rotate to this conversation's file
     rotation_signal_key = f"conversation:current:{session_id}"
-    await redis_client.set(
-        rotation_signal_key, conversation_id, ex=86400
-    )  # 24 hour TTL
+    await redis_client.set(rotation_signal_key, conversation_id, ex=86400)  # 24 hour TTL
     logger.info(
         f"🔄 Signaled audio persistence to rotate file for conversation {conversation_id[:12]}"
     )
@@ -444,16 +445,12 @@ async def _monitor_conversation_loop(
         close_requested_reason, last_result_count, and last_word_count.
     """
     session_key = f"audio:session:{state.session_id}"
-    max_runtime = (
-        10740  # 3 hours - 60 seconds (single conversations shouldn't exceed 3 hours)
-    )
+    max_runtime = 10740  # 3 hours - 60 seconds (single conversations shouldn't exceed 3 hours)
    finalize_received = False
 
     # Inactivity timeout configuration
-    inactivity_timeout_seconds = float(
-        os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60")
-    )
+    inactivity_timeout_seconds = float(os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60"))
     inactivity_timeout_minutes = inactivity_timeout_seconds / 60
     last_inactivity_log_time = (
         time.time()
@@ -461,9 +458,7 @@ async def _monitor_conversation_loop(
 
     # Test mode: wait for audio queue to drain before timing out
     # In real usage, ambient noise keeps connection alive. In tests, chunks arrive in bursts.
-    wait_for_queue_drain = (
-        os.getenv("WAIT_FOR_AUDIO_QUEUE_DRAIN", "false").lower() == "true"
-    )
+    wait_for_queue_drain = os.getenv("WAIT_FOR_AUDIO_QUEUE_DRAIN", "false").lower() == "true"
 
     logger.info(
         f"📊 Conversation timeout configured: {inactivity_timeout_minutes} minutes ({inactivity_timeout_seconds}s)"
@@ -480,51 +475,59 @@ async def _monitor_conversation_loop(
 
         # Check if session is finalizing (set by producer when recording stops)
         if not finalize_received:
-            status = await redis_client.hget(session_key, "status")
-            status_str = status.decode() if status else None
+            # Fetch status, completion_reason, and websocket_connected in one call
+            status_raw, reason_raw, ws_raw = await redis_client.hmget(
+                session_key, "status", "completion_reason", "websocket_connected"
+            )
+            status_str = status_raw.decode() if status_raw else None
+            completion_reason_str = reason_raw.decode() if reason_raw else "unknown"
+            ws_connected = (ws_raw.decode() if ws_raw else "false") == "true"
 
             if status_str in ["finalizing", "finished"]:
-                finalize_received = True
-
-                # Get completion reason (guaranteed to exist with unified API)
-                completion_reason = await redis_client.hget(
-                    session_key, "completion_reason"
-                )
-                completion_reason_str = (
-                    completion_reason.decode() if completion_reason else "unknown"
-                )
-
-                if completion_reason_str == "websocket_disconnect":
+                # Check for spurious "finished" from status endpoint race condition:
+                # If status is "finished" but WebSocket is still connected and reason
+                # is "all_jobs_complete", this was set during the inter-conversation gap.
+                # Reset to "active" and continue monitoring.
+                if (
+                    status_str == "finished"
+                    and ws_connected
+                    and completion_reason_str == "all_jobs_complete"
+                ):
                     logger.warning(
-                        f"🔌 WebSocket disconnected for session {state.session_id[:12]} - "
-                        f"ending conversation early"
-                    )
-                    state.timeout_triggered = (
-                        False  # This is a disconnect, not a timeout
+                        f"⚠️ Ignoring spurious 'finished' for session {state.session_id[:12]}: "
+                        f"websocket_connected=true, reason=all_jobs_complete. "
+                        f"Resetting status to 'active' and continuing."
                     )
+                    await redis_client.hset(session_key, "status", "active")
+                    # Do NOT break - continue monitoring
                 else:
-                    logger.info(
-                        f"🛑 Session finalizing (reason: {completion_reason_str}), "
-                        f"waiting for audio persistence job to complete..."
-                    )
-                break  # Exit immediately when finalize signal received
+                    finalize_received = True
+
+                    if completion_reason_str == "websocket_disconnect":
+                        logger.warning(
+                            f"🔌 WebSocket disconnected for session {state.session_id[:12]} - "
+                            f"ending conversation early"
+                        )
+                        state.timeout_triggered = False  # This is a disconnect, not a timeout
+                    else:
+                        logger.info(
+                            f"🛑 Session finalizing (reason: {completion_reason_str}), "
+                            f"waiting for audio persistence job to complete..."
+                        )
+                    break  # Exit immediately when finalize signal received
 
         # Check for conversation close request (set by API, plugins, button press)
         if not finalize_received:
-            close_reason = await redis_client.hget(
-                session_key, "conversation_close_requested"
-            )
+            close_reason = await redis_client.hget(session_key, "conversation_close_requested")
             if close_reason:
                 await redis_client.hdel(session_key, "conversation_close_requested")
                 state.close_requested_reason = (
-                    close_reason.decode()
-                    if isinstance(close_reason, bytes)
-                    else close_reason
+                    close_reason.decode() if isinstance(close_reason, bytes) else close_reason
                 )
-                logger.info(
-                    f"🔒 Conversation close requested: {state.close_requested_reason}"
+                logger.info(f"🔒 Conversation close requested: {state.close_requested_reason}")
+                state.timeout_triggered = (
+                    True  # Session stays active (same restart behavior as inactivity timeout)
                 )
-                state.timeout_triggered = True  # Session stays active (same restart behavior as inactivity timeout)
                 finalize_received = True
                 break
@@ -583,9 +586,7 @@ async def _monitor_conversation_loop(
                 # Can't reliably detect inactivity, so skip timeout check this iteration
                 inactivity_duration = 0
                 if speech_analysis.get("fallback", False):
-                    logger.debug(
-                        "⚠️ Skipping inactivity check (no audio timestamps available)"
-                    )
+                    logger.debug("⚠️ Skipping inactivity check (no audio timestamps available)")
 
         current_time = time.time()
 
@@ -697,15 +698,11 @@ async def _save_streaming_transcript(
     """
     from advanced_omi_backend.models.conversation import Conversation
 
-    logger.info(
-        f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}"
-    )
+    logger.info(f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}")
     final_transcript = await aggregator.get_combined_results(session_id)
 
     # Fetch conversation from database to ensure we have latest state
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         logger.error(f"❌ Conversation {conversation_id} not found in database")
         raise ValueError(f"Conversation {conversation_id} not found")
@@ -979,9 +976,7 @@ async def open_conversation_job(
         logger.info(f"📊 Using completion_reason from session: {state.end_reason}")
     elif state.close_requested_reason:
         state.end_reason = "close_requested"
-        logger.info(
-            f"📊 Conversation closed by request: {state.close_requested_reason}"
-        )
+        logger.info(f"📊 Conversation closed by request: {state.close_requested_reason}")
     elif state.timeout_triggered:
         state.end_reason = "inactivity_timeout"
     elif time.time() - state.start_time > 10740:
@@ -989,9 +984,7 @@ async def open_conversation_job(
     else:
         state.end_reason = "user_stopped"
 
-    logger.info(
-        f"📊 Conversation {conversation_id[:12]} end_reason determined: {state.end_reason}"
-    )
+    logger.info(f"📊 Conversation {conversation_id[:12]} end_reason determined: {state.end_reason}")
 
     # Phase 4-7: Post-processing (wrapped in try/finally for guaranteed cleanup)
     end_of_conversation_handled = False
@@ -1061,9 +1054,7 @@ async def open_conversation_job(
             end_reason=state.end_reason,
         )
 
-        logger.info(
-            f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}"
-        )
+        logger.info(f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}")
 
         # Phase 6: Save streaming transcript
         version_id = await _save_streaming_transcript(
@@ -1117,9 +1108,7 @@ async def open_conversation_job(
 
 
 @async_job(redis=True, beanie=True)
-async def generate_title_summary_job(
-    conversation_id: str, *, redis_client=None
-) -> Dict[str, Any]:
+async def generate_title_summary_job(conversation_id: str, *, redis_client=None) -> Dict[str, Any]:
     """
     Generate title, short summary, and detailed summary for a conversation using LLM.
 
@@ -1143,16 +1132,12 @@ async def generate_title_summary_job(
     )
     set_otel_session(conversation_id)
 
-    logger.info(
-        f"📝 Starting title/summary generation for conversation {conversation_id}"
-    )
+    logger.info(f"📝 Starting title/summary generation for conversation {conversation_id}")
 
     start_time = time.time()
 
     # Get the conversation
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         logger.error(f"Conversation {conversation_id} not found")
         return {"success": False, "error": "Conversation not found"}
@@ -1162,9 +1147,7 @@ async def generate_title_summary_job(
     segments = conversation.segments or []
 
     if not transcript_text and (not segments or len(segments) == 0):
-        logger.warning(
-            f"⚠️ No transcript or segments available for conversation {conversation_id}"
-        )
+        logger.warning(f"⚠️ No transcript or segments available for conversation {conversation_id}")
         return {
             "success": False,
             "error": "No transcript or segments available",
@@ -1196,9 +1179,7 @@ async def generate_title_summary_job(
         else:
             logger.info(f"📚 No memories found for context enrichment")
     except Exception as mem_error:
-        logger.warning(
-            f"⚠️ Could not fetch memory context (continuing without): {mem_error}"
-        )
+        logger.warning(f"⚠️ Could not fetch memory context (continuing without): {mem_error}")
 
     # Generate title+summary (one call) and detailed summary in parallel
     import asyncio
@@ -1208,13 +1189,11 @@ async def generate_title_summary_job(
             transcript_text,
             segments=segments,
             user_id=conversation.user_id,
-            langfuse_session_id=conversation_id,
         ),
         generate_detailed_summary(
             transcript_text,
             segments=segments,
             memory_context=memory_context,
-            langfuse_session_id=conversation_id,
         ),
     )
 
@@ -1224,9 +1203,7 @@ async def generate_title_summary_job(
 
     logger.info(f"✅ Generated title: '{conversation.title}'")
     logger.info(f"✅ Generated summary: '{conversation.summary}'")
-    logger.info(
-        f"✅ Generated detailed summary: {len(conversation.detailed_summary)} chars"
-    )
+    logger.info(f"✅ Generated detailed summary: {len(conversation.detailed_summary)} chars")
 
     # Update processing status for placeholder/reprocessing conversations
     if getattr(conversation, "processing_status", None) in [
@@ -1323,16 +1300,12 @@ async def dispatch_conversation_complete_event_job(
     """
     from advanced_omi_backend.models.conversation import Conversation
 
-    logger.info(
-        f"📌 Dispatching conversation.complete event for conversation {conversation_id}"
-    )
+    logger.info(f"📌 Dispatching conversation.complete event for conversation {conversation_id}")
 
     start_time = time.time()
 
     # Get the conversation to include in event data
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         logger.error(f"Conversation {conversation_id} not found")
         return {"success": False, "error": "Conversation not found"}
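The recovery logic above hinges on reading status and websocket_connected together in
one round trip. A distilled sketch of the decision (an async Redis client is assumed;
key and field names match the diff):

    async def should_restart_listening(redis_client, session_id: str) -> bool:
        session_key = f"audio:session:{session_id}"
        status_raw, ws_raw = await redis_client.hmget(
            session_key, "status", "websocket_connected"
        )
        status = status_raw.decode() if isinstance(status_raw, bytes) else status_raw
        ws_connected = (
            ws_raw.decode() if isinstance(ws_raw, bytes) else (ws_raw or "false")
        ) == "true"
        if status == "active":
            return True
        if ws_connected:
            # Status was corrupted during the inter-conversation gap: repair and restart.
            await redis_client.hset(session_key, "status", "active")
            return True
        return False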
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index a9e98c5f..6cbf5af3 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -79,9 +79,7 @@ async def apply_speaker_recognition(
     speaker_client = SpeakerRecognitionClient()
 
     if not speaker_client.enabled:
-        logger.info(
-            f"🎤 Speaker recognition disabled, using original speaker labels"
-        )
+        logger.info(f"🎤 Speaker recognition disabled, using original speaker labels")
         return segments
 
     logger.info(
@@ -122,9 +120,7 @@ def get_speaker_at_time(timestamp: float, speaker_segments: list) -> str:
     updated_count = 0
     for seg in segments:
         seg_mid = (seg.start + seg.end) / 2.0
-        identified_speaker = get_speaker_at_time(
-            seg_mid, speaker_identified_segments
-        )
+        identified_speaker = get_speaker_at_time(seg_mid, speaker_identified_segments)
 
         if identified_speaker and identified_speaker != "Unknown":
             original_speaker = seg.speaker
@@ -187,9 +183,7 @@ async def transcribe_full_audio_job(
     start_time = time.time()
 
     # Get the conversation
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         raise ValueError(f"Conversation {conversation_id} not found")
 
@@ -206,23 +200,18 @@ async def transcribe_full_audio_job(
     logger.info(f"Using transcription provider: {provider_name}")
 
     # Reconstruct audio from MongoDB chunks
-    logger.info(
-        f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}"
-    )
+    logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}")
 
     try:
         # Reconstruct WAV from MongoDB chunks (already in memory as bytes)
         wav_data = await reconstruct_wav_from_conversation(conversation_id)
         logger.info(
-            f"📦 Reconstructed audio from MongoDB chunks: "
-            f"{len(wav_data) / 1024 / 1024:.2f} MB"
+            f"📦 Reconstructed audio from MongoDB chunks: " f"{len(wav_data) / 1024 / 1024:.2f} MB"
         )
     except ValueError as e:
         # No chunks found for conversation
-        raise FileNotFoundError(
-            f"No audio chunks found for conversation {conversation_id}: {e}"
-        )
+        raise FileNotFoundError(f"No audio chunks found for conversation {conversation_id}: {e}")
     except Exception as e:
         logger.error(f"Failed to reconstruct audio from MongoDB: {e}", exc_info=True)
         raise RuntimeError(f"Audio reconstruction failed: {e}")
@@ -305,9 +294,7 @@ def _on_batch_progress(event: dict) -> None:
                     description=f"conversation={conversation_id[:12]}, words={len(words)}",
                 )
             except Exception as e:
-                logger.exception(
-                    f"⚠️ Error triggering transcript plugins in batch mode: {e}"
-                )
+                logger.exception(f"⚠️ Error triggering transcript plugins in batch mode: {e}")
 
     logger.info(f"🔍 DEBUG: Plugin processing complete, moving to speech validation")
 
@@ -362,9 +349,7 @@ def _on_batch_progress(event: dict) -> None:
                         f"Job {job_id} hash not found (likely already completed or expired)"
                     )
                 else:
-                    logger.debug(
-                        f"Job {job_id} not found or already completed: {e}"
-                    )
+                    logger.debug(f"Job {job_id} not found or already completed: {e}")
 
     if cancelled_jobs:
         logger.info(
@@ -594,9 +579,7 @@ async def create_audio_only_conversation(
         # Update status to show batch transcription is starting
         placeholder_conversation.processing_status = "batch_transcription"
         placeholder_conversation.title = "Audio Recording (Batch Transcription...)"
-        placeholder_conversation.summary = (
-            "Processing audio with offline transcription..."
-        )
+        placeholder_conversation.summary = "Processing audio with offline transcription..."
         await placeholder_conversation.save()
 
         # Audio chunks are already linked to this conversation_id
@@ -623,9 +606,7 @@ async def create_audio_only_conversation(
     )
     await conversation.insert()
 
-    logger.info(
-        f"✅ Created batch transcription conversation {session_id[:12]} for fallback"
-    )
+    logger.info(f"✅ Created batch transcription conversation {session_id[:12]} for fallback")
 
     return conversation
 
@@ -771,18 +752,14 @@ async def transcription_fallback_check_job(
             sample_rate, channels, sample_width = 16000, 1, 2
             session_key = f"audio:session:{session_id}"
             try:
-                audio_format_raw = await redis_client.hget(
-                    session_key, "audio_format"
-                )
+                audio_format_raw = await redis_client.hget(session_key, "audio_format")
                 if audio_format_raw:
                     audio_format = json.loads(audio_format_raw)
                     sample_rate = int(audio_format.get("rate", 16000))
                     channels = int(audio_format.get("channels", 1))
                     sample_width = int(audio_format.get("width", 2))
             except Exception as e:
-                logger.warning(
-                    f"Failed to read audio_format from Redis for {session_id}: {e}"
-                )
+                logger.warning(f"Failed to read audio_format from Redis for {session_id}: {e}")
 
             bytes_per_second = sample_rate * channels * sample_width
             logger.info(
@@ -791,9 +768,7 @@ async def transcription_fallback_check_job(
             )
 
             # Create conversation placeholder
-            conversation = await create_audio_only_conversation(
-                session_id, user_id, client_id
-            )
+            conversation = await create_audio_only_conversation(session_id, user_id, client_id)
 
             # Save audio to MongoDB chunks for batch transcription
             num_chunks = await convert_audio_to_chunks(
@@ -810,9 +785,7 @@ async def transcription_fallback_check_job(
             )
 
         except Exception as e:
-            logger.error(
-                f"❌ Failed to extract audio from Redis stream: {e}", exc_info=True
-            )
+            logger.error(f"❌ Failed to extract audio from Redis stream: {e}", exc_info=True)
             raise
     else:
         logger.info(
@@ -821,9 +794,7 @@ async def transcription_fallback_check_job(
         )
 
         # Create conversation placeholder for batch transcription
-        conversation = await create_audio_only_conversation(
-            session_id, user_id, client_id
-        )
+        conversation = await create_audio_only_conversation(session_id, user_id, client_id)
 
     # Enqueue batch transcription job
     version_id = f"batch_fallback_{session_id[:12]}"
@@ -919,14 +890,10 @@ async def stream_speech_detection_job(
     # Get conversation count
     conversation_count_key = f"session:conversation_count:{session_id}"
     conversation_count_bytes = await redis_client.get(conversation_count_key)
-    conversation_count = (
-        int(conversation_count_bytes) if conversation_count_bytes else 0
-    )
+    conversation_count = int(conversation_count_bytes) if conversation_count_bytes else 0
 
     # Check if speaker filtering is enabled
-    speaker_filter_enabled = (
-        os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true"
-    )
+    speaker_filter_enabled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true"
     logger.info(
         f"📊 Conversation #{conversation_count + 1}, Speaker filter: {'enabled' if speaker_filter_enabled else 'disabled'}"
     )
@@ -969,24 +936,17 @@ async def stream_speech_detection_job(
             )
 
             # Exit if grace period expired without speech
-            if (
-                session_closed_at
-                and (time.time() - session_closed_at) > final_check_grace_period
-            ):
+            if session_closed_at and (time.time() - session_closed_at) > final_check_grace_period:
                 logger.info(f"✅ Session ended without speech (grace period expired)")
                 break
 
             # Consume any stale conversation close request (defensive — shouldn't normally
             # appear since services.py gates on conversation:current, but handles race conditions)
-            close_reason = await redis_client.hget(
-                session_key, "conversation_close_requested"
-            )
+            close_reason = await redis_client.hget(session_key, "conversation_close_requested")
             if close_reason:
                 await redis_client.hdel(session_key, "conversation_close_requested")
                 close_reason_str = (
-                    close_reason.decode()
-                    if isinstance(close_reason, bytes)
-                    else close_reason
+                    close_reason.decode() if isinstance(close_reason, bytes) else close_reason
                 )
                 logger.info(
                     f"🔒 Conversation close requested ({close_reason_str}) during speech detection — "
@@ -1003,15 +963,11 @@ async def stream_speech_detection_job(
             # Health check: detect transcription errors early during grace period
             if session_closed_at:
                 # Check for streaming consumer errors in session metadata
-                error_status = await redis_client.hget(
-                    session_key, "transcription_error"
-                )
+                error_status = await redis_client.hget(session_key, "transcription_error")
                 if error_status:
                     error_msg = error_status.decode()
                     logger.error(f"❌ Transcription service error: {error_msg}")
-                    logger.error(
-                        f"❌ Session failed - transcription service unavailable"
-                    )
+                    logger.error(f"❌ Session failed - transcription service unavailable")
                     break
 
                 # Check if we've been waiting too long with no results at all
@@ -1021,9 +977,7 @@ async def stream_speech_detection_job(
                     logger.error(
                         f"❌ No transcription activity after {grace_elapsed:.1f}s - possible API key or connectivity issue"
                    )
-                    logger.error(
-                        f"❌ Session failed - check transcription service configuration"
-                    )
+                    logger.error(f"❌ Session failed - check transcription service configuration")
                     break
 
             await asyncio.sleep(2)
@@ -1063,9 +1017,7 @@ async def stream_speech_detection_job(
         "last_event",
         f"speech_detected:{datetime.utcnow().isoformat()}",
     )
-    await redis_client.hset(
-        session_key, "speech_detected_at", datetime.utcnow().isoformat()
-    )
+    await redis_client.hset(session_key, "speech_detected_at", datetime.utcnow().isoformat())
 
     # Step 2: If speaker filter enabled, check for enrolled speakers
     identified_speakers = []
@@ -1090,7 +1042,7 @@ async def stream_speech_detection_job(
             client_id,
             job_timeout=300,  # 5 minutes for speaker recognition
             result_ttl=600,
-            job_id=f"speaker-check_{session_id[:12]}_{conversation_count}",
+            job_id=f"speaker-check_{session_id}_{conversation_count}",
             description=f"Speaker check for conversation #{conversation_count+1}",
             meta={"client_id": client_id},
         )
@@ -1117,9 +1069,7 @@ async def stream_speech_detection_job(
                     result = speaker_check_job.result
                     enrolled_present = result.get("enrolled_present", False)
                     identified_speakers = result.get("identified_speakers", [])
-                    logger.info(
-                        f"✅ Speaker check completed: enrolled={enrolled_present}"
-                    )
+                    logger.info(f"✅ Speaker check completed: enrolled={enrolled_present}")
 
                     # Update session event for speaker check complete
                     await redis_client.hset(
@@ -1148,9 +1098,7 @@ async def stream_speech_detection_job(
                         "last_event",
                         f"speaker_check_failed:{datetime.utcnow().isoformat()}",
                     )
-                    await redis_client.hset(
-                        session_key, "speaker_check_status", "failed"
-                    )
+                    await redis_client.hset(session_key, "speaker_check_status", "failed")
                     break
                 await asyncio.sleep(poll_interval)
                 waited += poll_interval
@@ -1197,15 +1145,13 @@ async def stream_speech_detection_job(
         speech_job_id,  # Pass speech detection job ID
         job_timeout=10800,  # 3 hours to match max_runtime in open_conversation_job
         result_ttl=JOB_RESULT_TTL,  # Use configured TTL (24 hours) instead of 10 minutes
-        job_id=f"open-conv_{session_id[:12]}_{conversation_count}",
-        description=f"Conversation #{conversation_count+1} for {session_id[:12]}",
+        job_id=f"open-conv_{session_id}_{conversation_count}",
+        description=f"Conversation #{conversation_count+1} for {session_id}",
         meta={"client_id": client_id},
     )
 
     # Track the job
-    await redis_client.set(
-        open_job_key, open_job.id, ex=10800
-    )  # 3 hours to match job timeout
+    await redis_client.set(open_job_key, open_job.id, ex=10800)  # 3 hours to match job timeout
 
     # Store metadata in speech detection job
     if current_job:
@@ -1218,31 +1164,23 @@ async def stream_speech_detection_job(
         current_job.meta.update(
             {
                 "conversation_job_id": open_job.id,
-                "speaker_check_job_id": (
-                    speaker_check_job.id if speaker_check_job else None
-                ),
+                "speaker_check_job_id": (speaker_check_job.id if speaker_check_job else None),
                 "detected_speakers": identified_speakers,
-                "speech_detected_at": datetime.fromtimestamp(
-                    speech_detected_at
-                ).isoformat(),
+                "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(),
                 "session_id": session_id,
                 "client_id": client_id,  # For job grouping
             }
         )
         current_job.save_meta()
 
-    logger.info(
-        f"✅ Started conversation job {open_job.id}, exiting speech detection"
-    )
+    logger.info(f"✅ Started conversation job {open_job.id}, exiting speech detection")
 
     return {
         "session_id": session_id,
         "user_id": user_id,
         "client_id": client_id,
         "conversation_job_id": open_job.id,
-        "speech_detected_at": datetime.fromtimestamp(
-            speech_detected_at
-        ).isoformat(),
+        "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(),
        "runtime_seconds": time.time() - start_time,
     }
 
@@ -1270,9 +1208,7 @@ async def stream_speech_detection_job(
     # Check if this is an always_persist conversation that needs to be marked as failed
     # NOTE: We check MongoDB directly because the conversation:current Redis key might have been
     # deleted by the audio persistence job cleanup (which runs in parallel).
-    logger.info(
-        f"🔍 Checking MongoDB for always_persist conversation with client_id: {client_id}"
-    )
+    logger.info(f"🔍 Checking MongoDB for always_persist conversation with client_id: {client_id}")
 
     # Find conversation by client_id that matches this session
     # session_id == client_id for streaming sessions (set in _initialize_streaming_session)
@@ -1312,7 +1248,7 @@ async def stream_speech_detection_job(
         client_id,
         timeout_seconds=config_timeout,
         job_timeout=config_timeout + 300,  # Extra 5 min overhead for fallback check
-        job_id=f"fallback_check_{session_id[:12]}",
+        job_id=f"fallback_check_{session_id}",
         description=f"Transcription fallback check for {session_id[:8]} (no speech)",
         meta={"session_id": session_id, "client_id": client_id, "no_speech": True},
    )
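apply_speaker_recognition maps each transcript segment to the identified speaker whose
span covers the segment's midpoint. A self-contained sketch of that mapping using plain
dicts (the job itself works on segment objects, not dicts):

    def relabel_segments(segments: list[dict], speaker_spans: list[dict]) -> int:
        """Return how many segments were relabeled from speaker-identified spans."""
        updated = 0
        for seg in segments:
            midpoint = (seg["start"] + seg["end"]) / 2.0
            for span in speaker_spans:
                if span["start"] <= midpoint <= span["end"]:
                    speaker = span.get("speaker", "Unknown")
                    if speaker != "Unknown" and seg["speaker"] != speaker:
                        seg["speaker"] = speaker
                        updated += 1
                    break
        return updated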
diff --git a/backends/advanced/webui/src/pages/ConnectApp.tsx b/backends/advanced/webui/src/pages/ConnectApp.tsx
new file mode 100644
index 00000000..768cdb6e
--- /dev/null
+++ b/backends/advanced/webui/src/pages/ConnectApp.tsx
@@ -0,0 +1,120 @@
+import { useState } from 'react'
+import { QRCodeSVG } from 'qrcode.react'
+import { Smartphone, Copy, Check } from 'lucide-react'
+import { useTheme } from '../contexts/ThemeContext'
+
+function getBackendHttpUrl(): string {
+  const { protocol, hostname, port } = window.location
+
+  const isStandardPort =
+    (protocol === 'https:' && (port === '' || port === '443')) ||
+    (protocol === 'http:' && (port === '' || port === '80'))
+
+  const basePath = import.meta.env.BASE_URL
+  if (isStandardPort && basePath && basePath !== '/') {
+    // Caddy path-based routing — return full origin
+    return `${protocol}//${hostname}`
+  }
+
+  if (import.meta.env.VITE_BACKEND_URL) {
+    const url = import.meta.env.VITE_BACKEND_URL as string
+    // If it's a relative URL, make it absolute
+    if (url.startsWith('/') || url === '') {
+      return `${protocol}//${hostname}${port ? `:${port}` : ''}`
+    }
+    return url
+  }
+
+  if (isStandardPort) {
+    return `${protocol}//${hostname}`
+  }
+
+  if (port === '5173') {
+    return `${protocol}//${hostname}:8000`
+  }
+
+  return `${protocol}//${hostname}${port ? `:${port}` : ''}`
+}
+
+export default function ConnectApp() {
+  const { isDark } = useTheme()
+  const [copied, setCopied] = useState(false)
+  const backendUrl = getBackendHttpUrl()
+
+  const handleCopy = async () => {
+    try {
+      await navigator.clipboard.writeText(backendUrl)
+      setCopied(true)
+      setTimeout(() => setCopied(false), 2000)
+    } catch {
+      // Fallback for older browsers
+      const textArea = document.createElement('textarea')
+      textArea.value = backendUrl
+      document.body.appendChild(textArea)
+      textArea.select()
+      document.execCommand('copy')
+      document.body.removeChild(textArea)
+      setCopied(true)
+      setTimeout(() => setCopied(false), 2000)
+    }
+  }
+
+  return (
+    <div>
+      <div>
+        <Smartphone />
+        <h1>Connect App</h1>
+      </div>
+      <p>
+        Scan this QR code with the Chronicle mobile app to connect it to your backend.
+      </p>
+
+      {/* QR Code */}
+      <div>
+        <QRCodeSVG
+          value={backendUrl}
+          size={224}
+          bgColor={isDark ? '#111827' : '#ffffff'}
+          fgColor={isDark ? '#ffffff' : '#000000'}
+        />
+      </div>
+
+      {/* URL display + copy */}
+      <div>
+        <span>{backendUrl}</span>
+        <button onClick={handleCopy} aria-label="Copy backend URL">
+          {copied ? <Check /> : <Copy />}
+        </button>
+      </div>
+
+      {/* Instructions */}
+      <div>
+        <h2>How to connect</h2>
+        <ol>
+          <li>Open the Chronicle app on your phone</li>
+          <li>Go to Settings and tap Scan QR Code</li>
+          <li>Point your camera at the QR code above</li>
+          <li>The backend URL will be configured automatically</li>
+        </ol>
+      </div>
+    </div>
+  )
+}
diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx
index f1c6a784..1812db83 100644
--- a/backends/advanced/webui/src/pages/Queue.tsx
+++ b/backends/advanced/webui/src/pages/Queue.tsx
@@ -22,7 +22,7 @@ import {
   Repeat,
   Zap
 } from 'lucide-react';
-import { queueApi } from '../services/api';
+import { queueApi, conversationsApi } from '../services/api';
 
 interface QueueStats {
   total_jobs: number;
@@ -1143,7 +1143,7 @@ const Queue: React.FC = () => {
                 className={`flex items-center justify-between p-3 cursor-pointer transition-colors ${hasFailedJob ? 'hover:bg-red-100' : 'hover:bg-cyan-100'}`}
                 onClick={() => toggleConversationExpansion(conversationId)}
               >
-                <div>
+                <div>
                   {isExpanded ? (
@@ -1169,7 +1169,7 @@ const Queue: React.FC = () => {
                   )}
                 </div>
-                <div>
+                <div>
                   Conversation: {conversationId.substring(0, 8)}... •
                   {createdAt && `Started: ${new Date(createdAt).toLocaleTimeString()} • `}
                   Words: {wordCount}
@@ -1181,6 +1181,27 @@ const Queue: React.FC = () => {
                   </div>
                 )}
               </div>
+              {/* Close Conversation Button - only for actively running conversations */}
+              {openConvJob && openConvJob.status === 'started' && (
+                <button
+                  onClick={(e) => {
+                    e.stopPropagation()
+                    conversationsApi.closeActiveConversation(openConvJob.meta?.client_id)
+                  }}
+                >
+                  Close Conversation
+                </button>
+              )}
 
             {/* Expanded Jobs Section */}
diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts
index 8929fb28..7961c285 100644
--- a/backends/advanced/webui/src/services/api.ts
+++ b/backends/advanced/webui/src/services/api.ts
@@ -150,6 +150,9 @@ export const conversationsApi = {
   activateTranscriptVersion: (conversationId: string, versionId: string) => api.post(`/api/conversations/${conversationId}/activate-transcript/${versionId}`),
   activateMemoryVersion: (conversationId: string, versionId: string) => api.post(`/api/conversations/${conversationId}/activate-memory/${versionId}`),
   getVersionHistory: (conversationId: string) => api.get(`/api/conversations/${conversationId}/versions`),
+
+  // Active conversation management
+  closeActiveConversation: (clientId: string) => api.post(`/api/conversations/${clientId}/close`),
 }
 
 export const memoriesApi = {