From 77c0002c5bbf3a61166c1aa7f7ebf00eaf0489c7 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Tue, 24 Feb 2026 19:17:17 +0000
Subject: [PATCH 1/4] Enhance user experience and configuration management in
 setup process

- Updated `wizard.py` to prompt for the Neo4j password with improved handling, so previously entered values are remembered on re-runs.
- Added a camera permission to `app.json` so QR codes can be scanned during backend connection setup.
- Added `expo-camera` and `expo-image-picker` dependencies to `package.json` to support the camera features.
- Introduced a `QRScanner` component, integrated into `BackendStatus`, for scanning backend-connection QR codes.
- Implemented URL-conversion utilities that derive WebSocket URLs from HTTP(S) backend URLs.
- Improved error handling and logging in several components for clearer feedback during setup and operation.
---
 app/app.json                                  |  13 +-
 app/package-lock.json                         |  81 +-
 app/package.json                              |   4 +-
 app/src/components/BackendStatus.tsx          |  36 +-
 app/src/components/QRScanner.tsx              | 225 +++++
 app/src/utils/urlConversion.ts                |  46 +
 backends/advanced/init.py                     |   8 +-
 .../observability/otel_setup.py               |  28 +-
 .../advanced_omi_backend/plugins/router.py    |  42 +
 .../routers/modules/obsidian_routes.py        | 100 +-
 .../memory/providers/llm_providers.py         | 133 ++-
 .../services/obsidian_service.py              | 151 ++-
 .../services/transcription/__init__.py        |  71 +-
 .../utils/text_chunking.py                    | 188 ++++
 .../workers/conversation_jobs.py              | 859 ++++++++++--------
 .../workers/obsidian_jobs.py                  |  19 +-
 .../advanced/src/scripts/cleanup_state.py     | 418 ++++++---
 .../advanced/tests/test_obsidian_service.py   | 157 ++--
 backends/advanced/tests/test_text_chunking.py | 376 ++++++++
 backends/advanced/webui/package-lock.json     |  10 +
 backends/advanced/webui/package.json          |   3 +-
 backends/advanced/webui/src/App.tsx           |   8 +
 .../src/components/PluginSettingsForm.tsx     |   6 +-
 .../webui/src/components/layout/Layout.tsx    |   3 +-
 .../plugins/OrchestrationSection.tsx          |  82 +-
 .../components/plugins/PluginConfigPanel.tsx  |   3 +-
 .../advanced/webui/src/pages/ConnectApp.tsx   | 120 +++
 .../webui/src/pages/ConversationDetail.tsx    |  32 +-
 backends/advanced/webui/src/pages/Queue.tsx   |  23 +-
 backends/advanced/webui/src/services/api.ts   |  16 +-
 tests/Makefile                                |  18 +-
 .../websocket_streaming_tests.robot           |   4 +-
 .../websocket_transcription_e2e_test.robot    |  25 +-
 wizard.py                                     |  12 +-
 34 files changed, 2449 insertions(+), 871 deletions(-)
 create mode 100644 app/src/components/QRScanner.tsx
 create mode 100644 app/src/utils/urlConversion.ts
 create mode 100644 backends/advanced/src/advanced_omi_backend/utils/text_chunking.py
 create mode 100644 backends/advanced/tests/test_text_chunking.py
 create mode 100644 backends/advanced/webui/src/pages/ConnectApp.tsx

diff --git a/app/app.json b/app/app.json
index 2315b46b..d2bf04ec 100644
--- a/app/app.json
+++ b/app/app.json
@@ -19,6 +19,7 @@
       "supportsTablet": true,
       "bundleIdentifier": "com.cupbearer5517.chronicle",
       "infoPlist": {
+        "NSCameraUsageDescription": "Chronicle uses the camera to scan QR codes for backend connection setup.",
         "NSMicrophoneUsageDescription": "Chronicle needs access to your microphone to stream audio to the backend for processing.",
         "NSAppTransportSecurity": {
           "NSAllowsArbitraryLoads": true,
@@ -40,7 +41,8 @@
         "android.permission.FOREGROUND_SERVICE",
         "android.permission.FOREGROUND_SERVICE_DATA_SYNC",
"android.permission.POST_NOTIFICATIONS", - "android.permission.RECORD_AUDIO" + "android.permission.RECORD_AUDIO", + "android.permission.CAMERA" ], "usesCleartextTraffic": true }, @@ -96,6 +98,13 @@ } } ], + [ + "expo-camera", + { + "cameraPermission": "Chronicle uses the camera to scan QR codes for backend connection setup." + } + ], + "expo-image-picker", "./plugins/with-ats" ], "extra": { @@ -104,4 +113,4 @@ } } } -} \ No newline at end of file +} diff --git a/app/package-lock.json b/app/package-lock.json index c76c02ac..c4ceb0c8 100644 --- a/app/package-lock.json +++ b/app/package-lock.json @@ -16,7 +16,9 @@ "deprecated-react-native-prop-types": "^5.0.0", "expo": "~53.0.9", "expo-build-properties": "~0.14.8", + "expo-camera": "~16.1.11", "expo-dev-client": "~5.2.4", + "expo-image-picker": "~16.1.4", "expo-router": "~5.0.6", "expo-status-bar": "~2.2.3", "friend-lite-react-native": "^1.0.2", @@ -80,7 +82,6 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.4.tgz", "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -2813,7 +2814,6 @@ "integrity": "sha512-Q7UnBqOO/JsWfgmO9qZjrKgMi/0U9ih0FywXXheml8VH1hn/pBXKIeO/BvzA6g5gHIvBZ/6KyhdGoNok1R/ZJw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "@react-native-community/cli-clean": "20.0.1", "@react-native-community/cli-config": "20.0.1", @@ -3429,7 +3429,6 @@ "resolved": "https://registry.npmjs.org/@react-navigation/native/-/native-7.1.28.tgz", "integrity": "sha512-d1QDn+KNHfHGt3UIwOZvupvdsDdiHYZBEj7+wL2yDVo3tMezamYy60H9s3EnNVE1Ae1ty0trc7F2OKqo/RmsdQ==", "license": "MIT", - "peer": true, "dependencies": { "@react-navigation/core": "^7.14.0", "escape-string-regexp": "^4.0.0", @@ -3625,7 +3624,6 @@ "integrity": "sha512-ixLZ7zG7j1fM0DijL9hDArwhwcCb4vqmePgwtV0GfnkHRSCUEv4LvzarcTdhoqgyMznUx/EhoTUv31CKZzkQlw==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -3750,7 +3748,6 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -4334,7 +4331,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001737", "electron-to-chromium": "^1.5.211", @@ -5290,7 +5286,6 @@ "resolved": "https://registry.npmjs.org/expo/-/expo-53.0.22.tgz", "integrity": "sha512-sJ2I4W/e5iiM4u/wYCe3qmW4D7WPCRqByPDD0hJcdYNdjc9HFFFdO4OAudZVyC/MmtoWZEIH5kTJP1cw9FjzYA==", "license": "MIT", - "peer": true, "dependencies": { "@babel/runtime": "^7.20.0", "@expo/cli": "0.24.21", @@ -5374,12 +5369,31 @@ "node": ">=10" } }, + "node_modules/expo-camera": { + "version": "16.1.11", + "resolved": "https://registry.npmjs.org/expo-camera/-/expo-camera-16.1.11.tgz", + "integrity": "sha512-etA5ZKoC6nPBnWWqiTmlX//zoFZ6cWQCCIdmpUHTGHAKd4qZNCkhPvBWbi8o32pDe57lix1V4+TPFgEcvPwsaA==", + "license": "MIT", + "dependencies": { + "invariant": "^2.2.4" + }, + "peerDependencies": { + "expo": "*", + "react": "*", + "react-native": "*", + "react-native-web": "*" + }, + "peerDependenciesMeta": { + "react-native-web": { + "optional": true + } + } + }, "node_modules/expo-constants": { "version": "17.1.7", "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-17.1.7.tgz", 
"integrity": "sha512-byBjGsJ6T6FrLlhOBxw4EaiMXrZEn/MlUYIj/JAd+FS7ll5X/S4qVRbIimSJtdW47hXMq0zxPfJX6njtA56hHA==", "license": "MIT", - "peer": true, "dependencies": { "@expo/config": "~11.0.12", "@expo/env": "~1.0.7" @@ -5472,7 +5486,6 @@ "resolved": "https://registry.npmjs.org/expo-font/-/expo-font-13.3.2.tgz", "integrity": "sha512-wUlMdpqURmQ/CNKK/+BIHkDA5nGjMqNlYmW0pJFXY/KE/OG80Qcavdu2sHsL4efAIiNGvYdBS10WztuQYU4X0A==", "license": "MIT", - "peer": true, "dependencies": { "fontfaceobserver": "^2.1.0" }, @@ -5481,6 +5494,27 @@ "react": "*" } }, + "node_modules/expo-image-loader": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/expo-image-loader/-/expo-image-loader-5.1.0.tgz", + "integrity": "sha512-sEBx3zDQIODWbB5JwzE7ZL5FJD+DK3LVLWBVJy6VzsqIA6nDEnSFnsnWyCfCTSvbGigMATs1lgkC2nz3Jpve1Q==", + "license": "MIT", + "peerDependencies": { + "expo": "*" + } + }, + "node_modules/expo-image-picker": { + "version": "16.1.4", + "resolved": "https://registry.npmjs.org/expo-image-picker/-/expo-image-picker-16.1.4.tgz", + "integrity": "sha512-bTmmxtw1AohUT+HxEBn2vYwdeOrj1CLpMXKjvi9FKSoSbpcarT4xxI0z7YyGwDGHbrJqyyic3I9TTdP2J2b4YA==", + "license": "MIT", + "dependencies": { + "expo-image-loader": "~5.1.0" + }, + "peerDependencies": { + "expo": "*" + } + }, "node_modules/expo-json-utils": { "version": "0.15.0", "resolved": "https://registry.npmjs.org/expo-json-utils/-/expo-json-utils-0.15.0.tgz", @@ -5502,6 +5536,7 @@ "resolved": "https://registry.npmjs.org/expo-linking/-/expo-linking-8.0.11.tgz", "integrity": "sha512-+VSaNL5om3kOp/SSKO5qe6cFgfSIWnnQDSbA7XLs3ECkYzXRquk5unxNS3pg7eK5kNUmQ4kgLI7MhTggAEUBLA==", "license": "MIT", + "peer": true, "dependencies": { "expo-constants": "~18.0.12", "invariant": "^2.2.4" @@ -5516,6 +5551,7 @@ "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz", "integrity": "sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==", "license": "MIT", + "peer": true, "dependencies": { "@babel/highlight": "^7.10.4" } @@ -5525,6 +5561,7 @@ "resolved": "https://registry.npmjs.org/@expo/config/-/config-12.0.13.tgz", "integrity": "sha512-Cu52arBa4vSaupIWsF0h7F/Cg//N374nYb7HAxV0I4KceKA7x2UXpYaHOL7EEYYvp7tZdThBjvGpVmr8ScIvaQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "@expo/config-plugins": "~54.0.4", @@ -5546,6 +5583,7 @@ "resolved": "https://registry.npmjs.org/@expo/config-plugins/-/config-plugins-54.0.4.tgz", "integrity": "sha512-g2yXGICdoOw5i3LkQSDxl2Q5AlQCrG7oniu0pCPPO+UxGb7He4AFqSvPSy8HpRUj55io17hT62FTjYRD+d6j3Q==", "license": "MIT", + "peer": true, "dependencies": { "@expo/config-types": "^54.0.10", "@expo/json-file": "~10.0.8", @@ -5567,13 +5605,15 @@ "version": "54.0.10", "resolved": "https://registry.npmjs.org/@expo/config-types/-/config-types-54.0.10.tgz", "integrity": "sha512-/J16SC2an1LdtCZ67xhSkGXpALYUVUNyZws7v+PVsFZxClYehDSoKLqyRaGkpHlYrCc08bS0RF5E0JV6g50psA==", - "license": "MIT" + "license": "MIT", + "peer": true }, "node_modules/expo-linking/node_modules/@expo/env": { "version": "2.0.8", "resolved": "https://registry.npmjs.org/@expo/env/-/env-2.0.8.tgz", "integrity": "sha512-5VQD6GT8HIMRaSaB5JFtOXuvfDVU80YtZIuUT/GDhUF782usIXY13Tn3IdDz1Tm/lqA9qnRZQ1BF4t7LlvdJPA==", "license": "MIT", + "peer": true, "dependencies": { "chalk": "^4.0.0", "debug": "^4.3.4", @@ -5587,6 +5627,7 @@ "resolved": "https://registry.npmjs.org/@expo/json-file/-/json-file-10.0.8.tgz", "integrity": 
"sha512-9LOTh1PgKizD1VXfGQ88LtDH0lRwq9lsTb4aichWTWSWqy3Ugfkhfm3BhzBIkJJfQQ5iJu3m/BoRlEIjoCGcnQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "json5": "^2.2.3" @@ -5597,6 +5638,7 @@ "resolved": "https://registry.npmjs.org/@expo/plist/-/plist-0.4.8.tgz", "integrity": "sha512-pfNtErGGzzRwHP+5+RqswzPDKkZrx+Cli0mzjQaus1ZWFsog5ibL+nVT3NcporW51o8ggnt7x813vtRbPiyOrQ==", "license": "MIT", + "peer": true, "dependencies": { "@xmldom/xmldom": "^0.8.8", "base64-js": "^1.2.3", @@ -5608,6 +5650,7 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", "license": "MIT", + "peer": true, "engines": { "node": "18 || 20 || >=22" } @@ -5617,6 +5660,7 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz", "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==", "license": "MIT", + "peer": true, "dependencies": { "balanced-match": "^4.0.2" }, @@ -5629,6 +5673,7 @@ "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", "license": "MIT", + "peer": true, "engines": { "node": ">= 6" } @@ -5638,6 +5683,7 @@ "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-18.0.13.tgz", "integrity": "sha512-FnZn12E1dRYKDHlAdIyNFhBurKTS3F9CrfrBDJI5m3D7U17KBHMQ6JEfYlSj7LG7t+Ulr+IKaj58L1k5gBwTcQ==", "license": "MIT", + "peer": true, "dependencies": { "@expo/config": "~12.0.13", "@expo/env": "~2.0.8" @@ -5652,6 +5698,7 @@ "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.6.tgz", "integrity": "sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==", "license": "BlueOak-1.0.0", + "peer": true, "dependencies": { "minimatch": "^10.2.2", "minipass": "^7.1.3", @@ -5669,6 +5716,7 @@ "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz", "integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==", "license": "BlueOak-1.0.0", + "peer": true, "engines": { "node": "20 || >=22" } @@ -5678,6 +5726,7 @@ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.2.tgz", "integrity": "sha512-+G4CpNBxa5MprY+04MbgOw1v7So6n5JY166pFi9KfYwT78fxScCeSNQSNzp6dpPSW2rONOps6Ocam1wFhCgoVw==", "license": "BlueOak-1.0.0", + "peer": true, "dependencies": { "brace-expansion": "^5.0.2" }, @@ -5693,6 +5742,7 @@ "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", "license": "BlueOak-1.0.0", + "peer": true, "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" @@ -5709,6 +5759,7 @@ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "license": "ISC", + "peer": true, "bin": { "semver": "bin/semver.js" }, @@ -5721,6 +5772,7 @@ "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz", "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==", "license": "MIT", + "peer": true, "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", @@ -5968,6 +6020,7 @@ "resolved": 
"https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "license": "MIT", + "peer": true, "engines": { "node": ">=12.0.0" }, @@ -8645,7 +8698,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-3.0.1.tgz", "integrity": "sha512-I3EurrIQMlRc9IaAZnqRR044Phh2DXY+55o7uJ0V+hYZAcQYSuFWsc9q5PvyDHUSCe1Qxn/iBz+78s86zWnGag==", "license": "MIT", - "peer": true, "engines": { "node": ">=10" }, @@ -8934,7 +8986,6 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz", "integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==", "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -8999,7 +9050,6 @@ "resolved": "https://registry.npmjs.org/react-native/-/react-native-0.79.6.tgz", "integrity": "sha512-kvIWSmf4QPfY41HC25TR285N7Fv0Pyn3DAEK8qRL9dA35usSaxsJkHfw+VqnonqJjXOaoKCEanwudRAJ60TBGA==", "license": "MIT", - "peer": true, "dependencies": { "@jest/create-cache-key-function": "^29.7.0", "@react-native/assets-registry": "0.79.6", @@ -9098,7 +9148,6 @@ "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-5.4.0.tgz", "integrity": "sha512-JaEThVyJcLhA+vU0NU8bZ0a1ih6GiF4faZ+ArZLqpYbL6j7R3caRqj+mE3lEtKCuHgwjLg3bCxLL1GPUJZVqUA==", "license": "MIT", - "peer": true, "peerDependencies": { "react": "*", "react-native": "*" @@ -9109,7 +9158,6 @@ "resolved": "https://registry.npmjs.org/react-native-screens/-/react-native-screens-4.11.1.tgz", "integrity": "sha512-F0zOzRVa3ptZfLpD0J8ROdo+y1fEPw+VBFq1MTY/iyDu08al7qFUO5hLMd+EYMda5VXGaTFCa8q7bOppUszhJw==", "license": "MIT", - "peer": true, "dependencies": { "react-freeze": "^1.0.0", "react-native-is-edge-to-edge": "^1.1.7", @@ -10519,6 +10567,7 @@ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", "license": "MIT", + "peer": true, "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" @@ -10535,6 +10584,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", + "peer": true, "engines": { "node": ">=12" }, @@ -10613,7 +10663,6 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", - "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/app/package.json b/app/package.json index 2d59b699..dc260c19 100644 --- a/app/package.json +++ b/app/package.json @@ -30,7 +30,9 @@ "setimmediate": "^1.0.5", "webidl-conversions": "^7.0.0", "react-native-screens": "~4.11.1", - "react-native-safe-area-context": "5.4.0" + "react-native-safe-area-context": "5.4.0", + "expo-camera": "~16.1.11", + "expo-image-picker": "~16.1.4" }, "devDependencies": { "@babel/core": "^7.20.0", diff --git a/app/src/components/BackendStatus.tsx b/app/src/components/BackendStatus.tsx index f69f7a57..e0eec8d5 100644 --- a/app/src/components/BackendStatus.tsx +++ b/app/src/components/BackendStatus.tsx @@ -1,6 +1,8 @@ import React, { useState, useEffect } from 'react'; import { View, Text, TextInput, TouchableOpacity, StyleSheet, Alert, ActivityIndicator } from 'react-native'; import { useTheme, ThemeColors } from '../theme'; +import { QRScanner } from 
'./QRScanner'; +import { httpUrlToWebSocketUrl } from '../utils/urlConversion'; interface BackendStatusProps { backendUrl: string; @@ -26,6 +28,7 @@ export const BackendStatus: React.FC = ({ status: 'unknown', message: 'Not checked', }); + const [showQRScanner, setShowQRScanner] = useState(false); const checkBackendHealth = async (showAlert: boolean = false) => { if (!backendUrl.trim()) { @@ -130,6 +133,13 @@ export const BackendStatus: React.FC = ({ )} + setShowQRScanner(true)} + > + Scan QR Code + + checkBackendHealth(true)} @@ -139,8 +149,17 @@ export const BackendStatus: React.FC = ({ - Enter the WebSocket URL of your backend server. Status is automatically checked. + Enter the WebSocket URL or scan a QR code from the Chronicle dashboard. + + { + const wsUrl = httpUrlToWebSocketUrl(httpUrl); + onBackendUrlChange(wsUrl); + }} + onClose={() => setShowQRScanner(false)} + /> ); }; @@ -215,6 +234,21 @@ const createStyles = (colors: ThemeColors) => StyleSheet.create({ textAlign: 'center', fontStyle: 'italic', }, + qrButton: { + backgroundColor: colors.card, + paddingVertical: 12, + paddingHorizontal: 20, + borderRadius: 8, + alignItems: 'center', + marginBottom: 10, + borderWidth: 1, + borderColor: colors.primary, + }, + qrButtonText: { + color: colors.primary, + fontSize: 16, + fontWeight: '600', + }, button: { backgroundColor: colors.primary, paddingVertical: 12, diff --git a/app/src/components/QRScanner.tsx b/app/src/components/QRScanner.tsx new file mode 100644 index 00000000..87bedd2d --- /dev/null +++ b/app/src/components/QRScanner.tsx @@ -0,0 +1,225 @@ +import React, { useState, useEffect } from 'react'; +import { + View, + Text, + TouchableOpacity, + StyleSheet, + Modal, + Alert, +} from 'react-native'; +import { CameraView, useCameraPermissions, scanFromURLAsync } from 'expo-camera'; +import * as ImagePicker from 'expo-image-picker'; +import { isValidBackendUrl } from '../utils/urlConversion'; +import { useTheme, ThemeColors } from '../theme'; + +interface QRScannerProps { + visible: boolean; + onScanned: (url: string) => void; + onClose: () => void; +} + +export const QRScanner: React.FC = ({ visible, onScanned, onClose }) => { + const { colors } = useTheme(); + const s = createStyles(colors); + const [permission, requestPermission] = useCameraPermissions(); + const [scanned, setScanned] = useState(false); + + useEffect(() => { + if (visible) { + setScanned(false); + } + }, [visible]); + + const handleBarCodeScanned = ({ data }: { data: string }) => { + if (scanned) return; + setScanned(true); + + if (isValidBackendUrl(data)) { + onScanned(data); + onClose(); + } else { + Alert.alert( + 'Invalid QR Code', + 'The scanned QR code does not contain a valid backend URL. 
Please scan the QR code from the Chronicle dashboard.', + [{ text: 'Try Again', onPress: () => setScanned(false) }] + ); + } + }; + + const handlePickFromGallery = async () => { + try { + const result = await ImagePicker.launchImageLibraryAsync({ + mediaTypes: ['images'], + quality: 1, + }); + + if (result.canceled || !result.assets?.[0]?.uri) return; + + const scanResult = await scanFromURLAsync(result.assets[0].uri, ['qr']); + + if (scanResult.length > 0 && scanResult[0].data) { + handleBarCodeScanned({ data: scanResult[0].data }); + } else { + Alert.alert('No QR Code Found', 'Could not find a QR code in the selected image.'); + } + } catch (error) { + console.log('[QRScanner] Gallery scan error:', error); + Alert.alert('Error', 'Failed to scan QR code from image.'); + } + }; + + const renderContent = () => { + if (!permission) { + return Requesting camera permission...; + } + + if (!permission.granted) { + return ( + + Camera access is needed to scan QR codes. + + Grant Camera Access + + or + + Pick from Gallery + + + ); + } + + return ( + + + + Point at QR code on Chronicle dashboard + + + Pick from Gallery + + + ); + }; + + return ( + + + + Scan QR Code + + Close + + + {renderContent()} + + + ); +}; + +const createStyles = (colors: ThemeColors) => + StyleSheet.create({ + container: { + flex: 1, + backgroundColor: colors.background, + }, + header: { + flexDirection: 'row', + justifyContent: 'space-between', + alignItems: 'center', + paddingHorizontal: 20, + paddingTop: 60, + paddingBottom: 15, + borderBottomWidth: 1, + borderBottomColor: colors.separator, + backgroundColor: colors.card, + }, + headerTitle: { + fontSize: 18, + fontWeight: '600', + color: colors.text, + }, + closeButton: { + padding: 8, + }, + closeButtonText: { + fontSize: 16, + color: colors.primary, + fontWeight: '500', + }, + cameraContainer: { + flex: 1, + alignItems: 'center', + }, + camera: { + flex: 1, + width: '100%', + }, + overlay: { + position: 'absolute', + top: 40, + left: 20, + right: 20, + alignItems: 'center', + }, + overlayText: { + color: '#ffffff', + fontSize: 16, + fontWeight: '500', + textAlign: 'center', + backgroundColor: 'rgba(0,0,0,0.5)', + paddingHorizontal: 16, + paddingVertical: 8, + borderRadius: 8, + overflow: 'hidden', + }, + permissionContainer: { + flex: 1, + justifyContent: 'center', + alignItems: 'center', + padding: 30, + }, + messageText: { + fontSize: 16, + color: colors.textSecondary, + textAlign: 'center', + marginBottom: 20, + }, + permissionButton: { + backgroundColor: colors.primary, + paddingVertical: 12, + paddingHorizontal: 24, + borderRadius: 8, + }, + permissionButtonText: { + color: '#ffffff', + fontSize: 16, + fontWeight: '600', + }, + orText: { + fontSize: 14, + color: colors.textTertiary, + marginVertical: 12, + }, + galleryButton: { + paddingVertical: 12, + paddingHorizontal: 24, + borderRadius: 8, + borderWidth: 1, + borderColor: colors.primary, + marginTop: 12, + marginBottom: 20, + }, + galleryButtonText: { + color: colors.primary, + fontSize: 16, + fontWeight: '500', + textAlign: 'center', + }, + }); + +export default QRScanner; diff --git a/app/src/utils/urlConversion.ts b/app/src/utils/urlConversion.ts new file mode 100644 index 00000000..530e53fc --- /dev/null +++ b/app/src/utils/urlConversion.ts @@ -0,0 +1,46 @@ +/** + * Converts an HTTP(S) URL to the corresponding WebSocket URL for the Chronicle backend. 
+ * + * Examples: + * https://100.64.1.5 → wss://100.64.1.5/ws + * http://localhost:8000 → ws://localhost:8000/ws + * https://my.server.com → wss://my.server.com/ws + */ +export function httpUrlToWebSocketUrl(httpUrl: string): string { + let url = httpUrl.trim().replace(/\/+$/, '') + + if (url.startsWith('https://')) { + url = 'wss://' + url.slice('https://'.length) + } else if (url.startsWith('http://')) { + url = 'ws://' + url.slice('http://'.length) + } else { + // If no scheme, assume wss + url = 'wss://' + url + } + + // Append /ws if not already present + if (!url.endsWith('/ws')) { + url += '/ws' + } + + return url +} + +/** + * Validates that a scanned string looks like a valid HTTP(S) backend URL. + */ +export function isValidBackendUrl(url: string): boolean { + if (!url || typeof url !== 'string') return false + + const trimmed = url.trim() + if (!trimmed.startsWith('http://') && !trimmed.startsWith('https://')) { + return false + } + + try { + const parsed = new URL(trimmed) + return parsed.protocol === 'http:' || parsed.protocol === 'https:' + } catch { + return false + } +} diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 375818be..eaf9f92f 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -837,7 +837,13 @@ def setup_neo4j(self): "Neo4j is used for Knowledge Graph (entity/relationship extraction)" ) self.console.print() - neo4j_password = self.prompt_password("Neo4j password (min 8 chars)") + neo4j_password = self.prompt_with_existing_masked( + "Neo4j password (min 8 chars)", + env_key="NEO4J_PASSWORD", + placeholders=["", "your-neo4j-password"], + is_password=True, + default="neo4jpassword", + ) self.config["NEO4J_HOST"] = "neo4j" self.config["NEO4J_USER"] = "neo4j" diff --git a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py index 9c891aeb..488dcb0d 100644 --- a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py +++ b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py @@ -69,7 +69,6 @@ def init_otel() -> None: """Initialize OTEL with Galileo exporter and OpenAI instrumentor. Call once at app startup. Safe to call if Galileo is not configured (no-op). - Filters out embedding spans — only LLM (chat completion) calls are exported. 
""" if not is_galileo_enabled(): logger.info("Galileo not configured, skipping OTEL initialization") @@ -78,41 +77,16 @@ def init_otel() -> None: try: from galileo import otel from openinference.instrumentation.openai import OpenAIInstrumentor - from opentelemetry import context from opentelemetry.sdk import trace as trace_sdk - from opentelemetry.sdk.trace import ReadableSpan, Span, SpanProcessor project = os.getenv("GALILEO_PROJECT", "chronicle") logstream = os.getenv("GALILEO_LOG_STREAM", "default") - class _LLMOnlyProcessor(SpanProcessor): - """Wraps GalileoSpanProcessor, dropping EMBEDDING spans.""" - - def __init__(self, inner: SpanProcessor): - self._inner = inner - - def on_start( - self, span: Span, parent_context: context.Context | None = None - ) -> None: - self._inner.on_start(span, parent_context) - - def on_end(self, span: ReadableSpan) -> None: - kind = span.attributes.get("openinference.span.kind", "") - if kind == "EMBEDDING": - return # drop - self._inner.on_end(span) - - def shutdown(self) -> None: - self._inner.shutdown() - - def force_flush(self, timeout_millis: int = 30000) -> bool: - return self._inner.force_flush(timeout_millis) - tracer_provider = trace_sdk.TracerProvider() galileo_processor = otel.GalileoSpanProcessor( project=project, logstream=logstream ) - tracer_provider.add_span_processor(_LLMOnlyProcessor(galileo_processor)) + tracer_provider.add_span_processor(galileo_processor) # Auto-instrument all OpenAI SDK calls OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py index e06d3043..970ae9e7 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/router.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py @@ -550,6 +550,48 @@ async def check_connectivity(self) -> Dict[str, Dict[str, Any]]: return results + def get_asr_keywords(self) -> list[str]: + """Collect all wake words and keywords from enabled plugins. + + These are meant to be injected into STT providers as keyword + boosting hints (e.g. Deepgram ``keyterm``, VibeVoice ``context_info``) + so that the transcription engine is more likely to correctly + recognise them. + + Returns: + Deduplicated list of keyword strings. 
+ """ + seen: set[str] = set() + result: list[str] = [] + + for plugin in self.plugins.values(): + if not plugin.enabled: + continue + condition = plugin.condition or {} + condition_type = condition.get("type", "always") + + words: list[str] = [] + if condition_type == "wake_word": + words = condition.get("wake_words", []) + if not words: + w = condition.get("wake_word", "") + if w: + words = [w] + elif condition_type == "keyword_anywhere": + words = condition.get("keywords", []) + if not words: + w = condition.get("keyword", "") + if w: + words = [w] + + for w in words: + normalised = w.strip().lower() + if normalised and normalised not in seen: + seen.add(normalised) + result.append(normalised) + + return result + async def cleanup_all(self): """Clean up all registered plugins""" for plugin_id, plugin in self.plugins.items(): diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py index b02ed426..d3d6f844 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py @@ -1,4 +1,3 @@ - import json import logging import os @@ -13,7 +12,7 @@ from advanced_omi_backend.auth import current_active_user, current_superuser from advanced_omi_backend.controllers.queue_controller import default_queue, redis_conn -from advanced_omi_backend.services.obsidian_service import obsidian_service +from advanced_omi_backend.services.obsidian_service import get_obsidian_service from advanced_omi_backend.users import User from advanced_omi_backend.utils.file_utils import ZipExtractionError, extract_zip from advanced_omi_backend.workers.obsidian_jobs import ( @@ -25,23 +24,26 @@ router = APIRouter(prefix="/obsidian", tags=["obsidian"]) + class IngestRequest(BaseModel): vault_path: str + @router.post("/ingest") async def ingest_obsidian_vault( - request: IngestRequest, - current_user: User = Depends(current_active_user) + request: IngestRequest, current_user: User = Depends(current_active_user) ): """ Immediate/synchronous ingestion endpoint (legacy). Not recommended for UI. Prefer the upload_zip + start endpoints to enable progress reporting. """ if not os.path.exists(request.vault_path): - raise HTTPException(status_code=400, detail=f"Path not found: {request.vault_path}") + raise HTTPException( + status_code=400, detail=f"Path not found: {request.vault_path}" + ) try: - result = await obsidian_service.ingest_vault(request.vault_path) + result = await get_obsidian_service().ingest_vault(request.vault_path) return {"message": "Ingestion complete", **result} except Exception as e: logger.error(f"Ingestion failed: {e}") @@ -50,15 +52,16 @@ async def ingest_obsidian_vault( @router.post("/upload_zip") async def upload_obsidian_zip( - file: UploadFile = File(...), - current_user: User = Depends(current_superuser) + file: UploadFile = File(...), current_user: User = Depends(current_superuser) ): """ Upload a zipped Obsidian vault. Returns a job_id that can be started later. Uses upload_files_async pattern from upload_files.py for proper file handling. 
""" - if not file.filename.lower().endswith('.zip'): - raise HTTPException(status_code=400, detail="Please upload a .zip file of your Obsidian vault") + if not file.filename.lower().endswith(".zip"): + raise HTTPException( + status_code=400, detail="Please upload a .zip file of your Obsidian vault" + ) job_id = str(uuid.uuid4()) base_dir = Path("/app/data/obsidian_jobs") @@ -67,21 +70,23 @@ async def upload_obsidian_zip( job_dir.mkdir(parents=True, exist_ok=True) zip_path = job_dir / "vault.zip" extract_dir = job_dir / "vault" - + # Use upload_files_async pattern for proper file handling with cleanup zip_file_handle = None try: # Read file content file_content = await file.read() - + # Save zip file using proper file handling pattern from upload_files_async try: - zip_file_handle = open(zip_path, 'wb') + zip_file_handle = open(zip_path, "wb") zip_file_handle.write(file_content) except IOError as e: logger.error(f"Error writing zip file {zip_path}: {e}") - raise HTTPException(status_code=500, detail=f"Failed to save uploaded zip: {e}") - + raise HTTPException( + status_code=500, detail=f"Failed to save uploaded zip: {e}" + ) + # Extract zip file using utility function try: extract_zip(zip_path, extract_dir) @@ -90,10 +95,12 @@ async def upload_obsidian_zip( raise HTTPException(status_code=400, detail=f"Invalid zip file: {e}") except ZipExtractionError as e: logger.error(f"Error extracting zip file: {e}") - raise HTTPException(status_code=500, detail=f"Failed to extract zip file: {e}") + raise HTTPException( + status_code=500, detail=f"Failed to extract zip file: {e}" + ) total = count_markdown_files(str(extract_dir)) - + # Store pending job state in Redis pending_state = { "status": "ready", @@ -101,16 +108,20 @@ async def upload_obsidian_zip( "processed": 0, "errors": [], "vault_path": str(extract_dir), - "job_id": job_id + "job_id": job_id, } - redis_conn.set(f"obsidian_pending:{job_id}", json.dumps(pending_state), ex=3600*24) # 24h expiry + redis_conn.set( + f"obsidian_pending:{job_id}", json.dumps(pending_state), ex=3600 * 24 + ) # 24h expiry return {"job_id": job_id, "vault_path": str(extract_dir), "total_files": total} except HTTPException: raise except Exception as e: logger.exception(f"Failed to process uploaded zip: {e}") - raise HTTPException(status_code=500, detail=f"Failed to process uploaded zip: {e}") + raise HTTPException( + status_code=500, detail=f"Failed to process uploaded zip: {e}" + ) finally: # Ensure file handle is closed (following upload_files_async pattern) if zip_file_handle: @@ -123,17 +134,17 @@ async def upload_obsidian_zip( @router.post("/start") async def start_ingestion( job_id: str = Body(..., embed=True), - current_user: User = Depends(current_active_user) + current_user: User = Depends(current_active_user), ): # Check if job is pending pending_key = f"obsidian_pending:{job_id}" pending_data = redis_conn.get(pending_key) - + if pending_data: try: job_data = json.loads(pending_data) vault_path = job_data.get("vault_path") - + # Enqueue to RQ rq_job = default_queue.enqueue( ingest_obsidian_vault_job, @@ -141,27 +152,31 @@ async def start_ingestion( vault_path, # arg2 job_id=job_id, # Set RQ job ID to match our ID description=f"Obsidian ingestion for job {job_id}", - job_timeout=3600 # 1 hour timeout + job_timeout=3600, # 1 hour timeout ) - + # Remove pending key redis_conn.delete(pending_key) - - return {"message": "Ingestion started", "job_id": job_id, "rq_job_id": rq_job.id} + + return { + "message": "Ingestion started", + "job_id": job_id, + "rq_job_id": 
rq_job.id, + } except Exception as e: logger.exception(f"Failed to start job {job_id}: {e}") raise HTTPException(status_code=500, detail=f"Failed to start job: {e}") - + # Check if already in RQ try: job = Job.fetch(job_id, connection=redis_conn) status = job.get_status() if status in ("queued", "started", "deferred", "scheduled"): - raise HTTPException(status_code=400, detail=f"Job already {status}") - + raise HTTPException(status_code=400, detail=f"Job already {status}") + # If finished/failed, we could potentially restart? But for now let's say it's done. raise HTTPException(status_code=400, detail=f"Job is in state: {status}") - + except NoSuchJobError: raise HTTPException(status_code=404, detail="Job not found") @@ -171,7 +186,7 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us # 1. Try RQ first try: job = Job.fetch(job_id, connection=redis_conn) - + # Get status status = job.get_status() if status == "started": @@ -181,13 +196,18 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us meta = job.meta or {} # If meta has status, prefer it (for granular updates) - if "status" in meta and meta["status"] in ("running", "finished", "failed", "canceled"): - status = meta["status"] + if "status" in meta and meta["status"] in ( + "running", + "finished", + "failed", + "canceled", + ): + status = meta["status"] total = meta.get("total_files", 0) processed = meta.get("processed", 0) percent = int((processed / total) * 100) if total else 0 - + return { "job_id": job_id, "status": status, @@ -196,14 +216,14 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us "percent": percent, "errors": meta.get("errors", []), "vault_path": meta.get("vault_path"), - "rq_job_id": job.id + "rq_job_id": job.id, } - + except NoSuchJobError: # 2. 
Check pending pending_key = f"obsidian_pending:{job_id}" pending_data = redis_conn.get(pending_key) - + if pending_data: try: job_data = json.loads(pending_data) @@ -214,10 +234,8 @@ async def get_status(job_id: str, current_user: User = Depends(current_active_us "processed": 0, "percent": 0, "errors": [], - "vault_path": job_data.get("vault_path") + "vault_path": job_data.get("vault_path"), } except: raise HTTPException(status_code=500, detail="Failed to get job status") raise HTTPException(status_code=404, detail="Job not found") - - diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py index 2d83d24c..3a81b53e 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py @@ -15,8 +15,12 @@ from typing import Any, Dict, List, Optional from advanced_omi_backend.model_registry import ModelDef, get_models_registry -from advanced_omi_backend.openai_factory import create_openai_client, is_langfuse_enabled +from advanced_omi_backend.openai_factory import ( + create_openai_client, + is_langfuse_enabled, +) from advanced_omi_backend.prompt_registry import get_prompt_registry +from advanced_omi_backend.utils.text_chunking import semantic_chunk_text from ..base import LLMProviderBase from ..prompts import ( @@ -77,6 +81,7 @@ async def generate_openai_embeddings( ) return [data.embedding for data in response.data] + # TODO: Re-enable spacy when Docker build is fixed # try: # nlp = spacy.load("en_core_web_sm") @@ -86,6 +91,7 @@ async def generate_openai_embeddings( # nlp = None nlp = None # Temporarily disabled + def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: """Split text into chunks using spaCy sentence segmentation. max_tokens is the maximum number of words in a chunk. @@ -93,14 +99,14 @@ def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: # Fallback chunking when spacy is not available if nlp is None: # Simple sentence-based chunking - sentences = text.replace('\n', ' ').split('. ') + sentences = text.replace("\n", " ").split(". ") chunks = [] current_chunk = "" current_tokens = 0 - + for sentence in sentences: sentence_tokens = len(sentence.split()) - + if current_tokens + sentence_tokens > max_tokens and current_chunk: chunks.append(current_chunk.strip()) current_chunk = sentence @@ -111,23 +117,23 @@ def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: else: current_chunk = sentence current_tokens += sentence_tokens - + if current_chunk.strip(): chunks.append(current_chunk.strip()) - + return chunks if chunks else [text] - + # Original spacy implementation when available doc = nlp(text) - + chunks = [] current_chunk = "" current_tokens = 0 - + for sent in doc.sents: sent_text = sent.text.strip() sent_tokens = len(sent_text.split()) # Simple word count - + if current_tokens + sent_tokens > max_tokens and current_chunk: chunks.append(current_chunk.strip()) current_chunk = sent_text @@ -135,12 +141,13 @@ def chunk_text_with_spacy(text: str, max_tokens: int = 100) -> List[str]: else: current_chunk += " " + sent_text if current_chunk else sent_text current_tokens += sent_tokens - + if current_chunk.strip(): chunks.append(current_chunk.strip()) - + return chunks + class OpenAIProvider(LLMProviderBase): """Config-driven LLM provider using OpenAI SDK (OpenAI-compatible). 
@@ -153,7 +160,9 @@ def __init__(self, config: Dict[str, Any]): # Ignore provider-specific envs; use registry as single source of truth registry = get_models_registry() if not registry: - raise RuntimeError("config.yml not found or invalid; cannot initialize model registry") + raise RuntimeError( + "config.yml not found or invalid; cannot initialize model registry" + ) self._registry = registry @@ -170,9 +179,15 @@ def __init__(self, config: Dict[str, Any]): self.model = self.llm_def.model_name # Store parameters for embeddings (use separate config if available) - self.embedding_model = (self.embed_def.model_name if self.embed_def else self.llm_def.model_name) - self.embedding_api_key = (self.embed_def.api_key if self.embed_def else self.api_key) - self.embedding_base_url = (self.embed_def.model_url if self.embed_def else self.base_url) + self.embedding_model = ( + self.embed_def.model_name if self.embed_def else self.llm_def.model_name + ) + self.embedding_api_key = ( + self.embed_def.api_key if self.embed_def else self.api_key + ) + self.embedding_base_url = ( + self.embed_def.model_url if self.embed_def else self.base_url + ) # CRITICAL: Validate API keys are present - fail fast instead of hanging if not self.api_key or self.api_key.strip() == "": @@ -182,7 +197,9 @@ def __init__(self, config: Dict[str, Any]): f"Cannot proceed without valid API credentials." ) - if self.embed_def and (not self.embedding_api_key or self.embedding_api_key.strip() == ""): + if self.embed_def and ( + not self.embedding_api_key or self.embedding_api_key.strip() == "" + ): raise RuntimeError( f"API key is missing or empty for embedding provider '{self.embed_def.model_provider}' (model: {self.embedding_model}). " f"Please set the API key in config.yml or environment variables." @@ -192,7 +209,10 @@ def __init__(self, config: Dict[str, Any]): self._client = None async def extract_memories( - self, text: str, prompt: str, user_id: Optional[str] = None, + self, + text: str, + prompt: str, + user_id: Optional[str] = None, langfuse_session_id: Optional[str] = None, ) -> List[str]: """Extract memories using OpenAI API with the enhanced fact retrieval prompt. 
@@ -219,26 +239,56 @@ async def extract_memories( current_date=datetime.now().strftime("%Y-%m-%d"), ) - # local models can only handle small chunks of input text - text_chunks = chunk_text_with_spacy(text) + # Semantic chunking: split dialogue into turns, then group by topic + async def _embed_for_chunking(texts: List[str]) -> List[List[float]]: + return await generate_openai_embeddings( + texts, + api_key=self.embedding_api_key, + base_url=self.embedding_base_url, + model=self.embedding_model, + ) + + chunking_config = self._registry.memory.get("extraction", {}).get( + "chunking", {} + ) + dialogue_turns = [line for line in text.split("\n") if line.strip()] + text_chunks = await semantic_chunk_text( + text, + embed_fn=_embed_for_chunking, + sentences=dialogue_turns, + join_str="\n", + buffer_size=int(chunking_config.get("buffer_size", 1)), + breakpoint_percentile_threshold=float( + chunking_config.get("breakpoint_percentile_threshold", 90.0) + ), + max_chunk_words=int(chunking_config.get("max_chunk_words", 500)), + ) # Process all chunks in sequence, not concurrently - results = [await self._process_chunk(system_prompt, chunk, i, langfuse_session_id=langfuse_session_id) for i, chunk in enumerate(text_chunks)] - + results = [ + await self._process_chunk( + system_prompt, chunk, i, langfuse_session_id=langfuse_session_id + ) + for i, chunk in enumerate(text_chunks) + ] + # Spread list of list of facts into a single list of facts cleaned_facts = [] for result in results: memory_logger.info(f"Cleaned facts: {result}") cleaned_facts.extend(result) - + return cleaned_facts - + except Exception as e: memory_logger.error(f"OpenAI memory extraction failed: {e}") return [] - + async def _process_chunk( - self, system_prompt: str, chunk: str, index: int, + self, + system_prompt: str, + chunk: str, + index: int, langfuse_session_id: Optional[str] = None, ) -> List[str]: """Process a single text chunk to extract memories using OpenAI API. @@ -312,11 +362,15 @@ async def test_connection(self) -> bool: try: # Add 10-second timeout to prevent hanging on API calls async with asyncio.timeout(10): - client = _get_openai_client(api_key=self.api_key, base_url=self.base_url, is_async=True) + client = _get_openai_client( + api_key=self.api_key, base_url=self.base_url, is_async=True + ) await client.models.list() return True except asyncio.TimeoutError: - memory_logger.error(f"OpenAI connection test timed out after 10s - check network connectivity and API endpoint") + memory_logger.error( + f"OpenAI connection test timed out after 10s - check network connectivity and API endpoint" + ) return False except Exception as e: memory_logger.error(f"OpenAI connection test failed: {e}") @@ -344,11 +398,11 @@ async def propose_memory_actions( # Generate the complete prompt using the helper function memory_logger.debug(f"🧠 Facts passed to prompt builder: {new_facts}") update_memory_messages = build_update_memory_messages( - retrieved_old_memory, - new_facts, - custom_prompt + retrieved_old_memory, new_facts, custom_prompt + ) + memory_logger.debug( + f"🧠 Generated prompt user content: {update_memory_messages[1]['content'][:200]}..." 
)
-        memory_logger.debug(f"🧠 Generated prompt user content: {update_memory_messages[1]['content'][:200]}...")

         op = self._registry.get_llm_operation("memory_update")
         client = op.get_client(is_async=True)
@@ -374,7 +428,6 @@
         memory_logger.error(f"OpenAI propose_memory_actions failed: {e}")
         return {}

-
     async def propose_reprocess_actions(
         self,
         existing_memories: List[Dict[str, str]],
@@ -466,21 +519,23 @@
 class OllamaProvider(LLMProviderBase):
     """Ollama LLM provider implementation.
-    
+
     Provides memory extraction, embedding generation, and memory action proposals
     using Ollama-served chat and embedding models.
-    
-    
+
+
     Use the OpenAI provider for Ollama by setting these environment variables:
-    
-    os.environ["OPENAI_API_KEY"] = "ollama" 
+
+    os.environ["OPENAI_API_KEY"] = "ollama"
     os.environ["OPENAI_BASE_URL"] = "http://localhost:11434/v1"
     os.environ["QDRANT_BASE_URL"] = "localhost"
     os.environ["OPENAI_EMBEDDER_MODEL"] = "erwan2/DeepSeek-R1-Distill-Qwen-1.5B:latest"
-    
+
     """
+
+    pass
+
 def _parse_memories_content(content: str) -> List[str]:
     """
     Parse LLM content to extract memory strings.
diff --git a/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py b/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py
index b02a6fa0..86943e44 100644
--- a/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py
+++ b/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py
@@ -16,14 +16,12 @@
 import logging
 import os
 import re
-from pathlib import Path
 from typing import List, Literal, Optional, TypedDict

 from advanced_omi_backend.services.memory.config import (
     load_config_yml as load_root_config,
 )
 from advanced_omi_backend.services.memory.providers.llm_providers import (
-    chunk_text_with_spacy,
     generate_openai_embeddings,
 )
 from advanced_omi_backend.services.neo4j_client import (
@@ -33,6 +31,7 @@
 )
 from advanced_omi_backend.utils.config_utils import resolve_value
 from advanced_omi_backend.utils.model_utils import get_model_config
+from advanced_omi_backend.utils.text_chunking import semantic_chunk_text

 logger = logging.getLogger(__name__)
@@ -64,44 +63,11 @@
     def __init__(self, stage: Literal["embedding", "database"], message: str):
         self.stage = stage


-def load_env_file(filepath: Path) -> dict[str, str]:
-    """Load environment variables from a .env file.
-
-    Args:
-        filepath: Path to the .env file to load.
-
-    Returns:
-        Dictionary of key-value pairs from the .env file.
- """ - env_vars = {} - if filepath.exists(): - with open(filepath, "r") as f: - for line in f: - line = line.strip() - if not line or line.startswith("#"): - continue - if "=" in line: - parts = line.split("=", 1) - key = parts[0].strip() - value = parts[1].strip() if len(parts) > 1 else "" - # Handle quotes - if (value.startswith("'") and value.endswith("'")) or ( - value.startswith('"') and value.endswith('"') - ): - value = value[1:-1] - env_vars[key] = value - return env_vars - - class ObsidianService: """Service for ingesting Obsidian vaults into Neo4j graph database.""" def __init__(self): """Initialize the Obsidian service with configuration from config.yml and environment.""" - # Resolve paths relative to this file - # backends/advanced/src/advanced_omi_backend/services/obsidian_service.py - self.CURRENT_DIR = Path(__file__).parent.resolve() - # Load configuration strictly from standard locations # Prefer /app/config.yml inside containers (mounted by docker-compose) # Fallbacks handled by shared utility @@ -114,46 +80,36 @@ def __init__(self): embed_config = get_model_config(config_data, "embedding") if not embed_config: - raise ValueError("Configuration for 'defaults.embedding' not found in config.yml") - - # Neo4j Connection - Prefer environment variables passed by Docker Compose - neo4j_host = os.getenv("NEO4J_HOST") - # Load .env file as fallback (for local dev or if env vars not set) - candidate_env_files = [ - Path("/app/.env"), - self.CURRENT_DIR.parent.parent.parent.parent - / ".env", # Project root .env file ToDo cleanup needed after k8s is migrated and there is no .env file in the project root. - self.CURRENT_DIR.parent.parent.parent.parent - / "backends" - / "advanced" - / ".env", # repo path - ] - env_data = {} - for p in candidate_env_files: - if p.exists(): - env_data.update(load_env_file(p)) - - # Use env var first, then fallback to .env file - if not neo4j_host: - neo4j_host = env_data.get("NEO4J_HOST") - - if not neo4j_host: - raise KeyError("NEO4J_HOST not found in environment or .env") + raise ValueError( + "Configuration for 'defaults.embedding' not found in config.yml" + ) + # Neo4j Connection - environment variables passed by Docker Compose + neo4j_host = os.getenv("NEO4J_HOST", "neo4j") self.neo4j_uri = f"bolt://{neo4j_host}:7687" - self.neo4j_user = os.getenv("NEO4J_USER") or env_data.get("NEO4J_USER", "neo4j") - self.neo4j_password = os.getenv("NEO4J_PASSWORD") or env_data.get("NEO4J_PASSWORD", "") + self.neo4j_user = os.getenv("NEO4J_USER", "neo4j") + self.neo4j_password = os.getenv("NEO4J_PASSWORD", "password") # Models / API - Loaded strictly from config.yml self.embedding_model = str(resolve_value(embed_config["model_name"])) - self.embedding_dimensions = int(resolve_value(embed_config["embedding_dimensions"])) + self.embedding_dimensions = int( + resolve_value(embed_config["embedding_dimensions"]) + ) self.openai_base_url = str(resolve_value(llm_config["model_url"])) self.openai_api_key = str(resolve_value(llm_config["api_key"])) - # Chunking - uses shared spaCy/text fallback utility - self.chunk_word_limit = 120 + # Semantic chunking configuration (from config.yml with defaults) + obsidian_config = config_data.get("memory", {}).get("obsidian", {}) + chunking_config = obsidian_config.get("chunking", {}) + self.semantic_buffer_size = int(chunking_config.get("buffer_size", 1)) + self.semantic_breakpoint_percentile = float( + chunking_config.get("breakpoint_percentile_threshold", 95.0) + ) + self.max_chunk_words = 
int(chunking_config.get("max_chunk_words", 300)) - self.neo4j_client = Neo4jClient(self.neo4j_uri, self.neo4j_user, self.neo4j_password) + self.neo4j_client = Neo4jClient( + self.neo4j_uri, self.neo4j_user, self.neo4j_password + ) self.read_interface = Neo4jReadInterface(self.neo4j_client) self.write_interface = Neo4jWriteInterface(self.neo4j_client) @@ -191,7 +147,9 @@ def _clean_text(text: str) -> str: """Normalize whitespace for embedding inputs.""" return re.sub(r"\s+", " ", text).strip() - def parse_obsidian_note(self, root: str, filename: str, vault_path: str) -> NoteData: + def parse_obsidian_note( + self, root: str, filename: str, vault_path: str + ) -> NoteData: """Parse an Obsidian markdown file and extract metadata. Args: @@ -225,14 +183,8 @@ def parse_obsidian_note(self, root: str, filename: str, vault_path: str) -> Note fm_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", raw_text, re.DOTALL) content = raw_text[fm_match.end() :] if fm_match else raw_text - """ - Pattern breakdown: - \[\[ matches [[ - ([^\]|]+) captures the link name (one or more chars except ] or |) - (?:\|[^\]]+)? optionally matches |display text - \]\] matches ]] - Matches: [[note]] and [[note|display text]] - """ + # Pattern: \[\[ matches [[, ([^\]|]+) captures link name, + # (?:\|[^\]]+)? optionally matches |display text, \]\] matches ]] links = re.findall(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]", content) tags = re.findall(r"#([a-zA-Z0-9_\-/]+)", content) @@ -247,7 +199,10 @@ def parse_obsidian_note(self, root: str, filename: str, vault_path: str) -> Note } async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload]: - """Chunk note content and generate embeddings for each chunk. + """Chunk note content semantically and generate embeddings for each chunk. + + Uses embedding-similarity-based semantic chunking to find natural topic + boundaries, then embeds the resulting chunks for vector storage. Args: note_data: Parsed note data to process. @@ -255,9 +210,21 @@ async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload Returns: List of chunk payloads with text and embedding vectors. 
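
        Chunking thresholds are read from config.yml in ``__init__``; a
        sketch of the assumed layout (keys and defaults match the code):

            memory:
              obsidian:
                chunking:
                  buffer_size: 1
                  breakpoint_percentile_threshold: 95.0
                  max_chunk_words: 300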
""" - text_chunks = chunk_text_with_spacy( + api_key = self.openai_api_key + base_url = self.openai_base_url + model = self.embedding_model + + async def embed_fn(texts: List[str]) -> List[List[float]]: + return await generate_openai_embeddings( + texts, api_key=api_key, base_url=base_url, model=model + ) + + text_chunks = await semantic_chunk_text( note_data["content"], - max_tokens=self.chunk_word_limit, + embed_fn=embed_fn, + buffer_size=self.semantic_buffer_size, + breakpoint_percentile_threshold=self.semantic_breakpoint_percentile, + max_chunk_words=self.max_chunk_words, ) logger.info( f"Processing: {note_data['path']} ({len(note_data['content'])} chars -> {len(text_chunks)} chunks)" @@ -284,7 +251,9 @@ async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload model=self.embedding_model, ) except Exception as e: - logger.exception(f"Embedding generation failed for {note_data['path']}: {e}") + logger.exception( + f"Embedding generation failed for {note_data['path']}: {e}" + ) return [] chunk_payloads: List[ChunkPayload] = [] @@ -296,7 +265,9 @@ async def chunking_and_embedding(self, note_data: NoteData) -> List[ChunkPayload return chunk_payloads - def ingest_note_and_chunks(self, note_data: NoteData, chunks: List[ChunkPayload]) -> None: + def ingest_note_and_chunks( + self, note_data: NoteData, chunks: List[ChunkPayload] + ) -> None: """Store note and chunks in Neo4j with relationships to folders, tags, and links. Args: @@ -416,15 +387,15 @@ async def search_obsidian(self, query: str, limit: int = 5) -> ObsidianSearchRes cypher_query = """ CALL db.index.vector.queryNodes('chunk_embeddings', $limit, $vector) YIELD node AS chunk, score - + // Find the parent Note MATCH (note:Note)-[:HAS_CHUNK]->(chunk) - + // Get graph context: What tags and linked files are around this note? OPTIONAL MATCH (note)-[:HAS_TAG]->(t:Tag) OPTIONAL MATCH (note)-[:LINKS_TO]->(linked:Note) - - RETURN + + RETURN note.name AS source, chunk.text AS content, collect(DISTINCT t.name) AS tags, @@ -470,15 +441,3 @@ def get_obsidian_service() -> ObsidianService: if _obsidian_service is None: _obsidian_service = ObsidianService() return _obsidian_service - - -# Backward compatibility: module-level access uses lazy initialization -# This property-like access ensures the service is only created when first used -class _ObsidianServiceProxy: - """Proxy for lazy access to obsidian_service.""" - - def __getattr__(self, name): - return getattr(get_obsidian_service(), name) - - -obsidian_service = _ObsidianServiceProxy() diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py index 804077f4..26516443 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py @@ -28,6 +28,47 @@ logger = logging.getLogger(__name__) +def _get_plugin_keywords() -> list[str]: + """Collect ASR keyword hints from all enabled plugins. + + Returns an empty list if the plugin system is not initialised yet. 
+ """ + try: + from advanced_omi_backend.services.plugin_service import get_plugin_router + + router = get_plugin_router() + if router: + return router.get_asr_keywords() + except Exception: + pass + return [] + + +def _merge_hot_words(prompt_hot_words: str, plugin_keywords: list[str]) -> str: + """Merge prompt-registry hot words with plugin keywords (deduplicated).""" + import re + + parts: list[str] = [] + seen: set[str] = set() + + # Parse prompt registry hot words first + if prompt_hot_words and prompt_hot_words.strip(): + for word in re.split(r"[,\n]+", prompt_hot_words): + word = word.strip().lower() + if word and word not in seen: + seen.add(word) + parts.append(word) + + # Add plugin keywords + for kw in plugin_keywords: + kw = kw.strip().lower() + if kw and kw not in seen: + seen.add(kw) + parts.append(kw) + + return "\n".join(parts) if parts else "" + + def _parse_hot_words_to_keyterm(hot_words_str: str) -> str: """Convert hot words string to Deepgram keyterm format. @@ -222,7 +263,8 @@ async def transcribe( if "diarize" in query: query["diarize"] = "true" if diarize else "false" - # Use caller-provided context or fall back to LangFuse prompt store + # Use caller-provided context or fall back to LangFuse prompt store, + # then merge with plugin wake words / keywords for ASR boosting. if context_info: hot_words_str = context_info else: @@ -233,6 +275,8 @@ async def transcribe( except Exception as e: logger.debug(f"Failed to fetch asr.hot_words prompt: {e}") + hot_words_str = _merge_hot_words(hot_words_str, _get_plugin_keywords()) + # For Deepgram: inject as keyterm query param if self.model.model_provider == "deepgram" and hot_words_str.strip(): keyterm = _parse_hot_words_to_keyterm(hot_words_str) @@ -404,17 +448,20 @@ async def start_stream( if diarize and "diarize" in query_dict: query_dict["diarize"] = "true" - # Inject hot words for streaming (Deepgram only) - if self.model.model_provider == "deepgram": - try: - registry = get_prompt_registry() - hot_words_str = await registry.get_prompt("asr.hot_words") - if hot_words_str and hot_words_str.strip(): - keyterm = _parse_hot_words_to_keyterm(hot_words_str) - if keyterm: - query_dict["keyterm"] = keyterm - except Exception as e: - logger.debug(f"Failed to fetch asr.hot_words for streaming: {e}") + # Inject hot words for streaming — merge prompt registry + plugin keywords + prompt_hot_words = "" + try: + registry = get_prompt_registry() + prompt_hot_words = await registry.get_prompt("asr.hot_words") + except Exception as e: + logger.debug(f"Failed to fetch asr.hot_words for streaming: {e}") + + merged_hot_words = _merge_hot_words(prompt_hot_words, _get_plugin_keywords()) + + if self.model.model_provider == "deepgram" and merged_hot_words: + keyterm = _parse_hot_words_to_keyterm(merged_hot_words) + if keyterm: + query_dict["keyterm"] = keyterm # NOTE: PULSE/wave (smallest.ai) does NOT support keywords on WebSocket — # any `keywords` query param causes 0 responses or HTTP 400. diff --git a/backends/advanced/src/advanced_omi_backend/utils/text_chunking.py b/backends/advanced/src/advanced_omi_backend/utils/text_chunking.py new file mode 100644 index 00000000..d2f3ed6e --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/utils/text_chunking.py @@ -0,0 +1,188 @@ +"""Semantic text chunking using embedding similarity. + +Splits text into semantically coherent chunks by comparing consecutive sentence +embeddings and finding natural topic boundaries. 
Inspired by LlamaIndex's
+SemanticSplitterNodeParser:
+https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking/
+
+The algorithm:
+1. Split text into sentences (regex on sentence-ending punctuation)
+2. Create "buffered" versions by combining each sentence with its neighbors
+3. Batch-embed all buffered sentences in one API call
+4. Compute cosine distances between consecutive embeddings
+5. Find breakpoints where distance exceeds a percentile threshold
+6. Group sentences between breakpoints into chunks
+7. Apply a max-word safety valve to prevent oversized chunks
+"""
+
+import logging
+import re
+from typing import Awaitable, Callable, List, Optional
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+def split_sentences(text: str) -> List[str]:
+    """Split text into sentences using regex on .!? boundaries.
+
+    Splits only where sentence-ending punctuation is followed by whitespace,
+    so decimal numbers (e.g. 3.14) are kept intact; abbreviations followed by
+    a space (e.g. "Dr. Smith") may still be split.
+    """
+    # Split on sentence-ending punctuation followed by whitespace
+    parts = re.split(r"(?<=[.!?])\s+", text.strip())
+    return [s.strip() for s in parts if s.strip()]
+
+
+def _build_buffered_sentences(sentences: List[str], buffer_size: int = 1) -> List[str]:
+    """Combine each sentence with its neighbors for richer embedding context.
+
+    For buffer_size=1, sentence i is combined with sentences [i-1, i, i+1].
+    """
+    buffered = []
+    for i in range(len(sentences)):
+        start = max(0, i - buffer_size)
+        end = min(len(sentences), i + buffer_size + 1)
+        buffered.append(" ".join(sentences[start:end]))
+    return buffered
+
+
+def _cosine_distances(embeddings: List[List[float]]) -> List[float]:
+    """Compute cosine distances between consecutive embedding pairs.
+
+    Returns a list of length len(embeddings) - 1.
+    """
+    arr = np.array(embeddings, dtype=np.float64)
+    # Normalize rows
+    norms = np.linalg.norm(arr, axis=1, keepdims=True)
+    norms = np.where(norms == 0, 1.0, norms)
+    normed = arr / norms
+
+    # Cosine similarity between consecutive pairs, then convert to distance
+    similarities = np.sum(normed[:-1] * normed[1:], axis=1)
+    distances = 1.0 - similarities
+    return distances.tolist()
+
+
+def _find_breakpoints(distances: List[float], percentile_threshold: float) -> List[int]:
+    """Find indices where distance exceeds the given percentile.
+
+    Returns sorted list of breakpoint indices (positions in the distances list
+    where a topic transition occurs).
+    """
+    if not distances:
+        return []
+    threshold = float(np.percentile(distances, percentile_threshold))
+    return [i for i, d in enumerate(distances) if d > threshold]
+
+
+def _enforce_max_chunk_words(chunks: List[str], max_words: int) -> List[str]:
+    """Split any chunk that exceeds max_words into smaller pieces."""
+    result = []
+    for chunk in chunks:
+        words = chunk.split()
+        if len(words) <= max_words:
+            result.append(chunk)
+        else:
+            for i in range(0, len(words), max_words):
+                piece = " ".join(words[i : i + max_words])
+                if piece:
+                    result.append(piece)
+    return result
+
+
+async def semantic_chunk_text(
+    text: str,
+    embed_fn: Callable[[List[str]], Awaitable[List[List[float]]]],
+    buffer_size: int = 1,
+    breakpoint_percentile_threshold: float = 95.0,
+    max_chunk_words: int = 300,
+    sentences: Optional[List[str]] = None,
+    join_str: str = " ",
+) -> List[str]:
+    """Split text into semantically coherent chunks using embedding similarity.
+ + Uses the approach from LlamaIndex's SemanticSplitterNodeParser + (https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking/) + to detect topic transitions via cosine distance between consecutive sentence + embeddings. + + Args: + text: The text to chunk. + embed_fn: Async callable that takes a list of strings and returns + a list of embedding vectors. Keeps the chunker decoupled from + any specific embedding provider. + buffer_size: Number of neighboring sentences to include when building + the buffered context for each sentence's embedding. + breakpoint_percentile_threshold: Percentile of cosine distances above + which a topic transition is detected (higher = fewer breaks). + max_chunk_words: Maximum words per chunk. Chunks exceeding this are + split further as a safety valve. + sentences: Optional pre-split text units (e.g. dialogue turns). When + provided, the regex-based split_sentences() call is skipped and + these units are used directly as the atomic elements for embedding + and breakpoint detection. + join_str: String used to join units within a chunk. Default is ``" "`` + (space). Use ``"\\n"`` for dialogue transcripts to keep speaker + labels on separate lines. + + Returns: + List of text chunks. + """ + text = text.strip() + if not text: + return [] + + units = sentences if sentences is not None else split_sentences(text) + # Filter out empty units + units = [u for u in units if u.strip()] + if len(units) <= 2: + return _enforce_max_chunk_words([text], max_chunk_words) + + # Build buffered sentences for richer embedding context + buffered = _build_buffered_sentences(units, buffer_size) + + # Embed all buffered sentences in one batch call + try: + embeddings = await embed_fn(buffered) + except Exception: + logger.warning( + "Embedding call failed during semantic chunking; returning text as single chunk", + exc_info=True, + ) + return _enforce_max_chunk_words([text], max_chunk_words) + + if not embeddings or len(embeddings) != len(units): + logger.warning( + "Unexpected embedding count (%s vs %s units); returning single chunk", + len(embeddings) if embeddings else 0, + len(units), + ) + return _enforce_max_chunk_words([text], max_chunk_words) + + # Compute distances and find breakpoints + distances = _cosine_distances(embeddings) + breakpoints = _find_breakpoints(distances, breakpoint_percentile_threshold) + + # Group units between breakpoints + chunks: List[str] = [] + start = 0 + for bp in sorted(breakpoints): + # bp is the index in distances; the break is *after* unit bp + end = bp + 1 + chunk = join_str.join(units[start:end]) + if chunk.strip(): + chunks.append(chunk.strip()) + start = end + + # Remaining units + if start < len(units): + chunk = join_str.join(units[start:]) + if chunk.strip(): + chunks.append(chunk.strip()) + + if not chunks: + chunks = [text] + + return _enforce_max_chunk_words(chunks, max_chunk_words) diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 5f7487e5..34285062 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -9,6 +9,7 @@ import logging import os import time +from dataclasses import dataclass from datetime import datetime from typing import Any, Dict, Optional @@ -207,52 +208,83 @@ async def handle_end_of_conversation( } -@async_job(redis=True, beanie=True) -async def open_conversation_job( 
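For reference, a minimal usage sketch of the semantic_chunk_text helper added above. The deterministic stub embed function and the sample text are illustrative assumptions, not part of the patch; in the backend, embed_fn wraps generate_openai_embeddings as shown in obsidian_service.py.

import asyncio
import hashlib
from typing import List

from advanced_omi_backend.utils.text_chunking import semantic_chunk_text


async def stub_embed_fn(texts: List[str]) -> List[List[float]]:
    # Deterministic fake embeddings: hash each text into a short vector.
    return [[b / 255.0 for b in hashlib.sha256(t.encode()).digest()[:8]] for t in texts]


async def main() -> None:
    text = (
        "Cats are wonderful pets. They sleep most of the day. "
        "Rust has a borrow checker. It prevents data races at compile time."
    )
    chunks = await semantic_chunk_text(
        text,
        embed_fn=stub_embed_fn,
        buffer_size=1,
        breakpoint_percentile_threshold=95.0,
        max_chunk_words=300,
    )
    print(chunks)  # sentences grouped into chunks, split at the largest cosine distance


asyncio.run(main())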
+@dataclass +class ConversationState: + """Mutable state tracked across the conversation monitoring loop.""" + + conversation_id: str = "" + session_id: str = "" + user_id: str = "" + client_id: str = "" + start_time: float = 0.0 + last_result_count: int = 0 + timeout_triggered: bool = False + close_requested_reason: Optional[str] = None + last_meaningful_speech_time: float = 0.0 + last_word_count: int = 0 + end_reason: str = "unknown" + + +def _validate_segments(segments: list) -> list: + """Validate and filter transcription segments, correcting minor issues. + + Filters out non-dict segments and segments with no text. Corrects invalid + timing (end <= start) by estimating duration from word count. Ensures + speaker field is always a non-empty string. + """ + validated = [] + for i, seg in enumerate(segments): + if not isinstance(seg, dict): + logger.warning(f"Segment {i} is not a dict: {type(seg)}") + continue + + text = seg.get("text", "").strip() + if not text: + logger.debug(f"Segment {i} has no text, skipping") + continue + + start = seg.get("start", 0.0) + end = seg.get("end", 0.0) + if end <= start: + logger.debug( + f"Segment {i} has invalid timing (start={start}, end={end}), correcting" + ) + estimated_duration = len(text.split()) * 0.5 # ~0.5 seconds per word + seg["end"] = start + estimated_duration + + speaker = seg.get("speaker") + if speaker is None or speaker == "": + seg["speaker"] = "SPEAKER_00" + elif isinstance(speaker, (int, float)): + seg["speaker"] = f"Speaker {int(speaker)}" + + validated.append(seg) + + logger.info(f"Validated {len(validated)}/{len(segments)} segments") + return validated + + +async def _initialize_conversation( session_id: str, user_id: str, client_id: str, - speech_detected_at: float, - speech_job_id: str = None, - *, - redis_client=None, -) -> Dict[str, Any]: - """ - Long-running RQ job that creates and continuously updates conversation with transcription results. + speech_job_id: str, + current_job, + redis_client, +) -> str: + """Create or reuse a conversation for this session. - Creates conversation when speech is detected, then monitors and updates until session ends. - - Args: - session_id: Stream session ID - user_id: User ID - client_id: Client ID - speech_detected_at: Timestamp when speech was first detected - speech_job_id: Optional speech detection job ID to update with conversation_id - redis_client: Redis client (injected by decorator) + Checks for an existing placeholder conversation in Redis. If found and valid, + reuses it. Otherwise creates a new conversation. Attaches session markers, + links job metadata, and signals audio persistence to rotate files. Returns: - Dict with conversation_id, final_result_count, runtime_seconds - - Note: user_email is fetched from the database when needed. + conversation_id of the created/reused conversation. 
""" - from rq import get_current_job - from advanced_omi_backend.models.conversation import ( Conversation, create_conversation, ) - from advanced_omi_backend.services.audio_stream import ( - TranscriptionResultsAggregator, - ) - - logger.info( - f"📝 Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})" - ) - - # Get current job for meta storage - current_job = get_current_job() - current_job.meta = {} - current_job.save_meta() # Check if a placeholder conversation already exists for this session conversation_key = f"conversation:current:{session_id}" @@ -388,17 +420,34 @@ async def open_conversation_job( f"🔄 Signaled audio persistence to rotate file for conversation {conversation_id[:12]}" ) - # Use redis_client parameter - aggregator = TranscriptionResultsAggregator(redis_client) + return conversation_id - # Job control - session_key = f"audio:session:{session_id}" + +async def _monitor_conversation_loop( + state: ConversationState, + aggregator, + current_job, + redis_client, +) -> None: + """Poll transcription results and track conversation activity until exit. + + Runs the main monitoring loop that: + - Detects zombie jobs (job hash missing from Redis) + - Handles session finalize signals (disconnect, user stop) + - Handles conversation close requests (API, plugin, button) + - Polls the transcription aggregator for new results + - Validates segments and tracks speech activity + - Detects inactivity timeout and closes conversation + - Dispatches transcript.streaming plugin events + + Mutates ``state`` in place with final values for timeout_triggered, + close_requested_reason, last_result_count, and last_word_count. + """ + session_key = f"audio:session:{state.session_id}" max_runtime = ( 10740 # 3 hours - 60 seconds (single conversations shouldn't exceed 3 hours) ) - start_time = time.time() - last_result_count = 0 finalize_received = False # Inactivity timeout configuration @@ -406,17 +455,9 @@ async def open_conversation_job( os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60") ) inactivity_timeout_minutes = inactivity_timeout_seconds / 60 - last_meaningful_speech_time = ( - 0.0 # Initialize with audio time 0 (will be updated with first speech) - ) - timeout_triggered = False # Track if closure was due to timeout - close_requested_reason = ( - None # Track if closure was requested via API/plugin/button - ) last_inactivity_log_time = ( time.time() ) # Track when we last logged inactivity (wall-clock for logging) - last_word_count = 0 # Track word count to detect actual new speech # Test mode: wait for audio queue to drain before timing out # In real usage, ambient noise keeps connection alive. In tests, chunks arrive in bursts. 
@@ -434,7 +475,7 @@ async def open_conversation_job(
         # Check if job still exists in Redis (detect zombie state)
         from advanced_omi_backend.utils.job_utils import check_job_alive
 
-        if not await check_job_alive(redis_client, current_job, session_id):
+        if not await check_job_alive(redis_client, current_job, state.session_id):
             break
 
         # Check if session is finalizing (set by producer when recording stops)
@@ -455,10 +496,12 @@ async def open_conversation_job(
 
             if completion_reason_str == "websocket_disconnect":
                 logger.warning(
-                    f"🔌 WebSocket disconnected for session {session_id[:12]} - "
+                    f"🔌 WebSocket disconnected for session {state.session_id[:12]} - "
                    f"ending conversation early"
                 )
-                timeout_triggered = False  # This is a disconnect, not a timeout
+                state.timeout_triggered = (
+                    False  # This is a disconnect, not a timeout
+                )
             else:
                 logger.info(
                     f"🛑 Session finalizing (reason: {completion_reason_str}), "
@@ -473,92 +516,58 @@ async def open_conversation_job(
             )
             if close_reason:
                 await redis_client.hdel(session_key, "conversation_close_requested")
-                close_requested_reason = (
+                state.close_requested_reason = (
                     close_reason.decode()
                     if isinstance(close_reason, bytes)
                     else close_reason
                 )
                 logger.info(
-                    f"🔒 Conversation close requested: {close_requested_reason}"
+                    f"🔒 Conversation close requested: {state.close_requested_reason}"
                 )
-                timeout_triggered = True  # Session stays active (same restart behavior as inactivity timeout)
+                state.timeout_triggered = True  # Session stays active (same restart behavior as inactivity timeout)
                 finalize_received = True
                 break
 
         # Check max runtime timeout
-        if time.time() - start_time > max_runtime:
-            logger.warning(f"⏱️ Max runtime reached for {conversation_id}")
+        if time.time() - state.start_time > max_runtime:
+            logger.warning(f"⏱️ Max runtime reached for {state.conversation_id}")
             break
 
         # Get combined results from aggregator
-        combined = await aggregator.get_combined_results(session_id)
+        combined = await aggregator.get_combined_results(state.session_id)
         current_count = combined["chunk_count"]
 
         # Analyze speech content using detailed analysis
-        transcript_data = {"text": combined["text"], "words": combined.get("words", [])}
+        transcript_data = {
+            "text": combined["text"],
+            "words": combined.get("words", []),
+        }
         speech_analysis = analyze_speech(transcript_data)
 
         # Extract speaker information from segments
         segments = combined.get("segments", [])
 
-        # FIX: Validate and filter segments before processing
-        validated_segments = []
-        for i, seg in enumerate(segments):
-            # Check if segment is a dict
-            if not isinstance(seg, dict):
-                logger.warning(f"Segment {i} is not a dict: {type(seg)}")
-                continue
-
-            # Check for required text field
-            text = seg.get("text", "").strip()
-            if not text:
-                logger.debug(f"Segment {i} has no text, skipping")
-                continue
-
-            # Check for reasonable timing
-            start = seg.get("start", 0.0)
-            end = seg.get("end", 0.0)
-            if end <= start:
-                logger.debug(
-                    f"Segment {i} has invalid timing (start={start}, end={end}), correcting"
-                )
-                # Auto-correct: estimate duration from text length
-                estimated_duration = len(text.split()) * 0.5  # ~0.5 seconds per word
-                seg["end"] = start + estimated_duration
-
-            # Ensure speaker field exists and is a string
-            speaker = seg.get("speaker")
-            if speaker is None or speaker == "":
-                seg["speaker"] = "SPEAKER_00"
-            elif isinstance(speaker, (int, float)):
-                seg["speaker"] = f"Speaker {int(speaker)}"
-
-            validated_segments.append(seg)
-
-        logger.info(f"Validated {len(validated_segments)}/{len(segments)} segments")
+        # Validate and filter segments before processing
+        validated_segments = _validate_segments(segments)
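A worked example of the _validate_segments helper used above (input values are made up for illustration):

raw_segments = [
    {"text": "hello there", "start": 0.0, "end": 0.0, "speaker": 1},  # end <= start
    {"text": "", "start": 1.0, "end": 2.0},  # dropped: empty text
    "not a dict",  # dropped: wrong type
    {"text": "how are you", "start": 2.0, "end": 3.5, "speaker": None},
]
assert _validate_segments(raw_segments) == [
    {"text": "hello there", "start": 0.0, "end": 1.0, "speaker": "Speaker 1"},
    {"text": "how are you", "start": 2.0, "end": 3.5, "speaker": "SPEAKER_00"},
]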
speakers = extract_speakers_from_segments(validated_segments) # Track new speech activity (word count based) - new_speech_time, last_word_count = await track_speech_activity( + new_speech_time, state.last_word_count = await track_speech_activity( speech_analysis=speech_analysis, - last_word_count=last_word_count, - conversation_id=conversation_id, + last_word_count=state.last_word_count, + conversation_id=state.conversation_id, redis_client=redis_client, ) if new_speech_time: - last_meaningful_speech_time = new_speech_time + state.last_meaningful_speech_time = new_speech_time # Update job metadata with current progress await update_job_progress_metadata( current_job=current_job, - conversation_id=conversation_id, - session_id=session_id, - client_id=client_id, + conversation_id=state.conversation_id, + session_id=state.session_id, + client_id=state.client_id, combined=combined, speech_analysis=speech_analysis, speakers=speakers, - last_meaningful_speech_time=last_meaningful_speech_time, + last_meaningful_speech_time=state.last_meaningful_speech_time, ) # Check inactivity timeout using audio time (not wall-clock time) @@ -567,8 +576,8 @@ async def open_conversation_job( # Calculate inactivity based on audio timestamps # Only check if we have valid audio timing data - if current_audio_time > 0 and last_meaningful_speech_time > 0: - inactivity_duration = current_audio_time - last_meaningful_speech_time + if current_audio_time > 0 and state.last_meaningful_speech_time > 0: + inactivity_duration = current_audio_time - state.last_meaningful_speech_time else: # Fallback: No audio timestamps available (text-only transcription) # Can't reliably detect inactivity, so skip timeout check this iteration @@ -591,7 +600,7 @@ async def open_conversation_job( # In test mode, check if there are pending chunks before timing out if wait_for_queue_drain: # Check audio persistence queue length - persist_queue_key = f"audio:queue:{session_id}" + persist_queue_key = f"audio:queue:{state.session_id}" queue_length = await redis_client.llen(persist_queue_key) if queue_length > 0: @@ -603,24 +612,24 @@ async def open_conversation_job( continue logger.info( - f"🕐 Conversation {conversation_id} inactive for " + f"🕐 Conversation {state.conversation_id} inactive for " f"{inactivity_duration/60:.1f} minutes (threshold: {inactivity_timeout_minutes} min), " f"auto-closing conversation (session remains active for next conversation)..." 
) # DON'T set session to finalizing - just close this conversation # Session remains "active" so new conversations can be created # Only user manual stop or WebSocket disconnect should finalize the session - timeout_triggered = True + state.timeout_triggered = True finalize_received = True break # Track results progress (conversation will get transcript from transcription job) - if current_count > last_result_count: + if current_count > state.last_result_count: logger.info( - f"📊 Conversation {conversation_id} progress: " + f"📊 Conversation {state.conversation_id} progress: " f"{current_count} results, {len(combined['text'])} chars, {len(validated_segments)} segments" ) - last_result_count = current_count + state.last_result_count = current_count # Trigger transcript-level plugins on new transcript segments try: @@ -632,22 +641,22 @@ async def open_conversation_job( if transcript_text: plugin_data = { "transcript": transcript_text, - "segment_id": f"{session_id}_{current_count}", - "conversation_id": conversation_id, + "segment_id": f"{state.session_id}_{current_count}", + "conversation_id": state.conversation_id, "segments": validated_segments, "word_count": speech_analysis.get("word_count", 0), } logger.info( f"🔌 DISPATCH: transcript.streaming event " - f"(conversation={conversation_id[:12]}, segment_id={session_id}_{current_count})" + f"(conversation={state.conversation_id[:12]}, segment_id={state.session_id}_{current_count})" ) plugin_results = await plugin_router.dispatch_event( event=PluginEvent.TRANSCRIPT_STREAMING, - user_id=user_id, + user_id=state.user_id, data=plugin_data, - metadata={"client_id": client_id}, + metadata={"client_id": state.client_id}, ) logger.info( @@ -671,85 +680,363 @@ async def open_conversation_job( await asyncio.sleep(1) # Check every second for responsiveness + +async def _save_streaming_transcript( + session_id: str, + conversation_id: str, + aggregator, +) -> str: + """Retrieve final streaming transcript and save it to the conversation document. + + Gets the combined transcription results from the aggregator, converts them + to Word and SpeakerSegment model objects, creates a transcript version, and + saves to MongoDB. + + Returns: + version_id of the saved transcript version. 
+ """ + from advanced_omi_backend.models.conversation import Conversation + + logger.info( + f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}" + ) + final_transcript = await aggregator.get_combined_results(session_id) + + # Fetch conversation from database to ensure we have latest state + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + if not conversation: + logger.error(f"❌ Conversation {conversation_id} not found in database") + raise ValueError(f"Conversation {conversation_id} not found") + + # Create transcript version from streaming results + version_id = f"streaming_{session_id[:12]}" + transcript_text = final_transcript.get("text", "") + words_data = final_transcript.get("words", []) # All words from aggregator + + # Convert words to Word objects (including per-word speaker labels if present) + words = [ + Conversation.Word( + word=w.get("word", ""), + start=w.get("start", 0.0), + end=w.get("end", 0.0), + confidence=w.get("confidence"), + speaker=w.get("speaker"), + speaker_confidence=w.get("speaker_confidence"), + ) + for w in words_data + ] + + # Use provider-supplied segments if available (from streaming diarization), + # otherwise leave empty for speaker recognition service to fill later. + segments_data = final_transcript.get("segments", []) + if segments_data: + segments = [ + Conversation.SpeakerSegment( + start=s.get("start", 0.0), + end=s.get("end", 0.0), + text=s.get("text", ""), + speaker=str(s.get("speaker", "Unknown")), + words=[ + Conversation.Word( + word=sw.get("word", ""), + start=sw.get("start", 0.0), + end=sw.get("end", 0.0), + confidence=sw.get("confidence"), + speaker=sw.get("speaker"), + speaker_confidence=sw.get("speaker_confidence"), + ) + for sw in s.get("words", []) + ], + ) + for s in segments_data + ] + else: + segments = [] + + # Determine provider from streaming results + provider = final_transcript.get("provider", "deepgram") + + # Determine diarization source if provider supplied segments + diarization_source = "provider" if segments else None + + # Add streaming transcript with words at version level + version = conversation.add_transcript_version( + version_id=version_id, + transcript=transcript_text, + words=words, # Store at version level + segments=segments, # Provider segments or empty (filled by speaker service later) + provider=provider, + model=provider, # Provider name as model + processing_time_seconds=None, # Not applicable for streaming + metadata={ + "source": "streaming", + "chunk_count": final_transcript.get("chunk_count", 0), + "word_count": len(words), + "provider_capabilities": {"diarization": bool(segments)}, + }, + set_as_active=True, + ) + version.diarization_source = diarization_source + + # Update placeholder conversation if it exists + if ( + getattr(conversation, "always_persist", False) + and getattr(conversation, "processing_status", None) == "pending_transcription" + ): + # Keep placeholder status - will be updated by title_summary_job + logger.info( + f"📝 Placeholder conversation {conversation_id} has transcript, " + f"waiting for title/summary generation" + ) + + # Save conversation with streaming transcript + await conversation.save() + segment_info = ( + f"{len(segments)} provider segments (diarization_source={diarization_source})" + if segments + else "0 segments (pending speaker recognition)" + ) + logger.info( + f"✅ Saved streaming transcript: {len(transcript_text)} chars, " + f"{segment_info}, {len(words)} words " + f"for 
conversation {conversation_id[:12]}" + ) + + return version_id + + +async def _enqueue_post_processing( + conversation_id: str, + user_id: str, + client_id: str, + version_id: str, + end_reason: str, +) -> None: + """Enqueue post-conversation processing jobs (speaker, memory, title, events). + + Checks configuration for always_batch_retranscribe. If enabled, enqueues + a batch transcription job first with post-processing depending on it. + Otherwise starts post-processing immediately with the streaming transcript. + """ + from advanced_omi_backend.config_loader import get_backend_config + + transcription_cfg = get_backend_config("transcription") + batch_retranscribe = False + if transcription_cfg: + from omegaconf import OmegaConf + + cfg_dict = OmegaConf.to_container(transcription_cfg, resolve=True) + batch_retranscribe = cfg_dict.get("always_batch_retranscribe", False) + + if batch_retranscribe: + # BATCH PATH: Streaming transcript saved as preview — user sees it immediately + # Full post-processing (speaker, memory, title) waits for batch transcript + from advanced_omi_backend.config import get_transcription_job_timeout + from advanced_omi_backend.controllers.queue_controller import ( + JOB_RESULT_TTL, + transcription_queue, + ) + from advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, + ) + + batch_version_id = f"batch_{conversation_id[:12]}" + batch_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + batch_version_id, + "always_batch_retranscribe", + job_timeout=get_transcription_job_timeout(), + result_ttl=JOB_RESULT_TTL, + job_id=f"batch_retranscribe_{conversation_id[:12]}", + description=f"Batch re-transcription for {conversation_id[:8]}", + meta={"conversation_id": conversation_id, "client_id": client_id}, + ) + + logger.info( + f"🔄 Batch re-transcribe enabled: enqueued batch job {batch_job.id} " + f"(streaming transcript is preview only)" + ) + + # Run post-processing ONLY after batch completes + job_ids = start_post_conversation_jobs( + conversation_id=conversation_id, + user_id=user_id, + transcript_version_id=batch_version_id, + depends_on_job=batch_job, + client_id=client_id, + end_reason=end_reason, + ) + + logger.info( + f"📥 Pipeline: batch_retranscribe({batch_job.id}) → " + f"speaker({job_ids['speaker_recognition']}) → " + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " + f"event({job_ids['event_dispatch']})" + ) + else: + # NORMAL PATH: Process streaming transcript immediately (existing behavior) + job_ids = start_post_conversation_jobs( + conversation_id=conversation_id, + user_id=user_id, + transcript_version_id=version_id, # Pass the streaming transcript version ID + depends_on_job=None, # No dependency - streaming already succeeded + client_id=client_id, # Pass client_id for UI tracking + end_reason=end_reason, # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) 
+ ) + + logger.info( + f"📥 Pipeline: speaker({job_ids['speaker_recognition']}) → " + f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " + f"event({job_ids['event_dispatch']})" + ) + + # Wait a moment to ensure jobs are registered in RQ + await asyncio.sleep(0.5) + + logger.info( + f"✅ Post-conversation pipeline started with event dispatch job (end_reason={end_reason})" + ) + + +@async_job(redis=True, beanie=True) +async def open_conversation_job( + session_id: str, + user_id: str, + client_id: str, + speech_detected_at: float, + speech_job_id: str = None, + *, + redis_client=None, +) -> Dict[str, Any]: + """ + Long-running RQ job that creates and continuously updates conversation with transcription results. + + Creates conversation when speech is detected, then monitors and updates until session ends. + + Args: + session_id: Stream session ID + user_id: User ID + client_id: Client ID + speech_detected_at: Timestamp when speech was first detected + speech_job_id: Optional speech detection job ID to update with conversation_id + redis_client: Redis client (injected by decorator) + + Returns: + Dict with conversation_id, final_result_count, runtime_seconds + + Note: user_email is fetched from the database when needed. + """ + from rq import get_current_job + + from advanced_omi_backend.services.audio_stream import ( + TranscriptionResultsAggregator, + ) + + logger.info( + f"📝 Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})" + ) + + # Phase 1: Initialize job and conversation + current_job = get_current_job() + current_job.meta = {} + current_job.save_meta() + + conversation_id = await _initialize_conversation( + session_id=session_id, + user_id=user_id, + client_id=client_id, + speech_job_id=speech_job_id, + current_job=current_job, + redis_client=redis_client, + ) + + # Phase 2: Monitor conversation (polling loop) + aggregator = TranscriptionResultsAggregator(redis_client) + state = ConversationState( + conversation_id=conversation_id, + session_id=session_id, + user_id=user_id, + client_id=client_id, + start_time=time.time(), + ) + + await _monitor_conversation_loop(state, aggregator, current_job, redis_client) + logger.info( f"✅ Conversation {conversation_id} updates complete, checking for meaningful speech..." ) - # Determine end reason based on how we exited the loop - # Check session completion_reason from Redis (set atomically with status by finalize_session) + # Phase 3: Determine end reason + session_key = f"audio:session:{session_id}" completion_reason = await redis_client.hget(session_key, "completion_reason") completion_reason_str = completion_reason.decode() if completion_reason else None - # Determine end_reason with proper precedence: - # 1. completion_reason from Redis (set by WebSocket controller: websocket_disconnect, user_stopped) - # 2. close_requested (via API, plugin, or button press) - # 3. inactivity_timeout (no speech for SPEECH_INACTIVITY_THRESHOLD_SECONDS) - # 4. max_duration (conversation exceeded max runtime) - # 5. 
user_stopped (fallback for any other exit condition) if completion_reason_str: - end_reason = completion_reason_str - logger.info(f"📊 Using completion_reason from session: {end_reason}") - elif close_requested_reason: - end_reason = "close_requested" - logger.info(f"📊 Conversation closed by request: {close_requested_reason}") - elif timeout_triggered: - end_reason = "inactivity_timeout" - elif time.time() - start_time > max_runtime: - end_reason = "max_duration" + state.end_reason = completion_reason_str + logger.info(f"📊 Using completion_reason from session: {state.end_reason}") + elif state.close_requested_reason: + state.end_reason = "close_requested" + logger.info( + f"📊 Conversation closed by request: {state.close_requested_reason}" + ) + elif state.timeout_triggered: + state.end_reason = "inactivity_timeout" + elif time.time() - state.start_time > 10740: + state.end_reason = "max_duration" else: - end_reason = "user_stopped" + state.end_reason = "user_stopped" logger.info( - f"📊 Conversation {conversation_id[:12]} end_reason determined: {end_reason}" + f"📊 Conversation {conversation_id[:12]} end_reason determined: {state.end_reason}" ) - # Wrap all post-processing in try/finally to guarantee handle_end_of_conversation() - # is always called, even if an exception occurs during transcript saving, job - # enqueuing, etc. Without this, any failure leaves the session in a zombie state - # where the WebSocket is open but no new conversation can ever start. + # Phase 4-7: Post-processing (wrapped in try/finally for guaranteed cleanup) end_of_conversation_handled = False try: - # FINAL VALIDATION: Check if conversation has meaningful speech before post-processing - # This prevents empty/noise-only conversations from being processed and saved - # NOTE: Speech was already validated during streaming, so we skip this check - # to avoid false negatives from aggregated results lacking proper word-level data logger.info( "✅ Conversation has meaningful speech (validated during streaming), proceeding with post-processing" ) - # Wait for streaming transcription consumer to complete before reading transcript - # This fixes the race condition where conversation job reads transcript before - # streaming consumer stores all final results (seen as 24+ second delay in logs) - completion_key = f"transcription:complete:{session_id}" - max_wait_streaming = 30 # seconds - waited_streaming = 0.0 - while waited_streaming < max_wait_streaming: - completion_status = await redis_client.get(completion_key) - if completion_status: - status_str = ( - completion_status.decode() - if isinstance(completion_status, bytes) - else completion_status - ) - if status_str == "error": - logger.warning( - f"⚠️ Streaming transcription ended with error for {session_id}, proceeding anyway" - ) - else: - logger.info( - f"✅ Streaming transcription confirmed complete for {session_id}" - ) - break - await asyncio.sleep(0.5) - waited_streaming += 0.5 - - if waited_streaming >= max_wait_streaming: - logger.warning( - f"⚠️ Timed out waiting for streaming completion signal for {session_id} " - f"(waited {max_wait_streaming}s), proceeding with available transcript" + # Phase 4: Wait for streaming transcription to complete + if state.close_requested_reason: + logger.info( + f"⏩ Skipping transcription:complete wait for close_requested " + f"(reason={state.close_requested_reason})" ) + else: + completion_key = f"transcription:complete:{session_id}" + max_wait_streaming = 30 # seconds + waited_streaming = 0.0 + while waited_streaming < 
max_wait_streaming: + completion_status = await redis_client.get(completion_key) + if completion_status: + status_str = ( + completion_status.decode() + if isinstance(completion_status, bytes) + else completion_status + ) + if status_str == "error": + logger.warning( + f"⚠️ Streaming transcription ended with error for {session_id}, proceeding anyway" + ) + else: + logger.info( + f"✅ Streaming transcription confirmed complete for {session_id}" + ) + break + await asyncio.sleep(0.5) + waited_streaming += 0.5 + + if waited_streaming >= max_wait_streaming: + logger.warning( + f"⚠️ Timed out waiting for streaming completion signal for {session_id} " + f"(waited {max_wait_streaming}s), proceeding with available transcript" + ) - # Wait for audio_streaming_persistence_job to complete and write MongoDB chunks + # Phase 5: Wait for audio chunks in MongoDB from advanced_omi_backend.utils.audio_chunk_utils import wait_for_audio_chunks chunks_ready = await wait_for_audio_chunks( @@ -757,235 +1044,55 @@ async def open_conversation_job( ) if not chunks_ready: - # Mark conversation as deleted - has speech but no audio chunks to process await mark_conversation_deleted( conversation_id=conversation_id, deletion_reason="audio_chunks_not_ready", ) - - # Call shared cleanup/restart logic before returning end_of_conversation_handled = True return await handle_end_of_conversation( session_id=session_id, conversation_id=conversation_id, client_id=client_id, user_id=user_id, - start_time=start_time, - last_result_count=last_result_count, - timeout_triggered=timeout_triggered, + start_time=state.start_time, + last_result_count=state.last_result_count, + timeout_triggered=state.timeout_triggered, redis_client=redis_client, - end_reason=end_reason, + end_reason=state.end_reason, ) logger.info( f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}" ) - # Get final streaming transcript and save to conversation - logger.info( - f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}" + # Phase 6: Save streaming transcript + version_id = await _save_streaming_transcript( + session_id=session_id, + conversation_id=conversation_id, + aggregator=aggregator, ) - final_transcript = await aggregator.get_combined_results(session_id) - # Fetch conversation from database to ensure we have latest state - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if not conversation: - logger.error(f"❌ Conversation {conversation_id} not found in database") - raise ValueError(f"Conversation {conversation_id} not found") - - # Create transcript version from streaming results - version_id = f"streaming_{session_id[:12]}" - transcript_text = final_transcript.get("text", "") - words_data = final_transcript.get("words", []) # All words from aggregator - - # Convert words to Word objects (including per-word speaker labels if present) - words = [ - Conversation.Word( - word=w.get("word", ""), - start=w.get("start", 0.0), - end=w.get("end", 0.0), - confidence=w.get("confidence"), - speaker=w.get("speaker"), - speaker_confidence=w.get("speaker_confidence"), - ) - for w in words_data - ] - - # Use provider-supplied segments if available (from streaming diarization), - # otherwise leave empty for speaker recognition service to fill later. 
- segments_data = final_transcript.get("segments", []) - if segments_data: - segments = [ - Conversation.SpeakerSegment( - start=s.get("start", 0.0), - end=s.get("end", 0.0), - text=s.get("text", ""), - speaker=str(s.get("speaker", "Unknown")), - words=[ - Conversation.Word( - word=sw.get("word", ""), - start=sw.get("start", 0.0), - end=sw.get("end", 0.0), - confidence=sw.get("confidence"), - speaker=sw.get("speaker"), - speaker_confidence=sw.get("speaker_confidence"), - ) - for sw in s.get("words", []) - ], - ) - for s in segments_data - ] - else: - segments = [] - - # Determine provider from streaming results - provider = final_transcript.get("provider", "deepgram") - - # Determine diarization source if provider supplied segments - diarization_source = "provider" if segments else None - - # Add streaming transcript with words at version level - version = conversation.add_transcript_version( + # Phase 7: Enqueue post-processing pipeline + await _enqueue_post_processing( + conversation_id=conversation_id, + user_id=user_id, + client_id=client_id, version_id=version_id, - transcript=transcript_text, - words=words, # Store at version level - segments=segments, # Provider segments or empty (filled by speaker service later) - provider=provider, - model=provider, # Provider name as model - processing_time_seconds=None, # Not applicable for streaming - metadata={ - "source": "streaming", - "chunk_count": final_transcript.get("chunk_count", 0), - "word_count": len(words), - "provider_capabilities": {"diarization": bool(segments)}, - }, - set_as_active=True, - ) - version.diarization_source = diarization_source - - # Update placeholder conversation if it exists - if ( - getattr(conversation, "always_persist", False) - and getattr(conversation, "processing_status", None) - == "pending_transcription" - ): - # Keep placeholder status - will be updated by title_summary_job - logger.info( - f"📝 Placeholder conversation {conversation_id} has transcript, " - f"waiting for title/summary generation" - ) - - # Save conversation with streaming transcript - await conversation.save() - segment_info = ( - f"{len(segments)} provider segments (diarization_source={diarization_source})" - if segments - else "0 segments (pending speaker recognition)" - ) - logger.info( - f"✅ Saved streaming transcript: {len(transcript_text)} chars, " - f"{segment_info}, {len(words)} words " - f"for conversation {conversation_id[:12]}" - ) - - # Enqueue post-conversation processing pipeline - client_id = conversation.client_id if conversation else None - - # Check if always_batch_retranscribe is enabled - from advanced_omi_backend.config_loader import get_backend_config - - transcription_cfg = get_backend_config("transcription") - batch_retranscribe = False - if transcription_cfg: - from omegaconf import OmegaConf - - cfg_dict = OmegaConf.to_container(transcription_cfg, resolve=True) - batch_retranscribe = cfg_dict.get("always_batch_retranscribe", False) - - if batch_retranscribe: - # BATCH PATH: Streaming transcript saved as preview — user sees it immediately - # Full post-processing (speaker, memory, title) waits for batch transcript - from advanced_omi_backend.config import get_transcription_job_timeout - from advanced_omi_backend.controllers.queue_controller import ( - JOB_RESULT_TTL, - transcription_queue, - ) - from advanced_omi_backend.workers.transcription_jobs import ( - transcribe_full_audio_job, - ) - - batch_version_id = f"batch_{conversation_id[:12]}" - batch_job = transcription_queue.enqueue( - 
transcribe_full_audio_job, - conversation_id, - batch_version_id, - "always_batch_retranscribe", - job_timeout=get_transcription_job_timeout(), - result_ttl=JOB_RESULT_TTL, - job_id=f"batch_retranscribe_{conversation_id[:12]}", - description=f"Batch re-transcription for {conversation_id[:8]}", - meta={"conversation_id": conversation_id, "client_id": client_id}, - ) - - logger.info( - f"🔄 Batch re-transcribe enabled: enqueued batch job {batch_job.id} " - f"(streaming transcript is preview only)" - ) - - # Run post-processing ONLY after batch completes - job_ids = start_post_conversation_jobs( - conversation_id=conversation_id, - user_id=user_id, - transcript_version_id=batch_version_id, - depends_on_job=batch_job, - client_id=client_id, - end_reason=end_reason, - ) - - logger.info( - f"📥 Pipeline: batch_retranscribe({batch_job.id}) → " - f"speaker({job_ids['speaker_recognition']}) → " - f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " - f"event({job_ids['event_dispatch']})" - ) - else: - # NORMAL PATH: Process streaming transcript immediately (existing behavior) - job_ids = start_post_conversation_jobs( - conversation_id=conversation_id, - user_id=user_id, - transcript_version_id=version_id, # Pass the streaming transcript version ID - depends_on_job=None, # No dependency - streaming already succeeded - client_id=client_id, # Pass client_id for UI tracking - end_reason=end_reason, # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) - ) - - logger.info( - f"📥 Pipeline: speaker({job_ids['speaker_recognition']}) → " - f"[memory({job_ids['memory']}) + title({job_ids['title_summary']})] → " - f"event({job_ids['event_dispatch']})" - ) - - # Wait a moment to ensure jobs are registered in RQ - await asyncio.sleep(0.5) - - logger.info( - f"✅ Post-conversation pipeline started with event dispatch job (end_reason={end_reason})" + end_reason=state.end_reason, ) - # Call shared cleanup/restart logic + # Cleanup and session restart end_of_conversation_handled = True return await handle_end_of_conversation( session_id=session_id, conversation_id=conversation_id, client_id=client_id, user_id=user_id, - start_time=start_time, - last_result_count=last_result_count, - timeout_triggered=timeout_triggered, + start_time=state.start_time, + last_result_count=state.last_result_count, + timeout_triggered=state.timeout_triggered, redis_client=redis_client, - end_reason=end_reason, + end_reason=state.end_reason, ) finally: if not end_of_conversation_handled: @@ -999,9 +1106,9 @@ async def open_conversation_job( conversation_id=conversation_id, client_id=client_id, user_id=user_id, - start_time=start_time, - last_result_count=last_result_count, - timeout_triggered=timeout_triggered, + start_time=state.start_time, + last_result_count=state.last_result_count, + timeout_triggered=state.timeout_triggered, redis_client=redis_client, end_reason="error", ) diff --git a/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py index 8c67616d..43ed4f32 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/obsidian_jobs.py @@ -9,7 +9,7 @@ from rq.job import Job from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.services.obsidian_service import obsidian_service +from advanced_omi_backend.services.obsidian_service import get_obsidian_service logger = logging.getLogger(__name__) @@ -26,7 
+26,7 @@ def count_markdown_files(vault_path: str) -> int: @async_job(redis=True, beanie=False) -async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=None) -> dict: # type: ignore +async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=None) -> dict: # type: ignore """ Long-running ingestion job enqueued on the default RQ queue. """ @@ -42,7 +42,7 @@ async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=N job.save_meta() try: - obsidian_service.setup_database() + get_obsidian_service().setup_database() except Exception as exc: logger.exception("Database setup failed for job %s: %s", job.id, exc) job.meta["status"] = "failed" @@ -80,16 +80,17 @@ async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=N return {"status": "canceled"} try: - note_data = obsidian_service.parse_obsidian_note(root, filename, vault_path) - chunks = await obsidian_service.chunking_and_embedding(note_data) + svc = get_obsidian_service() + note_data = svc.parse_obsidian_note(root, filename, vault_path) + chunks = await svc.chunking_and_embedding(note_data) if chunks: - obsidian_service.ingest_note_and_chunks(note_data, chunks) - + svc.ingest_note_and_chunks(note_data, chunks) + processed += 1 job.meta["processed"] = processed job.meta["last_file"] = os.path.join(root, filename) job.save_meta() - + except Exception as exc: logger.error("Processing %s failed: %s", filename, exc) errors.append(f"{filename}: {exc}") @@ -103,5 +104,5 @@ async def ingest_obsidian_vault_job(job_id: str, vault_path: str, redis_client=N "status": "finished", "processed": processed, "total": total, - "errors": errors + "errors": errors, } diff --git a/backends/advanced/src/scripts/cleanup_state.py b/backends/advanced/src/scripts/cleanup_state.py index 49bfd332..253f3806 100644 --- a/backends/advanced/src/scripts/cleanup_state.py +++ b/backends/advanced/src/scripts/cleanup_state.py @@ -35,7 +35,13 @@ from qdrant_client.models import Distance, VectorParams from rich.console import Console from rich.panel import Panel - from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeElapsedColumn + from rich.progress import ( + BarColumn, + Progress, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, + ) from rich.prompt import Confirm from rich.table import Table from rich.text import Text @@ -62,12 +68,18 @@ # Helpers # --------------------------------------------------------------------------- + def get_qdrant_collection_name() -> str: """Get Qdrant collection name from memory service configuration.""" try: memory_config = build_memory_config_from_env() - if hasattr(memory_config, "vector_store_config") and memory_config.vector_store_config: - return memory_config.vector_store_config.get("collection_name", "chronicle_memories") + if ( + hasattr(memory_config, "vector_store_config") + and memory_config.vector_store_config + ): + return memory_config.vector_store_config.get( + "collection_name", "chronicle_memories" + ) except Exception: pass return "chronicle_memories" @@ -93,6 +105,7 @@ def _human_size(nbytes: int) -> str: # Stats # --------------------------------------------------------------------------- + class Stats: """Track counts across the system.""" @@ -176,8 +189,10 @@ async def gather_stats( # LangFuse prompts if langfuse_client: try: - prompts_response = langfuse_client.prompts.list(limit=100) - s.langfuse_prompts = len(prompts_response.data) if hasattr(prompts_response, "data") else 0 + prompts_response = 
langfuse_client.api.prompts.list(limit=100) + s.langfuse_prompts = ( + len(prompts_response.data) if hasattr(prompts_response, "data") else 0 + ) except Exception: pass @@ -205,13 +220,21 @@ def render_stats_table(stats: Stats, title: str = "Current State") -> Table: def row(label, value, style="white"): table.add_row(label, f"[{style}]{value}[/{style}]") - row("Conversations", str(stats.conversations), "green" if stats.conversations else "dim") + row( + "Conversations", + str(stats.conversations), + "green" if stats.conversations else "dim", + ) row( " with transcripts", str(stats.conversations_with_transcript), "green" if stats.conversations_with_transcript else "dim", ) - row("Audio Chunks", str(stats.audio_chunks), "green" if stats.audio_chunks else "dim") + row( + "Audio Chunks", + str(stats.audio_chunks), + "green" if stats.audio_chunks else "dim", + ) row("Waveforms", str(stats.waveforms), "dim") row("Chat Sessions", str(stats.chat_sessions), "dim") row("Chat Messages", str(stats.chat_messages), "dim") @@ -220,7 +243,11 @@ def row(label, value, style="white"): row("Memories (Qdrant)", str(stats.memories), "yellow" if stats.memories else "dim") row("Neo4j Nodes", str(stats.neo4j_nodes), "dim") row("Neo4j Relationships", str(stats.neo4j_relationships), "dim") - row("LangFuse Prompts", str(stats.langfuse_prompts), "yellow" if stats.langfuse_prompts else "dim") + row( + "LangFuse Prompts", + str(stats.langfuse_prompts), + "yellow" if stats.langfuse_prompts else "dim", + ) table.add_section() row("Redis Jobs", str(stats.redis_jobs), "dim") row("Legacy WAV Files", str(stats.legacy_wav), "dim") @@ -234,6 +261,7 @@ def row(label, value, style="white"): # Backup # --------------------------------------------------------------------------- + class BackupResult: """Track which backup exports succeeded or failed.""" @@ -241,7 +269,13 @@ def __init__(self): self.exports: dict[str, dict] = {} # name -> {ok, path, size, sha256, error} def record(self, name: str, path: Optional[Path], ok: bool, error: str = ""): - entry = {"ok": ok, "error": error, "path": str(path) if path else None, "size": 0, "sha256": ""} + entry = { + "ok": ok, + "error": error, + "path": str(path) if path else None, + "size": 0, + "sha256": "", + } if ok and path and path.exists(): entry["size"] = path.stat().st_size entry["sha256"] = _file_sha256(path) @@ -255,10 +289,16 @@ def all_ok(self) -> bool: def critical_ok(self) -> bool: """conversations, audio_metadata, and annotations are critical.""" critical = ("conversations", "audio_metadata", "annotations") - return all(self.exports.get(n, {}).get("ok", False) for n in critical if n in self.exports) + return all( + self.exports.get(n, {}).get("ok", False) + for n in critical + if n in self.exports + ) def render_table(self) -> Table: - table = Table(title="Backup Verification", border_style="dim", title_style="bold white") + table = Table( + title="Backup Verification", border_style="dim", title_style="bold white" + ) table.add_column("Export", style="white", min_width=24) table.add_column("Status", justify="center", min_width=8) table.add_column("Size", justify="right", min_width=10) @@ -285,7 +325,14 @@ def total_size(self) -> int: class BackupManager: """Export data to a timestamped backup directory.""" - def __init__(self, backup_dir: str, export_audio: bool, mongo_db: Any, neo4j_driver: Any = None, langfuse_client: Any = None): + def __init__( + self, + backup_dir: str, + export_audio: bool, + mongo_db: Any, + neo4j_driver: Any = None, + langfuse_client: Any = None, + ): 
self.backup_dir = Path(backup_dir) self.export_audio = export_audio self.mongo_db = mongo_db @@ -323,7 +370,9 @@ async def run( steps.append(("audio_wav", self._export_audio_wav)) if qdrant_client: - steps.append(("memories", lambda r: self._export_memories(qdrant_client, r))) + steps.append( + ("memories", lambda r: self._export_memories(qdrant_client, r)) + ) if self.neo4j_driver: steps.append(("neo4j_graph", self._export_neo4j)) @@ -336,7 +385,8 @@ async def run( for name, func in steps: progress.update(task, description=f"Exporting {name}...") try: - path = await func(result) if asyncio.iscoroutinefunction(func) else func(result) + ret = func(result) + path = await ret if asyncio.iscoroutine(ret) else ret if not result.exports.get(name): # func didn't record itself - record success result.record(name, path, True) @@ -384,19 +434,21 @@ async def _export_audio_metadata(self, result: BackupResult) -> Path: cursor = collection.find({}) data = [] async for chunk in cursor: - data.append({ - "conversation_id": chunk.get("conversation_id"), - "chunk_index": chunk.get("chunk_index"), - "start_time": chunk.get("start_time"), - "end_time": chunk.get("end_time"), - "duration": chunk.get("duration"), - "original_size": chunk.get("original_size"), - "compressed_size": chunk.get("compressed_size"), - "sample_rate": chunk.get("sample_rate", 16000), - "channels": chunk.get("channels", 1), - "has_speech": chunk.get("has_speech"), - "created_at": str(chunk.get("created_at", "")), - }) + data.append( + { + "conversation_id": chunk.get("conversation_id"), + "chunk_index": chunk.get("chunk_index"), + "start_time": chunk.get("start_time"), + "end_time": chunk.get("end_time"), + "duration": chunk.get("duration"), + "original_size": chunk.get("original_size"), + "compressed_size": chunk.get("compressed_size"), + "sample_rate": chunk.get("sample_rate", 16000), + "channels": chunk.get("channels", 1), + "has_speech": chunk.get("has_speech"), + "created_at": str(chunk.get("created_at", "")), + } + ) path = self.backup_path / "audio_chunks_metadata.json" with open(path, "w") as f: json.dump(data, f, indent=2, default=str) @@ -417,14 +469,16 @@ async def _export_chat_sessions(self, result: BackupResult) -> Path: cursor = collection.find({}) data = [] async for session in cursor: - data.append({ - "session_id": session.get("session_id"), - "user_id": session.get("user_id"), - "title": session.get("title"), - "created_at": str(session.get("created_at", "")), - "updated_at": str(session.get("updated_at", "")), - "metadata": session.get("metadata", {}), - }) + data.append( + { + "session_id": session.get("session_id"), + "user_id": session.get("user_id"), + "title": session.get("title"), + "created_at": str(session.get("created_at", "")), + "updated_at": str(session.get("updated_at", "")), + "metadata": session.get("metadata", {}), + } + ) path = self.backup_path / "chat_sessions.json" with open(path, "w") as f: json.dump(data, f, indent=2, default=str) @@ -436,16 +490,18 @@ async def _export_chat_messages(self, result: BackupResult) -> Path: cursor = collection.find({}) data = [] async for msg in cursor: - data.append({ - "message_id": msg.get("message_id"), - "session_id": msg.get("session_id"), - "user_id": msg.get("user_id"), - "role": msg.get("role"), - "content": msg.get("content"), - "timestamp": str(msg.get("timestamp", "")), - "memories_used": msg.get("memories_used", []), - "metadata": msg.get("metadata", {}), - }) + data.append( + { + "message_id": msg.get("message_id"), + "session_id": 
msg.get("session_id"), + "user_id": msg.get("user_id"), + "role": msg.get("role"), + "content": msg.get("content"), + "timestamp": str(msg.get("timestamp", "")), + "memories_used": msg.get("memories_used", []), + "metadata": msg.get("metadata", {}), + } + ) path = self.backup_path / "chat_messages.json" with open(path, "w") as f: json.dump(data, f, indent=2, default=str) @@ -479,7 +535,9 @@ async def _export_audio_wav(self, result: BackupResult) -> Optional[Path]: for conv in conversations: try: - ok = await self._export_conversation_audio(conv.conversation_id, audio_dir) + ok = await self._export_conversation_audio( + conv.conversation_id, audio_dir + ) if ok: exported += 1 except Exception as e: @@ -491,11 +549,19 @@ async def _export_audio_wav(self, result: BackupResult) -> Optional[Path]: result.record("audio_wav", audio_dir, ok, error) return audio_dir - async def _export_conversation_audio(self, conversation_id: str, audio_dir: Path) -> bool: + async def _export_conversation_audio( + self, conversation_id: str, audio_dir: Path + ) -> bool: """Decode Opus chunks to WAV for a single conversation. Returns True if audio was exported.""" - chunks = await AudioChunkDocument.find( - AudioChunkDocument.conversation_id == conversation_id - ).sort("+chunk_index").to_list() + from advanced_omi_backend.utils.audio_chunk_utils import decode_opus_to_pcm + + chunks = ( + await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id + ) + .sort("+chunk_index") + .to_list() + ) if not chunks: return False @@ -506,44 +572,49 @@ async def _export_conversation_audio(self, conversation_id: str, audio_dir: Path sample_rate = chunks[0].sample_rate channels = chunks[0].channels - # Try opuslib, fall back gracefully - try: - import opuslib - - decoder = opuslib.Decoder(sample_rate, channels) - pcm_parts = [] - for chunk in chunks: - frame_size = int(sample_rate * chunk.duration / channels) - decoded = decoder.decode(bytes(chunk.audio_data), frame_size) - pcm_parts.append(decoded) - except ImportError: - logger.warning("opuslib not available, skipping audio export") - return False - except Exception as e: - logger.warning(f"Opus decode error for {conversation_id}: {e}") - return False + # Decode all chunks using FFmpeg (same path as UI playback) + pcm_buffer = bytearray() + for chunk in chunks: + try: + pcm_data = await decode_opus_to_pcm( + opus_data=bytes(chunk.audio_data), + sample_rate=sample_rate, + channels=channels, + ) + pcm_buffer.extend(pcm_data) + except Exception as e: + logger.warning( + f"Opus decode error for {conversation_id} chunk {chunk.chunk_index}: {e}" + ) + continue - all_pcm = b"".join(pcm_parts) - samples = struct.unpack(f"<{len(all_pcm) // 2}h", all_pcm) + if not pcm_buffer: + return False # Split into 1-minute WAV files - samples_per_minute = sample_rate * 60 * channels import wave + bytes_per_minute = ( + sample_rate * channels * 2 * 60 + ) # 16-bit = 2 bytes per sample + all_pcm = bytes(pcm_buffer) chunk_num = 1 - for start in range(0, len(samples), samples_per_minute): + + for start in range(0, len(all_pcm), bytes_per_minute): wav_path = conv_dir / f"chunk_{chunk_num:03d}.wav" - segment = samples[start : start + samples_per_minute] + segment_pcm = all_pcm[start : start + bytes_per_minute] with wave.open(str(wav_path), "wb") as wf: wf.setnchannels(channels) wf.setsampwidth(2) wf.setframerate(sample_rate) - wf.writeframes(struct.pack(f"<{len(segment)}h", *segment)) + wf.writeframes(segment_pcm) chunk_num += 1 return True - async def _export_memories(self, 
qdrant_client: AsyncQdrantClient, result: BackupResult) -> Path: + async def _export_memories( + self, qdrant_client: AsyncQdrantClient, result: BackupResult + ) -> Path: collection_name = get_qdrant_collection_name() collections = await qdrant_client.get_collections() exists = any(c.name == collection_name for c in collections.collections) @@ -568,7 +639,9 @@ async def _export_memories(self, qdrant_client: AsyncQdrantClient, result: Backu if not points: break for pt in points: - data.append({"id": str(pt.id), "vector": pt.vector, "payload": pt.payload}) + data.append( + {"id": str(pt.id), "vector": pt.vector, "payload": pt.payload} + ) if next_offset is None: break offset = next_offset @@ -583,7 +656,9 @@ def _export_neo4j(self, result: BackupResult) -> Path: try: with self.neo4j_driver.session() as session: nodes_data = [] - for record in session.run("MATCH (n) RETURN n, labels(n) AS labels, elementId(n) AS eid"): + for record in session.run( + "MATCH (n) RETURN n, labels(n) AS labels, elementId(n) AS eid" + ): node = dict(record["n"]) node["_labels"] = record["labels"] node["_element_id"] = record["eid"] @@ -594,15 +669,24 @@ def _export_neo4j(self, result: BackupResult) -> Path: "MATCH (a)-[r]->(b) RETURN elementId(a) AS src, type(r) AS rel_type, " "properties(r) AS props, elementId(b) AS dst" ): - rels_data.append({ - "source": record["src"], - "type": record["rel_type"], - "properties": dict(record["props"]) if record["props"] else {}, - "target": record["dst"], - }) + rels_data.append( + { + "source": record["src"], + "type": record["rel_type"], + "properties": ( + dict(record["props"]) if record["props"] else {} + ), + "target": record["dst"], + } + ) with open(path, "w") as f: - json.dump({"nodes": nodes_data, "relationships": rels_data}, f, indent=2, default=str) + json.dump( + {"nodes": nodes_data, "relationships": rels_data}, + f, + indent=2, + default=str, + ) result.record("neo4j_graph", path, True) except Exception as e: result.record("neo4j_graph", None, False, str(e)) @@ -617,7 +701,7 @@ def _export_langfuse_prompts(self, result: BackupResult) -> Path: try: # Discover all prompt names via list API prompt_names = [] - prompts_response = self.langfuse_client.prompts.list(limit=100) + prompts_response = self.langfuse_client.api.prompts.list(limit=100) if hasattr(prompts_response, "data"): for p in prompts_response.data: prompt_names.append(p.name) @@ -654,6 +738,7 @@ def _export_langfuse_prompts(self, result: BackupResult) -> Path: # Cleanup # --------------------------------------------------------------------------- + class CleanupManager: """Delete data across all services.""" @@ -768,6 +853,7 @@ def _cleanup_legacy_wav(self, stats: Stats): # Connection setup # --------------------------------------------------------------------------- + async def connect_services(): """Initialize all service connections. 
Returns (mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client).""" # MongoDB @@ -777,7 +863,13 @@ async def connect_services(): mongo_db = mongo_client[mongodb_database] await init_beanie( database=mongo_db, - document_models=[Conversation, AudioChunkDocument, WaveformData, User, Annotation], + document_models=[ + Conversation, + AudioChunkDocument, + WaveformData, + User, + Annotation, + ], ) # Redis @@ -827,6 +919,7 @@ async def connect_services(): # Display helpers # --------------------------------------------------------------------------- + def print_header(): console.print() console.print( @@ -841,11 +934,19 @@ def print_header(): def print_dry_run(stats: Stats, args): console.print() - console.print(Panel("[bold yellow]DRY-RUN MODE[/bold yellow] - no changes will be made", border_style="yellow")) + console.print( + Panel( + "[bold yellow]DRY-RUN MODE[/bold yellow] - no changes will be made", + border_style="yellow", + ) + ) console.print() if args.backup or args.backup_only: - console.print("[cyan]Would create backup at:[/cyan]", str(Path(args.backup_dir) / f"backup_...")) + console.print( + "[cyan]Would create backup at:[/cyan]", + str(Path(args.backup_dir) / f"backup_..."), + ) if args.export_audio: audio_note = f"(from {stats.conversations_with_transcript} conversations with transcripts)" console.print(f"[cyan]Would export audio WAV files[/cyan] {audio_note}") @@ -887,18 +988,26 @@ def print_confirmation(stats: Stats, args) -> bool: console.print() if args.backup or args.backup_only: - console.print(Panel( - f"[green]Backup will be created at:[/green] {args.backup_dir}\n" - + ("[green]Audio WAV export included[/green]" if args.export_audio else "[dim]Audio WAV export: off[/dim]"), - title="Backup", - border_style="green", - )) + console.print( + Panel( + f"[green]Backup will be created at:[/green] {args.backup_dir}\n" + + ( + "[green]Audio WAV export included[/green]" + if args.export_audio + else "[dim]Audio WAV export: off[/dim]" + ), + title="Backup", + border_style="green", + ) + ) elif not args.backup_only: - console.print(Panel( - "[bold red]No backup will be created![/bold red]\nData will be permanently lost.", - title="Warning", - border_style="red", - )) + console.print( + Panel( + "[bold red]No backup will be created![/bold red]\nData will be permanently lost.", + title="Warning", + border_style="red", + ) + ) if not args.backup_only: items = [ @@ -911,18 +1020,22 @@ def print_confirmation(stats: Stats, args) -> bool: f" {stats.memories} memories", ] if stats.neo4j_nodes: - items.append(f" {stats.neo4j_nodes} Neo4j nodes + {stats.neo4j_relationships} relationships") + items.append( + f" {stats.neo4j_nodes} Neo4j nodes + {stats.neo4j_relationships} relationships" + ) items.append(f" {stats.redis_jobs} Redis jobs") if args.include_wav: items.append(f" {stats.legacy_wav} legacy WAV files") if args.delete_users: items.append(f" [bold red]{stats.users} users (DANGEROUS)[/bold red]") - console.print(Panel( - "\n".join(items), - title="[bold red]Will Delete[/bold red]", - border_style="red", - )) + console.print( + Panel( + "\n".join(items), + title="[bold red]Will Delete[/bold red]", + border_style="red", + ) + ) console.print() return Confirm.ask("[bold]Proceed?[/bold]", default=False) @@ -932,6 +1045,7 @@ def print_confirmation(stats: Stats, args) -> bool: # Main # --------------------------------------------------------------------------- + async def main(): parser = argparse.ArgumentParser( description="Chronicle Cleanup & Backup Tool", @@ -947,14 +1061,37 
@@ async def main(): """, ) - parser.add_argument("--backup", action="store_true", help="Create backup before cleaning") - parser.add_argument("--backup-only", action="store_true", help="Create backup WITHOUT cleaning (safe)") - parser.add_argument("--export-audio", action="store_true", help="Include audio WAV export in backup (conversations with transcripts only)") - parser.add_argument("--include-wav", action="store_true", help="Include legacy WAV file cleanup") - parser.add_argument("--dry-run", action="store_true", help="Preview without making changes") + parser.add_argument( + "--backup", action="store_true", help="Create backup before cleaning" + ) + parser.add_argument( + "--backup-only", + action="store_true", + help="Create backup WITHOUT cleaning (safe)", + ) + parser.add_argument( + "--export-audio", + action="store_true", + help="Include audio WAV export in backup (conversations with transcripts only)", + ) + parser.add_argument( + "--include-wav", action="store_true", help="Include legacy WAV file cleanup" + ) + parser.add_argument( + "--dry-run", action="store_true", help="Preview without making changes" + ) parser.add_argument("--force", action="store_true", help="Skip confirmation prompt") - parser.add_argument("--backup-dir", type=str, default="/app/data/backups", help="Backup directory (default: /app/data/backups)") - parser.add_argument("--delete-users", action="store_true", help="DANGEROUS: Also delete user accounts") + parser.add_argument( + "--backup-dir", + type=str, + default="/app/data/backups", + help="Backup directory (default: /app/data/backups)", + ) + parser.add_argument( + "--delete-users", + action="store_true", + help="DANGEROUS: Also delete user accounts", + ) args = parser.parse_args() @@ -968,11 +1105,15 @@ async def main(): # Connect with console.status("[bold cyan]Connecting to services...", spinner="dots"): - mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client = await connect_services() + mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client = ( + await connect_services() + ) # Gather stats with console.status("[bold cyan]Gathering statistics...", spinner="dots"): - stats = await gather_stats(mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client) + stats = await gather_stats( + mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client + ) console.print() console.print(render_stats_table(stats, "Current Backend State")) @@ -993,7 +1134,9 @@ async def main(): do_backup = args.backup or args.backup_only if do_backup: console.print() - backup_mgr = BackupManager(args.backup_dir, args.export_audio, mongo_db, neo4j_driver, langfuse_client) + backup_mgr = BackupManager( + args.backup_dir, args.export_audio, mongo_db, neo4j_driver, langfuse_client + ) result = await backup_mgr.run(qdrant_client, stats) console.print() @@ -1006,44 +1149,61 @@ async def main(): if not result.critical_ok: console.print() - console.print(Panel( - "[bold red]Critical backup exports failed![/bold red]\n" - "Conversations or audio metadata could not be exported.\n" - "Cleanup will NOT proceed to protect your data.", - title="Backup Verification Failed", - border_style="red", - )) + console.print( + Panel( + "[bold red]Critical backup exports failed![/bold red]\n" + "Conversations or audio metadata could not be exported.\n" + "Cleanup will NOT proceed to protect your data.", + title="Backup Verification Failed", + border_style="red", + ) + ) sys.exit(1) if not result.all_ok: console.print() - console.print("[yellow]Some non-critical 
exports failed (see table above).[/yellow]") + console.print( + "[yellow]Some non-critical exports failed (see table above).[/yellow]" + ) # If backup-only, we're done if args.backup_only: console.print() - console.print(Panel( - "[bold green]Backup completed successfully![/bold green]\n" - "No data was deleted.", - border_style="green", - )) + console.print( + Panel( + "[bold green]Backup completed successfully![/bold green]\n" + "No data was deleted.", + border_style="green", + ) + ) return # Cleanup console.print() cleanup_mgr = CleanupManager( - mongo_db, redis_conn, qdrant_client, args.include_wav, args.delete_users, neo4j_driver + mongo_db, + redis_conn, + qdrant_client, + args.include_wav, + args.delete_users, + neo4j_driver, ) success = await cleanup_mgr.run(stats) if not success: - console.print(Panel("[bold red]Cleanup encountered errors![/bold red]", border_style="red")) + console.print( + Panel( + "[bold red]Cleanup encountered errors![/bold red]", border_style="red" + ) + ) sys.exit(1) # Verify console.print() with console.status("[bold cyan]Verifying cleanup...", spinner="dots"): - final_stats = await gather_stats(mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client) + final_stats = await gather_stats( + mongo_db, redis_conn, qdrant_client, neo4j_driver, langfuse_client + ) console.print(render_stats_table(final_stats, "After Cleanup")) diff --git a/backends/advanced/tests/test_obsidian_service.py b/backends/advanced/tests/test_obsidian_service.py index 0daafc1a..e9408290 100644 --- a/backends/advanced/tests/test_obsidian_service.py +++ b/backends/advanced/tests/test_obsidian_service.py @@ -1,41 +1,56 @@ -import unittest import asyncio -from unittest.mock import MagicMock, patch, AsyncMock -import sys import os +import sys +import unittest +from unittest.mock import AsyncMock, MagicMock, patch -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../src'))) +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) from advanced_omi_backend.services.obsidian_service import ( - ObsidianService, ObsidianSearchError, + ObsidianService, ) + class TestObsidianService(unittest.TestCase): def setUp(self): # Patch load_root_config - self.config_patcher = patch('advanced_omi_backend.services.obsidian_service.load_root_config') + self.config_patcher = patch( + "advanced_omi_backend.services.obsidian_service.load_root_config" + ) self.mock_load_config = self.config_patcher.start() self.mock_load_config.return_value = { - 'defaults': {'llm': 'gpt-4', 'embedding': 'text-embedding-3-small'}, - 'models': [ - {'name': 'gpt-4', 'model_url': 'https://api.openai.com/v1', 'api_key': 'sk-test'}, - {'name': 'text-embedding-3-small', 'model_name': 'text-embedding-3-small', 'embedding_dimensions': 1536, 'model_url': 'https://api.openai.com/v1', 'api_key': 'sk-test'} - ] + "defaults": {"llm": "gpt-4", "embedding": "text-embedding-3-small"}, + "models": [ + { + "name": "gpt-4", + "model_url": "https://api.openai.com/v1", + "api_key": "sk-test", + }, + { + "name": "text-embedding-3-small", + "model_name": "text-embedding-3-small", + "embedding_dimensions": 1536, + "model_url": "https://api.openai.com/v1", + "api_key": "sk-test", + }, + ], } self.addCleanup(self.config_patcher.stop) # Patch embedding helper self.embedding_patcher = patch( - 'advanced_omi_backend.services.obsidian_service.generate_openai_embeddings', - new_callable=AsyncMock + "advanced_omi_backend.services.obsidian_service.generate_openai_embeddings", + new_callable=AsyncMock, 
) self.mock_generate_embeddings = self.embedding_patcher.start() self.addCleanup(self.embedding_patcher.stop) # Patch GraphDatabase - self.graph_db_patcher = patch('advanced_omi_backend.services.neo4j_client.GraphDatabase') + self.graph_db_patcher = patch( + "advanced_omi_backend.services.neo4j_client.GraphDatabase" + ) self.mock_graph_db = self.graph_db_patcher.start() self.mock_driver = MagicMock() self.mock_session = MagicMock() @@ -44,14 +59,17 @@ def setUp(self): self.addCleanup(self.graph_db_patcher.stop) # Patch environment variables - self.env_patcher = patch.dict(os.environ, { - "NEO4J_HOST": "localhost", - "NEO4J_USER": "neo4j", - "NEO4J_PASSWORD": "password" - }) + self.env_patcher = patch.dict( + os.environ, + { + "NEO4J_HOST": "localhost", + "NEO4J_USER": "neo4j", + "NEO4J_PASSWORD": "password", + }, + ) self.env_patcher.start() self.addCleanup(self.env_patcher.stop) - + # Initialize Service self.service = ObsidianService() @@ -59,45 +77,45 @@ def test_search_obsidian_success(self): # Setup mock embedding response mock_embedding = [0.1, 0.2, 0.3] self.mock_generate_embeddings.return_value = [mock_embedding] - + # Setup mock Neo4j results mock_record1 = { - 'source': 'Note1', - 'content': 'Content of chunk 1', - 'tags': ['tag1', 'tag2'], - 'outgoing_links': ['Note2'], - 'score': 0.95 + "source": "Note1", + "content": "Content of chunk 1", + "tags": ["tag1", "tag2"], + "outgoing_links": ["Note2"], + "score": 0.95, } mock_record2 = { - 'source': 'Note2', - 'content': 'Content of chunk 2', - 'tags': [], - 'outgoing_links': [], - 'score': 0.90 + "source": "Note2", + "content": "Content of chunk 2", + "tags": [], + "outgoing_links": [], + "score": 0.90, } - + # The session.run returns an iterable of records self.mock_session.run.return_value = [mock_record1, mock_record2] - + # Execute search response = asyncio.run(self.service.search_obsidian("test query", limit=2)) - + # Assertions # 1. Check embedding call self.mock_generate_embeddings.assert_awaited_once() - + # 2. Check Neo4j query execution self.mock_session.run.assert_called_once() args, kwargs = self.mock_session.run.call_args self.assertIn("CALL db.index.vector.queryNodes", args[0]) - self.assertEqual(kwargs['vector'], mock_embedding) - self.assertEqual(kwargs['limit'], 2) - + self.assertEqual(kwargs["vector"], mock_embedding) + self.assertEqual(kwargs["limit"], 2) + # 3. 
Check results formatting - self.assertEqual(len(response['results']), 2) - + self.assertEqual(len(response["results"]), 2) + # Check first result format - first_entry = response['results'][0] + first_entry = response["results"][0] self.assertIn("SOURCE: Note1", first_entry) self.assertIn("TAGS: tag1, tag2", first_entry) self.assertIn("RELATED NOTES: Note2", first_entry) @@ -105,19 +123,22 @@ def test_search_obsidian_success(self): def test_setup_database(self): self.service.setup_database() - + # Verify constraints and index creation calls self.assertTrue(self.mock_session.run.called) # It should run at least 3 queries: Note constraint, Chunk constraint, Vector Index self.assertGreaterEqual(self.mock_session.run.call_count, 3) - + calls = [call[0][0] for call in self.mock_session.run.call_args_list] self.assertTrue(any("CREATE CONSTRAINT note_path" in c for c in calls)) self.assertTrue(any("CREATE CONSTRAINT chunk_id" in c for c in calls)) self.assertTrue(any("CREATE VECTOR INDEX chunk_embeddings" in c for c in calls)) - @patch('advanced_omi_backend.services.obsidian_service.chunk_text_with_spacy') - def test_chunking_and_embedding_uses_shared_chunker(self, mock_chunker): + @patch( + "advanced_omi_backend.services.obsidian_service.semantic_chunk_text", + new_callable=AsyncMock, + ) + def test_chunking_and_embedding_uses_semantic_chunker(self, mock_chunker): mock_chunker.return_value = ["part1"] self.mock_generate_embeddings.return_value = [[0.1, 0.2]] note_data = { @@ -130,7 +151,18 @@ def test_chunking_and_embedding_uses_shared_chunker(self, mock_chunker): "tags": [], } chunks = asyncio.run(self.service.chunking_and_embedding(note_data)) - mock_chunker.assert_called_once_with("sample", max_tokens=self.service.chunk_word_limit) + mock_chunker.assert_awaited_once() + call_kwargs = mock_chunker.call_args + self.assertEqual( + call_kwargs[1]["buffer_size"], self.service.semantic_buffer_size + ) + self.assertEqual( + call_kwargs[1]["breakpoint_percentile_threshold"], + self.service.semantic_breakpoint_percentile, + ) + self.assertEqual( + call_kwargs[1]["max_chunk_words"], self.service.max_chunk_words + ) self.mock_generate_embeddings.assert_awaited_once() self.assertEqual(len(chunks), 1) @@ -142,21 +174,19 @@ def test_ingest_note_and_chunks(self): "content": "some content", "wordcount": 2, "links": ["OtherNote"], - "tags": ["tag1"] + "tags": ["tag1"], } - chunks = [ - {"text": "chunk1", "embedding": [0.1, 0.2]} - ] - + chunks = [{"text": "chunk1", "embedding": [0.1, 0.2]}] + self.service.ingest_note_and_chunks(note_data, chunks) - + # Verify DB calls # 1. Note + Folder merge # 2. Chunk merge # 3. Tag merge # 4. 
Link merge self.assertGreaterEqual(self.mock_session.run.call_count, 4) - + calls = [call[0][0] for call in self.mock_session.run.call_args_list] self.assertTrue(any("MERGE (f:Folder" in c for c in calls)) self.assertTrue(any("MERGE (c:Chunk" in c for c in calls)) @@ -166,10 +196,10 @@ def test_ingest_note_and_chunks(self): def test_search_obsidian_embedding_fail(self): # Mock embedding failure (raises exception) self.mock_generate_embeddings.side_effect = Exception("API Error") - + with self.assertRaises(ObsidianSearchError) as ctx: asyncio.run(self.service.search_obsidian("test query")) - + self.assertEqual(ctx.exception.stage, "embedding") self.assertIn("API Error", str(ctx.exception)) self.mock_session.run.assert_not_called() @@ -178,13 +208,13 @@ def test_search_obsidian_db_fail(self): # Setup mock embedding mock_embedding = [0.1] self.mock_generate_embeddings.return_value = [mock_embedding] - + # Mock DB failure self.mock_session.run.side_effect = Exception("DB Connection Failed") - + with self.assertRaises(ObsidianSearchError) as ctx: asyncio.run(self.service.search_obsidian("test query")) - + self.assertEqual(ctx.exception.stage, "database") self.assertIn("DB Connection Failed", str(ctx.exception)) @@ -192,13 +222,14 @@ def test_search_obsidian_empty_results(self): # Setup mock embedding mock_embedding = [0.1] self.mock_generate_embeddings.return_value = [mock_embedding] - + # Mock empty DB results self.mock_session.run.return_value = [] - + response = asyncio.run(self.service.search_obsidian("test query")) - - self.assertEqual(response['results'], []) -if __name__ == '__main__': + self.assertEqual(response["results"], []) + + +if __name__ == "__main__": unittest.main() diff --git a/backends/advanced/tests/test_text_chunking.py b/backends/advanced/tests/test_text_chunking.py new file mode 100644 index 00000000..62a32c6f --- /dev/null +++ b/backends/advanced/tests/test_text_chunking.py @@ -0,0 +1,376 @@ +"""Unit tests for semantic text chunking.""" + +import asyncio +import math +import os +import sys +import unittest +from unittest.mock import AsyncMock + +import numpy as np + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../src"))) + +from advanced_omi_backend.utils.text_chunking import ( + _build_buffered_sentences, + _cosine_distances, + _enforce_max_chunk_words, + _find_breakpoints, + semantic_chunk_text, + split_sentences, +) + + +class TestSplitSentences(unittest.TestCase): + def test_basic_splitting(self): + text = "Hello world. How are you? I am fine!" + result = split_sentences(text) + self.assertEqual(result, ["Hello world.", "How are you?", "I am fine!"]) + + def test_single_sentence(self): + self.assertEqual(split_sentences("Just one sentence."), ["Just one sentence."]) + + def test_empty_string(self): + self.assertEqual(split_sentences(""), []) + self.assertEqual(split_sentences(" "), []) + + def test_no_terminal_punctuation(self): + result = split_sentences("No punctuation here") + self.assertEqual(result, ["No punctuation here"]) + + def test_multiple_spaces(self): + result = split_sentences("First sentence. Second sentence.") + self.assertEqual(len(result), 2) + + def test_newlines_split_sentences(self): + result = split_sentences("Hello world.\nNew line here.") + # Newline after punctuation splits into separate sentences + self.assertEqual(len(result), 2) + + def test_preserves_sentence_content(self): + text = "The temperature is 3.5 degrees. It is cold." 
+ result = split_sentences(text) + self.assertEqual(len(result), 2) + + +class TestBuildBufferedSentences(unittest.TestCase): + def test_buffer_size_zero(self): + sentences = ["A.", "B.", "C."] + result = _build_buffered_sentences(sentences, buffer_size=0) + self.assertEqual(result, ["A.", "B.", "C."]) + + def test_buffer_size_one(self): + sentences = ["A.", "B.", "C.", "D."] + result = _build_buffered_sentences(sentences, buffer_size=1) + self.assertEqual(result[0], "A. B.") # [0:2] + self.assertEqual(result[1], "A. B. C.") # [0:3] + self.assertEqual(result[2], "B. C. D.") # [1:4] + self.assertEqual(result[3], "C. D.") # [2:4] + + def test_single_sentence(self): + result = _build_buffered_sentences(["Only one."], buffer_size=1) + self.assertEqual(result, ["Only one."]) + + +class TestCosineDistances(unittest.TestCase): + def test_identical_vectors(self): + embeddings = [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]] + distances = _cosine_distances(embeddings) + self.assertEqual(len(distances), 2) + for d in distances: + self.assertAlmostEqual(d, 0.0, places=6) + + def test_orthogonal_vectors(self): + embeddings = [[1.0, 0.0], [0.0, 1.0]] + distances = _cosine_distances(embeddings) + self.assertAlmostEqual(distances[0], 1.0, places=6) + + def test_opposite_vectors(self): + embeddings = [[1.0, 0.0], [-1.0, 0.0]] + distances = _cosine_distances(embeddings) + self.assertAlmostEqual(distances[0], 2.0, places=6) + + def test_known_values(self): + # Two similar, then one different + embeddings = [[1.0, 0.0], [0.95, 0.05], [0.0, 1.0]] + distances = _cosine_distances(embeddings) + self.assertEqual(len(distances), 2) + # First pair should be close (small distance) + self.assertLess(distances[0], 0.1) + # Second pair should be far (large distance) + self.assertGreater(distances[1], 0.5) + + def test_zero_vector_handling(self): + embeddings = [[0.0, 0.0], [1.0, 0.0]] + distances = _cosine_distances(embeddings) + # Zero vector gets norm=1 (no division by zero) + self.assertEqual(len(distances), 1) + + +class TestFindBreakpoints(unittest.TestCase): + def test_clear_breakpoint(self): + # Low distances except one spike + distances = [0.01, 0.02, 0.01, 0.9, 0.01, 0.02] + breakpoints = _find_breakpoints(distances, 90.0) + self.assertIn(3, breakpoints) + + def test_no_breakpoints_uniform(self): + distances = [0.1, 0.1, 0.1, 0.1] + breakpoints = _find_breakpoints(distances, 95.0) + # With all equal distances, the 95th percentile = 0.1, and we need > threshold + self.assertEqual(breakpoints, []) + + def test_empty_distances(self): + self.assertEqual(_find_breakpoints([], 95.0), []) + + def test_single_distance(self): + breakpoints = _find_breakpoints([0.5], 50.0) + # 50th percentile of [0.5] = 0.5; nothing is > 0.5 + self.assertEqual(breakpoints, []) + + +class TestEnforceMaxChunkWords(unittest.TestCase): + def test_no_split_needed(self): + chunks = ["short chunk", "another one"] + result = _enforce_max_chunk_words(chunks, max_words=10) + self.assertEqual(result, chunks) + + def test_split_long_chunk(self): + long_chunk = " ".join(f"word{i}" for i in range(20)) + result = _enforce_max_chunk_words([long_chunk], max_words=10) + self.assertEqual(len(result), 2) + self.assertEqual(len(result[0].split()), 10) + self.assertEqual(len(result[1].split()), 10) + + def test_empty_chunks(self): + self.assertEqual(_enforce_max_chunk_words([], max_words=10), []) + + +class TestSemanticChunkText(unittest.TestCase): + def test_empty_text(self): + embed_fn = AsyncMock() + result = asyncio.run(semantic_chunk_text("", embed_fn)) + 
self.assertEqual(result, []) + embed_fn.assert_not_awaited() + + def test_single_sentence_returns_whole_text(self): + embed_fn = AsyncMock() + result = asyncio.run(semantic_chunk_text("Just one sentence.", embed_fn)) + self.assertEqual(result, ["Just one sentence."]) + embed_fn.assert_not_awaited() + + def test_two_sentences_returns_whole_text(self): + embed_fn = AsyncMock() + text = "First sentence. Second sentence." + result = asyncio.run(semantic_chunk_text(text, embed_fn)) + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_topic_transition_detected(self): + """Three sentences: first two similar, third different. Should split.""" + + async def mock_embed(texts): + embeddings = [] + for t in texts: + if "weather" in t.lower(): + embeddings.append([1.0, 0.0, 0.0]) + else: + embeddings.append([0.0, 0.0, 1.0]) + return embeddings + + text = ( + "The weather is nice. It is sunny today. Python is a programming language." + ) + result = asyncio.run( + semantic_chunk_text(text, mock_embed, breakpoint_percentile_threshold=50.0) + ) + # Should detect the topic transition + self.assertGreater(len(result), 1) + + def test_uniform_topic_single_chunk(self): + """All sentences on the same topic should stay together.""" + + async def mock_embed(texts): + return [[1.0, 0.0, 0.0]] * len(texts) + + text = "Dogs are great. Dogs are loyal. Dogs are friendly." + result = asyncio.run(semantic_chunk_text(text, mock_embed)) + self.assertEqual(len(result), 1) + + def test_embed_fn_failure_returns_single_chunk(self): + """If embedding fails, fall back to returning text as single chunk.""" + + async def failing_embed(texts): + raise RuntimeError("API error") + + text = "First sentence. Second sentence. Third sentence." + result = asyncio.run(semantic_chunk_text(text, failing_embed)) + self.assertEqual(result, [text]) + + def test_max_chunk_words_applied(self): + """Long uniform text should still be split by max_chunk_words.""" + words = " ".join(f"word{i}." for i in range(100)) + + async def mock_embed(texts): + return [[1.0, 0.0]] * len(texts) + + result = asyncio.run(semantic_chunk_text(words, mock_embed, max_chunk_words=30)) + for chunk in result: + self.assertLessEqual(len(chunk.split()), 30) + + def test_wrong_embedding_count_returns_single_chunk(self): + """If embed_fn returns wrong number of embeddings, fall back gracefully.""" + + async def wrong_count_embed(texts): + return [[1.0, 0.0]] # Always returns 1 regardless of input + + text = "First sentence. Second sentence. Third sentence." + result = asyncio.run(semantic_chunk_text(text, wrong_count_embed)) + self.assertEqual(result, [text]) + + +class TestSemanticChunkTextWithSentences(unittest.TestCase): + """Tests for the `sentences` and `join_str` parameters.""" + + def test_sentences_param_skips_split(self): + """Pre-split units should be used directly, not regex-split.""" + call_count = {"n": 0} + + async def mock_embed(texts): + call_count["n"] += 1 + # Return distinct embeddings so we can verify units are passed through + embeddings = [] + for i, _ in enumerate(texts): + vec = [0.0] * 3 + vec[i % 3] = 1.0 + embeddings.append(vec) + return embeddings + + # These dialogue turns have no sentence-ending punctuation — regex + # split_sentences would return them as a single unit. 
+ turns = [ + "Alice: Hey how are you", + "Bob: I'm good thanks", + "Alice: Want to grab lunch", + "Bob: Sure let's go", + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text( + text, mock_embed, sentences=turns, breakpoint_percentile_threshold=50.0 + ) + ) + # embed_fn should have been called (4 units > 2 threshold) + self.assertEqual(call_count["n"], 1) + # Result should contain all turns (possibly grouped) + joined = " ".join(result) + for turn in turns: + self.assertIn(turn, joined) + + def test_join_str_newline_preserves_dialogue(self): + """With join_str='\\n', chunks should keep speaker labels on separate lines.""" + + async def same_topic_embed(texts): + return [[1.0, 0.0, 0.0]] * len(texts) + + turns = [ + "Alice: The project is on track", + "Bob: Great to hear", + "Alice: We should ship next week", + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text(text, same_topic_embed, sentences=turns, join_str="\n") + ) + # All same topic → single chunk with newlines + self.assertEqual(len(result), 1) + self.assertIn("\n", result[0]) + # Each turn should be on its own line + lines = result[0].split("\n") + self.assertEqual(len(lines), 3) + + def test_single_turn_returns_whole_text(self): + """A single dialogue turn should return the full text.""" + embed_fn = AsyncMock() + turns = ["Alice: Hello"] + text = "Alice: Hello" + result = asyncio.run(semantic_chunk_text(text, embed_fn, sentences=turns)) + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_two_turns_returns_whole_text(self): + """Two dialogue turns should return the full text (below threshold).""" + embed_fn = AsyncMock() + turns = ["Alice: Hello", "Bob: Hi"] + text = "\n".join(turns) + result = asyncio.run(semantic_chunk_text(text, embed_fn, sentences=turns)) + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_topic_transition_with_dialogue(self): + """Dialogue that switches topics should be split into separate chunks.""" + + async def mock_embed(texts): + embeddings = [] + for t in texts: + if "weather" in t.lower() or "sunny" in t.lower(): + embeddings.append([1.0, 0.0, 0.0]) + else: + embeddings.append([0.0, 0.0, 1.0]) + return embeddings + + turns = [ + "Alice: The weather is beautiful today", + "Bob: Yes it's very sunny outside", + "Alice: By the way I started learning Python", + "Bob: Oh that's a great programming language", + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text( + text, + mock_embed, + sentences=turns, + join_str="\n", + breakpoint_percentile_threshold=50.0, + ) + ) + self.assertGreater(len(result), 1) + + def test_empty_turns_filtered(self): + """Empty strings in sentences list should be filtered out.""" + embed_fn = AsyncMock() + turns = ["Alice: Hello", "", " ", "Bob: Hi"] + text = "Alice: Hello\nBob: Hi" + result = asyncio.run(semantic_chunk_text(text, embed_fn, sentences=turns)) + # After filtering: 2 units → returns whole text + self.assertEqual(result, [text]) + embed_fn.assert_not_awaited() + + def test_max_chunk_words_still_applied(self): + """The max_chunk_words safety valve should apply to dialogue chunks.""" + + async def same_topic(texts): + return [[1.0, 0.0]] * len(texts) + + # Each turn has ~10 words; 5 turns = ~50 words + turns = [ + f"Speaker: word {i} " + " ".join(f"w{j}" for j in range(8)) + for i in range(5) + ] + text = "\n".join(turns) + result = asyncio.run( + semantic_chunk_text( + text, + same_topic, + sentences=turns, + join_str="\n", + max_chunk_words=20, + ) + 
) + for chunk in result: + self.assertLessEqual(len(chunk.split()), 20) + + +if __name__ == "__main__": + unittest.main() diff --git a/backends/advanced/webui/package-lock.json b/backends/advanced/webui/package-lock.json index c3bd503e..0b11b4a7 100644 --- a/backends/advanced/webui/package-lock.json +++ b/backends/advanced/webui/package-lock.json @@ -20,6 +20,7 @@ "d3-zoom": "^3.0.0", "framer-motion": "^11.0.0", "lucide-react": "^0.294.0", + "qrcode.react": "^4.2.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.20.0" @@ -4538,6 +4539,15 @@ "node": ">=6" } }, + "node_modules/qrcode.react": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/qrcode.react/-/qrcode.react-4.2.0.tgz", + "integrity": "sha512-QpgqWi8rD9DsS9EP3z7BT+5lY5SFhsqGjpgW5DY/i3mK4M9DTBNz3ErMi8BWYEfI3L0d8GIbGmcdFAS1uIRGjA==", + "license": "ISC", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" + } + }, "node_modules/queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", diff --git a/backends/advanced/webui/package.json b/backends/advanced/webui/package.json index 7c497790..64696777 100644 --- a/backends/advanced/webui/package.json +++ b/backends/advanced/webui/package.json @@ -13,7 +13,6 @@ "@tanstack/react-query": "^5.90.20", "axios": "^1.6.2", "clsx": "^2.0.0", - "framer-motion": "^11.0.0", "cronstrue": "^2.50.0", "d3-array": "^3.2.4", "d3-axis": "^3.0.0", @@ -21,7 +20,9 @@ "d3-selection": "^3.0.0", "d3-time-format": "^4.1.0", "d3-zoom": "^3.0.0", + "framer-motion": "^11.0.0", "lucide-react": "^0.294.0", + "qrcode.react": "^4.2.0", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^6.20.0" diff --git a/backends/advanced/webui/src/App.tsx b/backends/advanced/webui/src/App.tsx index e660311f..cbb03ab4 100644 --- a/backends/advanced/webui/src/App.tsx +++ b/backends/advanced/webui/src/App.tsx @@ -23,6 +23,7 @@ const Queue = lazy(() => import('./pages/Queue')) const LiveRecord = lazy(() => import('./pages/LiveRecord')) const Plugins = lazy(() => import('./pages/Plugins')) const Finetuning = lazy(() => import('./pages/Finetuning')) +const ConnectApp = lazy(() => import('./pages/ConnectApp')) function PageSkeleton() { return ( @@ -159,6 +160,13 @@ function App() { } /> + + }> + + + + } /> diff --git a/backends/advanced/webui/src/components/PluginSettingsForm.tsx b/backends/advanced/webui/src/components/PluginSettingsForm.tsx index 718ade20..0f59a3c2 100644 --- a/backends/advanced/webui/src/components/PluginSettingsForm.tsx +++ b/backends/advanced/webui/src/components/PluginSettingsForm.tsx @@ -15,8 +15,9 @@ interface PluginMetadata { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } config_schema: { @@ -30,8 +31,9 @@ interface PluginConfig { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } settings: Record diff --git a/backends/advanced/webui/src/components/layout/Layout.tsx b/backends/advanced/webui/src/components/layout/Layout.tsx index 630fb0d2..a182e033 100644 --- a/backends/advanced/webui/src/components/layout/Layout.tsx +++ b/backends/advanced/webui/src/components/layout/Layout.tsx @@ -1,5 +1,5 @@ import { Link, useLocation, Outlet } from 'react-router-dom' -import { Music, MessageSquare, MessageCircle, Brain, Users, 
Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Layers, Puzzle, Zap, Activity } from 'lucide-react' +import { Music, MessageSquare, MessageCircle, Brain, Users, Upload, Settings, LogOut, Sun, Moon, Shield, Radio, Layers, Puzzle, Zap, Activity, Smartphone } from 'lucide-react' import { useAuth } from '../../contexts/AuthContext' import { useTheme } from '../../contexts/ThemeContext' import GlobalRecordingIndicator from './GlobalRecordingIndicator' @@ -16,6 +16,7 @@ export default function Layout() { { path: '/conversations', label: 'Conversations', icon: MessageSquare }, { path: '/memories', label: 'Memories', icon: Brain }, { path: '/users', label: 'User Management', icon: Users }, + { path: '/connect-app', label: 'Connect App', icon: Smartphone }, ...(isAdmin ? [ { path: '/upload', label: 'Upload Audio', icon: Upload }, { path: '/queue', label: 'Queue & Events', icon: Layers }, diff --git a/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx b/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx index ceaf51c8..16de8088 100644 --- a/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx +++ b/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx @@ -4,8 +4,9 @@ interface OrchestrationConfig { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } @@ -41,12 +42,13 @@ export default function OrchestrationSection({ onChange({ ...config, events }) } - const handleConditionTypeChange = (type: 'always' | 'wake_word') => { + const handleConditionTypeChange = (type: 'always' | 'wake_word' | 'keyword_anywhere') => { onChange({ ...config, condition: { type, - wake_words: type === 'wake_word' ? config.condition.wake_words || [] : undefined + wake_words: type === 'wake_word' ? config.condition.wake_words || [] : undefined, + keywords: type === 'keyword_anywhere' ? config.condition.keywords || [] : undefined } }) } @@ -62,6 +64,17 @@ export default function OrchestrationSection({ }) } + const handleKeywordsChange = (value: string) => { + const keywords = value.split(',').map((w) => w.trim()).filter(Boolean) + onChange({ + ...config, + condition: { + ...config.condition, + keywords + } + }) + } + return (
{/* Section Header */} @@ -182,7 +195,7 @@ export default function OrchestrationSection({ Always

-                    Execute on every matching event
+                    Execute on every matching event, no filtering

@@ -209,10 +222,40 @@ export default function OrchestrationSection({ />
-                    Wake Word
+                    Wake Word (start of sentence)
+
+

+ Triggers when the transcript starts with the wake word +

+
+ + + @@ -239,7 +282,32 @@ export default function OrchestrationSection({ className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" />

-                    Comma-separated list of wake words (case-insensitive)
+                    Comma-separated list of wake words. The transcript must start with one of these words (case-insensitive).
+

+
+          )}
+
+          {/* Keywords Input (conditional) */}
+          {config.condition.type === 'keyword_anywhere' && (
+
+ + !disabled && handleKeywordsChange(e.target.value)} + placeholder="e.g., vivi, hey chronicle" + disabled={disabled} + className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" + /> +

+ Comma-separated list of keywords. Triggers when any keyword appears anywhere in the transcript (case-insensitive).

)} diff --git a/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx b/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx index 13c19542..a58eb023 100644 --- a/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx +++ b/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx @@ -22,8 +22,9 @@ interface PluginConfig { enabled: boolean events: string[] condition: { - type: 'always' | 'wake_word' + type: 'always' | 'wake_word' | 'keyword_anywhere' wake_words?: string[] + keywords?: string[] } } settings: Record diff --git a/backends/advanced/webui/src/pages/ConnectApp.tsx b/backends/advanced/webui/src/pages/ConnectApp.tsx new file mode 100644 index 00000000..768cdb6e --- /dev/null +++ b/backends/advanced/webui/src/pages/ConnectApp.tsx @@ -0,0 +1,120 @@ +import { useState } from 'react' +import { QRCodeSVG } from 'qrcode.react' +import { Smartphone, Copy, Check } from 'lucide-react' +import { useTheme } from '../contexts/ThemeContext' + +function getBackendHttpUrl(): string { + const { protocol, hostname, port } = window.location + + const isStandardPort = + (protocol === 'https:' && (port === '' || port === '443')) || + (protocol === 'http:' && (port === '' || port === '80')) + + const basePath = import.meta.env.BASE_URL + if (isStandardPort && basePath && basePath !== '/') { + // Caddy path-based routing — return full origin + return `${protocol}//${hostname}` + } + + if (import.meta.env.VITE_BACKEND_URL) { + const url = import.meta.env.VITE_BACKEND_URL as string + // If it's a relative URL, make it absolute + if (url.startsWith('/') || url === '') { + return `${protocol}//${hostname}${port ? `:${port}` : ''}` + } + return url + } + + if (isStandardPort) { + return `${protocol}//${hostname}` + } + + if (port === '5173') { + return `${protocol}//${hostname}:8000` + } + + return `${protocol}//${hostname}${port ? `:${port}` : ''}` +} + +export default function ConnectApp() { + const { isDark } = useTheme() + const [copied, setCopied] = useState(false) + const backendUrl = getBackendHttpUrl() + + const handleCopy = async () => { + try { + await navigator.clipboard.writeText(backendUrl) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } catch { + // Fallback for older browsers + const textArea = document.createElement('textarea') + textArea.value = backendUrl + document.body.appendChild(textArea) + textArea.select() + document.execCommand('copy') + document.body.removeChild(textArea) + setCopied(true) + setTimeout(() => setCopied(false), 2000) + } + } + + return ( +
+
+ +

+ Connect App +

+
+ +

+ Scan this QR code with the Chronicle mobile app to connect it to your backend. +

+ + {/* QR Code */} +
+
+ +
+ + {/* URL display + copy */} +
+ + {backendUrl} + + +
+
+ + {/* Instructions */} +
+

+ How to connect +

+
    +
+   1. Open the Chronicle app on your phone
+   2. Go to Settings and tap Scan QR Code
+   3. Point your camera at the QR code above
+   4. The backend URL will be configured automatically
+
+
+ ) +} diff --git a/backends/advanced/webui/src/pages/ConversationDetail.tsx b/backends/advanced/webui/src/pages/ConversationDetail.tsx index a7ec7fa4..c32465c9 100644 --- a/backends/advanced/webui/src/pages/ConversationDetail.tsx +++ b/backends/advanced/webui/src/pages/ConversationDetail.tsx @@ -3,7 +3,7 @@ import { useParams, useNavigate } from 'react-router-dom' import { useQueryClient } from '@tanstack/react-query' import { ArrowLeft, Calendar, User, Trash2, RefreshCw, MoreVertical, - RotateCcw, Zap, Play, Pause, + RotateCcw, Zap, Play, Pause, Download, Save, X, Pencil, Brain, Clock, Database, Layers, Star, BarChart3 } from 'lucide-react' import { annotationsApi, speakerApi, systemApi, BACKEND_URL } from '../services/api' @@ -333,6 +333,27 @@ export default function ConversationDetail() { } // Action handlers + const handleDownloadAudio = async () => { + if (!id) return + setOpenDropdown(false) + try { + const token = localStorage.getItem(getStorageKey('token')) || '' + const resp = await fetch(`${BACKEND_URL}/api/audio/get_audio/${id}`, { + headers: { Authorization: `Bearer ${token}` }, + }) + if (!resp.ok) throw new Error(`Download failed: ${resp.status}`) + const blob = await resp.blob() + const url = URL.createObjectURL(blob) + const a = document.createElement('a') + a.href = url + a.download = `${conversation?.title || id}.wav` + a.click() + URL.revokeObjectURL(url) + } catch (err: any) { + setActionError(`Failed to download audio: ${err.message || 'Unknown error'}`) + } + } + const handleDelete = async () => { if (!id) return const confirmed = window.confirm('Are you sure you want to delete this conversation?') @@ -658,6 +679,15 @@ export default function ConversationDetail() { {reprocessingSpeakers ? : } Reprocess Speakers + {conversation.audio_chunks_count && conversation.audio_chunks_count > 0 && ( + + )}
+ + + + {/* Raw Data (Debug) */} {readinessData && (
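Reviewer note before the next patch: the new chunking API is easiest to understand from one end-to-end call. The sketch below is assembled only from the behaviour pinned down in `test_text_chunking.py` above; `fake_embed` is a toy stand-in for the real embedding provider, and the multi-chunk outcome mirrors the topic-transition tests rather than a hard guarantee of the algorithm.

```python
# Sketch: exercising semantic_chunk_text the way the tests above do.
# fake_embed is a stand-in; the real callers pass an OpenAI-style
# async embedding function.
import asyncio

from advanced_omi_backend.utils.text_chunking import semantic_chunk_text


async def fake_embed(texts: list[str]) -> list[list[float]]:
    # Weather-related units cluster on one axis, everything else on the
    # other, so the cosine distance spikes at the topic switch.
    return [[1.0, 0.0] if "weather" in t.lower() else [0.0, 1.0] for t in texts]


async def main() -> None:
    turns = [
        "Alice: The weather is lovely today",
        "Bob: Yes, the weather could not be better",
        "Alice: By the way, the release branch is ready",
        "Bob: Great, let's ship it",
    ]
    chunks = await semantic_chunk_text(
        "\n".join(turns),
        fake_embed,
        sentences=turns,  # pre-split dialogue turns; skips the regex splitter
        join_str="\n",  # keeps each speaker label on its own line
        breakpoint_percentile_threshold=50.0,
        max_chunk_words=200,  # safety valve for long single-topic runs
    )
    # The topic switch after the second turn should yield more than one chunk.
    print(chunks)


asyncio.run(main())
```

Passing `sentences` and `join_str` is what lets dialogue without terminal punctuation chunk cleanly, as the `TestSemanticChunkTextWithSentences` cases above verify.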
From 4559eda70d79b50720543b5712f48e05ab9fb349 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Wed, 25 Feb 2026 18:34:19 +0530 Subject: [PATCH 3/4] Update application configuration and enhance device management features - Changed app slug from "chronicle" to "friend-lite-app" in app.json for better branding. - Added "ITSAppUsesNonExemptEncryption" key to app.json for compliance with App Store requirements. - Introduced a new "reconnect_backoff" event type in ConnectionLogContext for improved connection handling. - Enhanced diagnostics with a new reconnect backoff color in diagnostics.tsx. - Updated device filtering logic in index.tsx to include "neo" devices, improving user experience. - Added device type detection in DeviceListItem component to visually differentiate between device types. - Implemented retry connection UI feedback in index.tsx to inform users during reconnection attempts. - Updated useAutoReconnect hook to manage connection retry logic and backoff timing effectively. - Added utility function for device type detection to streamline device management. --- app/app.json | 10 +- app/app/diagnostics.tsx | 1 + app/app/index.tsx | 43 +++++- app/package-lock.json | 38 ++--- app/src/components/DeviceListItem.tsx | 18 +++ app/src/contexts/ConnectionLogContext.tsx | 1 + app/src/hooks/useAudioStreamer.ts | 1 + .../hooks/useAudioStreamingOrchestrator.ts | 8 +- app/src/hooks/useAutoReconnect.ts | 139 +++++++++++++++++- app/src/utils/deviceType.ts | 8 + 10 files changed, 232 insertions(+), 35 deletions(-) create mode 100644 app/src/utils/deviceType.ts diff --git a/app/app.json b/app/app.json index d2bf04ec..237eab68 100644 --- a/app/app.json +++ b/app/app.json @@ -1,7 +1,7 @@ { "expo": { "name": "chronicle", - "slug": "chronicle", + "slug": "friend-lite-app", "version": "1.0.0", "scheme": "chronicle", "orientation": "portrait", @@ -24,7 +24,8 @@ "NSAppTransportSecurity": { "NSAllowsArbitraryLoads": true, "NSAllowsLocalNetworking": true - } + }, + "ITSAppUsesNonExemptEncryption": false } }, "android": { @@ -55,7 +56,9 @@ "enableNotifications": true, "enableBackgroundAudio": true, "enableDeviceDetection": true, - "iosBackgroundModes": { "useProcessing": true }, + "iosBackgroundModes": { + "useProcessing": true + }, "iosConfig": { "microphoneUsageDescription": "We use the mic for live audio streaming" } @@ -107,6 +110,7 @@ "expo-image-picker", "./plugins/with-ats" ], + "owner": "cupbearer5517", "extra": { "eas": { "projectId": "05d8598e-6fe7-4373-81e4-1654f3d8e181" diff --git a/app/app/diagnostics.tsx b/app/app/diagnostics.tsx index 18ade000..c8c0d59a 100644 --- a/app/app/diagnostics.tsx +++ b/app/app/diagnostics.tsx @@ -17,6 +17,7 @@ const EVENT_BADGE_COLORS: Record = { error: '#FF3B30', health_ping: '#34C759', reconnect_attempt: '#FF9500', + reconnect_backoff: '#FF9500', bt_state_change: '#5856D6', }; diff --git a/app/app/index.tsx b/app/app/index.tsx index 636e475e..1c4dc58e 100644 --- a/app/app/index.tsx +++ b/app/app/index.tsx @@ -117,16 +117,17 @@ export default function App() { const canScan = React.useMemo(() => ( permissionGranted && bluetoothState === BluetoothState.PoweredOn && - !autoReconnect.isAttemptingAutoReconnect && !deviceConnection.isConnecting && + !autoReconnect.isAttemptingAutoReconnect && !autoReconnect.isRetryingConnection && + !deviceConnection.isConnecting && !deviceConnection.connectedDeviceId && (autoReconnect.triedAutoReconnectForCurrentId || !autoReconnect.lastKnownDeviceId) - ), [permissionGranted, 
bluetoothState, autoReconnect.isAttemptingAutoReconnect, deviceConnection.isConnecting, deviceConnection.connectedDeviceId, autoReconnect.triedAutoReconnectForCurrentId, autoReconnect.lastKnownDeviceId]); + ), [permissionGranted, bluetoothState, autoReconnect.isAttemptingAutoReconnect, autoReconnect.isRetryingConnection, deviceConnection.isConnecting, deviceConnection.connectedDeviceId, autoReconnect.triedAutoReconnectForCurrentId, autoReconnect.lastKnownDeviceId]); const filteredDevices = React.useMemo(() => { if (!showOnlyOmi) return scannedDevices; return scannedDevices.filter(d => { const name = d.name?.toLowerCase() || ''; - return name.includes('omi') || name.includes('friend'); + return name.includes('omi') || name.includes('friend') || name.includes('neo'); }); }, [scannedDevices, showOnlyOmi]); @@ -190,6 +191,21 @@ export default function App() { + {autoReconnect.isRetryingConnection && ( + + + + Reconnecting in {autoReconnect.retryBackoffSeconds}s... (attempt {autoReconnect.connectionRetryCount}) + + + Cancel + + + )} + {!settings.isAuthenticated && ( Login is required for advanced backend features. Simple backend can be used without authentication. @@ -201,7 +217,7 @@ export default function App() { Found Devices - Show only OMI/Friend + Show only OMI/Friend/Neo - {showOnlyOmi ? `No OMI/Friend devices found. ${scannedDevices.length} other device(s) hidden by filter.` : 'No devices found.'} + {showOnlyOmi ? `No OMI/Friend/Neo devices found. ${scannedDevices.length} other device(s) hidden by filter.` : 'No devices found.'} )} @@ -408,6 +424,23 @@ const createStyles = (colors: ThemeColors) => StyleSheet.create({ textAlign: 'center', fontStyle: 'italic', }, + retryBanner: { + flexDirection: 'row', + alignItems: 'center', + padding: 12, + marginBottom: 15, + backgroundColor: colors.card, + borderRadius: 8, + borderWidth: 1, + borderColor: colors.warning, + }, + retryBannerText: { + flex: 1, + marginLeft: 10, + fontSize: 14, + color: colors.warning, + fontWeight: '500', + }, authWarning: { marginBottom: 20, padding: 15, diff --git a/app/package-lock.json b/app/package-lock.json index c4ceb0c8..27c99921 100644 --- a/app/package-lock.json +++ b/app/package-lock.json @@ -82,6 +82,7 @@ "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.28.4.tgz", "integrity": "sha512-2BCOP7TN8M+gVDj7/ht3hsaO/B/n5oDbiAyyvnRlNOs+u1o+JWNYTQrmpuNp1/Wq2gcFrI01JAW+paEKDMx/CA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.3", @@ -2814,6 +2815,7 @@ "integrity": "sha512-Q7UnBqOO/JsWfgmO9qZjrKgMi/0U9ih0FywXXheml8VH1hn/pBXKIeO/BvzA6g5gHIvBZ/6KyhdGoNok1R/ZJw==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "@react-native-community/cli-clean": "20.0.1", "@react-native-community/cli-config": "20.0.1", @@ -3429,6 +3431,7 @@ "resolved": "https://registry.npmjs.org/@react-navigation/native/-/native-7.1.28.tgz", "integrity": "sha512-d1QDn+KNHfHGt3UIwOZvupvdsDdiHYZBEj7+wL2yDVo3tMezamYy60H9s3EnNVE1Ae1ty0trc7F2OKqo/RmsdQ==", "license": "MIT", + "peer": true, "dependencies": { "@react-navigation/core": "^7.14.0", "escape-string-regexp": "^4.0.0", @@ -3624,6 +3627,7 @@ "integrity": "sha512-ixLZ7zG7j1fM0DijL9hDArwhwcCb4vqmePgwtV0GfnkHRSCUEv4LvzarcTdhoqgyMznUx/EhoTUv31CKZzkQlw==", "devOptional": true, "license": "MIT", + "peer": true, "dependencies": { "csstype": "^3.0.2" } @@ -3748,6 +3752,7 @@ "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "license": "MIT", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -4331,6 +4336,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001737", "electron-to-chromium": "^1.5.211", @@ -5286,6 +5292,7 @@ "resolved": "https://registry.npmjs.org/expo/-/expo-53.0.22.tgz", "integrity": "sha512-sJ2I4W/e5iiM4u/wYCe3qmW4D7WPCRqByPDD0hJcdYNdjc9HFFFdO4OAudZVyC/MmtoWZEIH5kTJP1cw9FjzYA==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.20.0", "@expo/cli": "0.24.21", @@ -5394,6 +5401,7 @@ "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-17.1.7.tgz", "integrity": "sha512-byBjGsJ6T6FrLlhOBxw4EaiMXrZEn/MlUYIj/JAd+FS7ll5X/S4qVRbIimSJtdW47hXMq0zxPfJX6njtA56hHA==", "license": "MIT", + "peer": true, "dependencies": { "@expo/config": "~11.0.12", "@expo/env": "~1.0.7" @@ -5486,6 +5494,7 @@ "resolved": "https://registry.npmjs.org/expo-font/-/expo-font-13.3.2.tgz", "integrity": "sha512-wUlMdpqURmQ/CNKK/+BIHkDA5nGjMqNlYmW0pJFXY/KE/OG80Qcavdu2sHsL4efAIiNGvYdBS10WztuQYU4X0A==", "license": "MIT", + "peer": true, "dependencies": { "fontfaceobserver": "^2.1.0" }, @@ -5536,7 +5545,6 @@ "resolved": "https://registry.npmjs.org/expo-linking/-/expo-linking-8.0.11.tgz", "integrity": "sha512-+VSaNL5om3kOp/SSKO5qe6cFgfSIWnnQDSbA7XLs3ECkYzXRquk5unxNS3pg7eK5kNUmQ4kgLI7MhTggAEUBLA==", "license": "MIT", - "peer": true, "dependencies": { "expo-constants": "~18.0.12", "invariant": "^2.2.4" @@ -5551,7 +5559,6 @@ "resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz", "integrity": "sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==", "license": "MIT", - "peer": true, "dependencies": { "@babel/highlight": "^7.10.4" } @@ -5561,7 +5568,6 @@ "resolved": "https://registry.npmjs.org/@expo/config/-/config-12.0.13.tgz", "integrity": "sha512-Cu52arBa4vSaupIWsF0h7F/Cg//N374nYb7HAxV0I4KceKA7x2UXpYaHOL7EEYYvp7tZdThBjvGpVmr8ScIvaQ==", "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "@expo/config-plugins": "~54.0.4", @@ -5583,7 +5589,6 @@ "resolved": "https://registry.npmjs.org/@expo/config-plugins/-/config-plugins-54.0.4.tgz", "integrity": "sha512-g2yXGICdoOw5i3LkQSDxl2Q5AlQCrG7oniu0pCPPO+UxGb7He4AFqSvPSy8HpRUj55io17hT62FTjYRD+d6j3Q==", "license": "MIT", - "peer": true, "dependencies": { "@expo/config-types": "^54.0.10", "@expo/json-file": "~10.0.8", @@ -5605,15 +5610,13 @@ "version": "54.0.10", "resolved": "https://registry.npmjs.org/@expo/config-types/-/config-types-54.0.10.tgz", "integrity": "sha512-/J16SC2an1LdtCZ67xhSkGXpALYUVUNyZws7v+PVsFZxClYehDSoKLqyRaGkpHlYrCc08bS0RF5E0JV6g50psA==", - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/expo-linking/node_modules/@expo/env": { "version": "2.0.8", "resolved": "https://registry.npmjs.org/@expo/env/-/env-2.0.8.tgz", "integrity": "sha512-5VQD6GT8HIMRaSaB5JFtOXuvfDVU80YtZIuUT/GDhUF782usIXY13Tn3IdDz1Tm/lqA9qnRZQ1BF4t7LlvdJPA==", "license": "MIT", - "peer": true, "dependencies": { "chalk": "^4.0.0", "debug": "^4.3.4", @@ -5627,7 +5630,6 @@ "resolved": "https://registry.npmjs.org/@expo/json-file/-/json-file-10.0.8.tgz", "integrity": "sha512-9LOTh1PgKizD1VXfGQ88LtDH0lRwq9lsTb4aichWTWSWqy3Ugfkhfm3BhzBIkJJfQQ5iJu3m/BoRlEIjoCGcnQ==", "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "~7.10.4", "json5": "^2.2.3" @@ -5638,7 +5640,6 @@ 
"resolved": "https://registry.npmjs.org/@expo/plist/-/plist-0.4.8.tgz", "integrity": "sha512-pfNtErGGzzRwHP+5+RqswzPDKkZrx+Cli0mzjQaus1ZWFsog5ibL+nVT3NcporW51o8ggnt7x813vtRbPiyOrQ==", "license": "MIT", - "peer": true, "dependencies": { "@xmldom/xmldom": "^0.8.8", "base64-js": "^1.2.3", @@ -5650,7 +5651,6 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", "license": "MIT", - "peer": true, "engines": { "node": "18 || 20 || >=22" } @@ -5660,7 +5660,6 @@ "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.3.tgz", "integrity": "sha512-fy6KJm2RawA5RcHkLa1z/ScpBeA762UF9KmZQxwIbDtRJrgLzM10depAiEQ+CXYcoiqW1/m96OAAoke2nE9EeA==", "license": "MIT", - "peer": true, "dependencies": { "balanced-match": "^4.0.2" }, @@ -5673,7 +5672,6 @@ "resolved": "https://registry.npmjs.org/commander/-/commander-4.1.1.tgz", "integrity": "sha512-NOKm8xhkzAjzFx8B2v5OAHT+u5pRQc2UCa2Vq9jYL/31o2wi9mxBA7LIFs3sV5VSC49z6pEhfbMULvShKj26WA==", "license": "MIT", - "peer": true, "engines": { "node": ">= 6" } @@ -5683,7 +5681,6 @@ "resolved": "https://registry.npmjs.org/expo-constants/-/expo-constants-18.0.13.tgz", "integrity": "sha512-FnZn12E1dRYKDHlAdIyNFhBurKTS3F9CrfrBDJI5m3D7U17KBHMQ6JEfYlSj7LG7t+Ulr+IKaj58L1k5gBwTcQ==", "license": "MIT", - "peer": true, "dependencies": { "@expo/config": "~12.0.13", "@expo/env": "~2.0.8" @@ -5698,7 +5695,6 @@ "resolved": "https://registry.npmjs.org/glob/-/glob-13.0.6.tgz", "integrity": "sha512-Wjlyrolmm8uDpm/ogGyXZXb1Z+Ca2B8NbJwqBVg0axK9GbBeoS7yGV6vjXnYdGm6X53iehEuxxbyiKp8QmN4Vw==", "license": "BlueOak-1.0.0", - "peer": true, "dependencies": { "minimatch": "^10.2.2", "minipass": "^7.1.3", @@ -5716,7 +5712,6 @@ "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-11.2.6.tgz", "integrity": "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ==", "license": "BlueOak-1.0.0", - "peer": true, "engines": { "node": "20 || >=22" } @@ -5726,7 +5721,6 @@ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.2.tgz", "integrity": "sha512-+G4CpNBxa5MprY+04MbgOw1v7So6n5JY166pFi9KfYwT78fxScCeSNQSNzp6dpPSW2rONOps6Ocam1wFhCgoVw==", "license": "BlueOak-1.0.0", - "peer": true, "dependencies": { "brace-expansion": "^5.0.2" }, @@ -5742,7 +5736,6 @@ "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-2.0.2.tgz", "integrity": "sha512-3O/iVVsJAPsOnpwWIeD+d6z/7PmqApyQePUtCndjatj/9I5LylHvt5qluFaBT3I5h3r1ejfR056c+FCv+NnNXg==", "license": "BlueOak-1.0.0", - "peer": true, "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" @@ -5759,7 +5752,6 @@ "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", "license": "ISC", - "peer": true, "bin": { "semver": "bin/semver.js" }, @@ -5772,7 +5764,6 @@ "resolved": "https://registry.npmjs.org/sucrase/-/sucrase-3.35.1.tgz", "integrity": "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw==", "license": "MIT", - "peer": true, "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", @@ -6020,7 +6011,6 @@ "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", "integrity": "sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==", "license": "MIT", - "peer": true, "engines": { "node": ">=12.0.0" }, @@ 
-8698,6 +8688,7 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-3.0.1.tgz", "integrity": "sha512-I3EurrIQMlRc9IaAZnqRR044Phh2DXY+55o7uJ0V+hYZAcQYSuFWsc9q5PvyDHUSCe1Qxn/iBz+78s86zWnGag==", "license": "MIT", + "peer": true, "engines": { "node": ">=10" }, @@ -8986,6 +8977,7 @@ "resolved": "https://registry.npmjs.org/react/-/react-19.0.0.tgz", "integrity": "sha512-V8AVnmPIICiWpGfm6GLzCR/W5FXLchHop40W4nXBmdlEceh16rCN8O8LNWm5bh5XUX91fh7KpA+W0TgMKmgTpQ==", "license": "MIT", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -9050,6 +9042,7 @@ "resolved": "https://registry.npmjs.org/react-native/-/react-native-0.79.6.tgz", "integrity": "sha512-kvIWSmf4QPfY41HC25TR285N7Fv0Pyn3DAEK8qRL9dA35usSaxsJkHfw+VqnonqJjXOaoKCEanwudRAJ60TBGA==", "license": "MIT", + "peer": true, "dependencies": { "@jest/create-cache-key-function": "^29.7.0", "@react-native/assets-registry": "0.79.6", @@ -9148,6 +9141,7 @@ "resolved": "https://registry.npmjs.org/react-native-safe-area-context/-/react-native-safe-area-context-5.4.0.tgz", "integrity": "sha512-JaEThVyJcLhA+vU0NU8bZ0a1ih6GiF4faZ+ArZLqpYbL6j7R3caRqj+mE3lEtKCuHgwjLg3bCxLL1GPUJZVqUA==", "license": "MIT", + "peer": true, "peerDependencies": { "react": "*", "react-native": "*" @@ -9158,6 +9152,7 @@ "resolved": "https://registry.npmjs.org/react-native-screens/-/react-native-screens-4.11.1.tgz", "integrity": "sha512-F0zOzRVa3ptZfLpD0J8ROdo+y1fEPw+VBFq1MTY/iyDu08al7qFUO5hLMd+EYMda5VXGaTFCa8q7bOppUszhJw==", "license": "MIT", + "peer": true, "dependencies": { "react-freeze": "^1.0.0", "react-native-is-edge-to-edge": "^1.1.7", @@ -10567,7 +10562,6 @@ "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.15.tgz", "integrity": "sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==", "license": "MIT", - "peer": true, "dependencies": { "fdir": "^6.5.0", "picomatch": "^4.0.3" @@ -10584,7 +10578,6 @@ "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.3.tgz", "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==", "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -10663,6 +10656,7 @@ "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "dev": true, "license": "Apache-2.0", + "peer": true, "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" diff --git a/app/src/components/DeviceListItem.tsx b/app/src/components/DeviceListItem.tsx index 33409b88..573dc30f 100644 --- a/app/src/components/DeviceListItem.tsx +++ b/app/src/components/DeviceListItem.tsx @@ -3,6 +3,7 @@ import { View, Text, TouchableOpacity, StyleSheet } from 'react-native'; import { OmiDevice } from 'friend-lite-react-native'; import { useTheme, ThemeColors } from '../theme'; import SignalStrength from './SignalStrength'; +import { detectDeviceType } from '../utils/deviceType'; interface DeviceListItemProps { device: OmiDevice; @@ -23,12 +24,18 @@ export const DeviceListItem: React.FC = ({ const s = createStyles(colors); const isThisDeviceConnected = connectedDeviceId === device.id; const isAnotherDeviceConnected = connectedDeviceId !== null && connectedDeviceId !== device.id; + const deviceType = detectDeviceType(device.name); return ( {device.name || 'Unknown Device'} + {deviceType !== 'unknown' && ( + + {deviceType === 'neo' ? 
'Neo' : 'OMI'} + + )} ID: {device.id} @@ -85,6 +92,17 @@ const createStyles = (colors: ThemeColors) => StyleSheet.create({ color: colors.textSecondary, marginTop: 2, }, + deviceTypeBadge: { + marginLeft: 6, + paddingHorizontal: 6, + paddingVertical: 2, + borderRadius: 4, + }, + deviceTypeBadgeText: { + color: 'white', + fontSize: 10, + fontWeight: '700', + }, button: { backgroundColor: colors.primary, paddingVertical: 12, diff --git a/app/src/contexts/ConnectionLogContext.tsx b/app/src/contexts/ConnectionLogContext.tsx index 8493cc90..dc9314c4 100644 --- a/app/src/contexts/ConnectionLogContext.tsx +++ b/app/src/contexts/ConnectionLogContext.tsx @@ -24,6 +24,7 @@ export type ConnectionEventType = | 'error' | 'health_ping' | 'reconnect_attempt' + | 'reconnect_backoff' | 'bt_state_change'; const MAX_EVENTS = 200; diff --git a/app/src/hooks/useAudioStreamer.ts b/app/src/hooks/useAudioStreamer.ts index cfd86fd7..f22371c0 100644 --- a/app/src/hooks/useAudioStreamer.ts +++ b/app/src/hooks/useAudioStreamer.ts @@ -27,6 +27,7 @@ const AUDIO_FORMAT = { rate: 16000, width: 2, channels: 1, + mode: 'streaming', }; /** -------------------- Foreground Service helpers (NEW) -------------------- */ diff --git a/app/src/hooks/useAudioStreamingOrchestrator.ts b/app/src/hooks/useAudioStreamingOrchestrator.ts index 43e78511..bff49ee5 100644 --- a/app/src/hooks/useAudioStreamingOrchestrator.ts +++ b/app/src/hooks/useAudioStreamingOrchestrator.ts @@ -46,8 +46,14 @@ export const useAudioStreamingOrchestrator = ({ const buildWebSocketUrl = useCallback((baseUrl: string): string => { let url = baseUrl.trim(); - const isAdvanced = settings.jwtToken && settings.isAuthenticated; + url = url.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); + if (!url.includes('/ws')) url = url.replace(/\/$/, '') + '/ws'; + if (!url.includes('codec=')) { + const sep = url.includes('?') ? 
'&' : '?';
+      url = url + sep + 'codec=opus';
+    }
+    const isAdvanced = settings.jwtToken && settings.isAuthenticated;
     if (isAdvanced) {
       const params = new URLSearchParams();
       params.append('token', settings.jwtToken!);
diff --git a/app/src/hooks/useAutoReconnect.ts b/app/src/hooks/useAutoReconnect.ts
index c2581b99..c45a1b62 100644
--- a/app/src/hooks/useAutoReconnect.ts
+++ b/app/src/hooks/useAutoReconnect.ts
@@ -1,8 +1,12 @@
-import { useState, useEffect, useCallback } from 'react';
+import { useState, useEffect, useCallback, useRef } from 'react';
 import { State as BluetoothState } from 'react-native-ble-plx';
 import { saveLastConnectedDeviceId, getLastConnectedDeviceId } from '../utils/storage';
 import { useConnectionLog } from '../contexts/ConnectionLogContext';
 
+const BACKOFF_INITIAL = 10000; // 10s
+const BACKOFF_MAX = 300000; // 5 min
+const MIN_HEALTHY_DURATION = 30000; // 30s
+
 interface UseAutoReconnectParams {
   bluetoothState: BluetoothState;
   permissionGranted: boolean;
@@ -19,6 +23,9 @@ export interface AutoReconnectState {
   lastKnownDeviceId: string | null;
   isAttemptingAutoReconnect: boolean;
   triedAutoReconnectForCurrentId: boolean;
+  isRetryingConnection: boolean;
+  retryBackoffSeconds: number;
+  connectionRetryCount: number;
   setLastKnownDeviceId: (id: string | null) => void;
   setTriedAutoReconnectForCurrentId: (tried: boolean) => void;
   handleCancelAutoReconnect: () => Promise<void>;
@@ -35,6 +42,35 @@
   const [triedAutoReconnectForCurrentId, setTriedAutoReconnectForCurrentId] = useState(false);
   const { addEvent } = useConnectionLog();
 
+  // Retry / backoff state
+  const [isRetryingConnection, setIsRetryingConnection] = useState(false);
+  const [retryBackoffSeconds, setRetryBackoffSeconds] = useState(0);
+  const [connectionRetryCount, setConnectionRetryCount] = useState(0);
+  const backoffMsRef = useRef(0);
+  const connectionStartTimeRef = useRef<number | null>(null);
+  const retryTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  const countdownTimerRef = useRef<ReturnType<typeof setInterval> | null>(null);
+  const prevConnectedRef = useRef<string | null>(null);
+
+  const clearRetryTimers = useCallback(() => {
+    if (retryTimerRef.current) {
+      clearTimeout(retryTimerRef.current);
+      retryTimerRef.current = null;
+    }
+    if (countdownTimerRef.current) {
+      clearInterval(countdownTimerRef.current);
+      countdownTimerRef.current = null;
+    }
+    setIsRetryingConnection(false);
+    setRetryBackoffSeconds(0);
+  }, []);
+
+  const resetBackoff = useCallback(() => {
+    backoffMsRef.current = 0;
+    setConnectionRetryCount(0);
+    clearRetryTimers();
+  }, [clearRetryTimers]);
+
   // Load last device on mount
   useEffect(() => {
     const load = async () => {
@@ -50,7 +86,88 @@
     load();
   }, []);
 
-  // Auto-reconnect effect
+  // Track connection start/end for backoff calculation
+  useEffect(() => {
+    const currentConnected = deviceConnection.connectedDeviceId;
+    const prevConnected = prevConnectedRef.current;
+    prevConnectedRef.current = currentConnected;
+
+    // Connection established
+    if (currentConnected && !prevConnected) {
+      connectionStartTimeRef.current = Date.now();
+      clearRetryTimers();
+      return;
+    }
+
+    // Connection lost (unexpected disconnect)
+    if (!currentConnected && prevConnected && lastKnownDeviceId) {
+      const startTime = connectionStartTimeRef.current;
+      connectionStartTimeRef.current = null;
+      const duration = startTime ? Date.now() - startTime : 0;
+
+      if (duration >= MIN_HEALTHY_DURATION) {
+        // Healthy connection — reset backoff
+        backoffMsRef.current = 0;
+        setConnectionRetryCount(0);
+      } else {
+        // Quick failure — increase backoff
+        if (backoffMsRef.current === 0) {
+          backoffMsRef.current = BACKOFF_INITIAL;
+        } else {
+          backoffMsRef.current = Math.min(backoffMsRef.current * 2, BACKOFF_MAX);
+        }
+      }
+
+      const delay = backoffMsRef.current;
+      const deviceId = lastKnownDeviceId;
+      // Capture the attempt number now; state read inside the timer callbacks below is stale.
+      const attempt = connectionRetryCount + 1;
+      addEvent('reconnect_backoff', `Scheduling retry in ${delay / 1000}s (device: ${deviceId})`, { deviceId });
+
+      setIsRetryingConnection(true);
+      setRetryBackoffSeconds(Math.ceil(delay / 1000));
+      setConnectionRetryCount(c => c + 1);
+
+      // Countdown timer for UI
+      const countdownEnd = Date.now() + delay;
+      countdownTimerRef.current = setInterval(() => {
+        const remaining = Math.max(0, Math.ceil((countdownEnd - Date.now()) / 1000));
+        setRetryBackoffSeconds(remaining);
+        if (remaining <= 0 && countdownTimerRef.current) {
+          clearInterval(countdownTimerRef.current);
+          countdownTimerRef.current = null;
+        }
+      }, 1000);
+
+      // Schedule reconnect
+      retryTimerRef.current = setTimeout(async () => {
+        retryTimerRef.current = null;
+        if (countdownTimerRef.current) {
+          clearInterval(countdownTimerRef.current);
+          countdownTimerRef.current = null;
+        }
+
+        if (!deviceId) {
+          setIsRetryingConnection(false);
+          return;
+        }
+
+        setIsAttemptingAutoReconnect(true);
+        setIsRetryingConnection(false);
+        setRetryBackoffSeconds(0);
+        addEvent('reconnect_attempt', `Retrying connection to ${deviceId} (attempt ${attempt})`, { deviceId });
+
+        try {
+          await deviceConnection.connectToDevice(deviceId);
+        } catch (error) {
+          console.error(`[AutoReconnect] Retry failed for ${deviceId}:`, error);
+          // Let the next disconnect cycle handle further retries
+        } finally {
+          setIsAttemptingAutoReconnect(false);
+        }
+      }, delay);
+    }
+  }, [deviceConnection.connectedDeviceId]);
+
+  // Auto-reconnect on app launch (existing behavior)
   useEffect(() => {
     if (
       bluetoothState === BluetoothState.PoweredOn &&
@@ -60,7 +177,8 @@
       !deviceConnection.isConnecting &&
       !scanning &&
       !isAttemptingAutoReconnect &&
-      !triedAutoReconnectForCurrentId
+      !triedAutoReconnectForCurrentId &&
+      !isRetryingConnection
     ) {
       const attemptAutoConnect = async () => {
         setIsAttemptingAutoReconnect(true);
@@ -83,6 +201,7 @@
     deviceConnection.connectedDeviceId, deviceConnection.isConnecting, scanning,
     deviceConnection.connectToDevice, triedAutoReconnectForCurrentId, isAttemptingAutoReconnect,
+    isRetryingConnection,
   ]);
 
   const handleCancelAutoReconnect = useCallback(async () => {
@@ -91,14 +210,26 @@
       setLastKnownDeviceId(null);
       setTriedAutoReconnectForCurrentId(true);
     }
+    resetBackoff();
     await deviceConnection.disconnectFromDevice();
     setIsAttemptingAutoReconnect(false);
-  }, [deviceConnection, lastKnownDeviceId]);
+  }, [deviceConnection, lastKnownDeviceId, resetBackoff]);
+
+  // Cleanup timers on unmount
+  useEffect(() => {
+    return () => {
+      if (retryTimerRef.current) clearTimeout(retryTimerRef.current);
+      if (countdownTimerRef.current) clearInterval(countdownTimerRef.current);
+    };
+  }, []);
 
   return {
     lastKnownDeviceId,
     isAttemptingAutoReconnect,
     triedAutoReconnectForCurrentId,
+    isRetryingConnection,
+    retryBackoffSeconds,
+    connectionRetryCount,
     setLastKnownDeviceId,
     setTriedAutoReconnectForCurrentId,
     handleCancelAutoReconnect,
diff --git a/app/src/utils/deviceType.ts b/app/src/utils/deviceType.ts
new file mode 100644
index 00000000..e0648e4c
--- /dev/null
+++ b/app/src/utils/deviceType.ts
@@ -0,0 +1,8 @@
+export type DeviceType = 'neo' | 'omi' | 'unknown';
+
+export function detectDeviceType(name: string | null): DeviceType {
+  const lower = (name || '').toLowerCase();
+  if (lower.includes('neo')) return 'neo';
+  if (lower.includes('omi') || lower.includes('friend')) return 'omi';
+  return 'unknown';
+}

From 0d0029d45d92fbc489b0ce9b8383ce45b0750a72 Mon Sep 17 00:00:00 2001
From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com>
Date: Wed, 25 Feb 2026 13:14:50 +0000
Subject: [PATCH 4/4] Update dependencies and refactor OpenAI client integration

- Added new dependencies for OpenTelemetry and OpenInference instrumentation to enhance observability and tracing capabilities.
- Refactored OpenAI client creation to remove LangFuse tracing logic, simplifying the client setup.
- Updated LLM client methods to streamline API calls and improve error handling.
- Enhanced session management in controllers to ensure accurate status tracking and websocket connection handling.
- Removed deprecated LangFuse session ID parameters from various methods to clean up the codebase.
- Improved job ID handling in streaming jobs for better traceability and consistency.
---
 backends/advanced/pyproject.toml              |   6 +-
 .../controllers/queue_controller.py           | 343 ++++++----
 .../controllers/session_controller.py         | 222 +++---
 .../controllers/system_controller.py          | 635 ++++++++++--------
 .../src/advanced_omi_backend/llm_client.py    |  64 +-
 .../observability/otel_setup.py               |  93 ++-
 .../advanced_omi_backend/openai_factory.py    |  38 +-
 .../routers/modules/queue_routes.py           |   8 +
 .../services/memory/base.py                   |  14 +-
 .../services/memory/providers/chronicle.py    |  76 ++-
 .../memory/providers/llm_providers.py         |  76 +--
 .../utils/conversation_utils.py               |  18 +-
 .../workers/conversation_jobs.py              | 225 +++----
 .../workers/transcription_jobs.py             | 136 +---
 .../advanced/webui/src/pages/ConnectApp.tsx   | 120 ----
 backends/advanced/webui/src/pages/Queue.tsx   |  27 +-
 backends/advanced/webui/src/services/api.ts   |   3 +
 17 files changed, 1078 insertions(+), 1026 deletions(-)
 delete mode 100644 backends/advanced/webui/src/pages/ConnectApp.tsx

diff --git a/backends/advanced/pyproject.toml b/backends/advanced/pyproject.toml
index e23f3a34..db93628f 100644
--- a/backends/advanced/pyproject.toml
+++ b/backends/advanced/pyproject.toml
@@ -24,6 +24,9 @@ dependencies = [
     "ruamel-yaml>=0.18.0",
     "omegaconf>=2.3.0",
     "langfuse>=3.13.0,<4.0",
+    "opentelemetry-api>=1.20",
+    "opentelemetry-sdk>=1.20",
+    "openinference-instrumentation-openai>=0.1",
     "spacy>=3.8.2",
     "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
     "redis>=5.0.0",
@@ -47,10 +50,7 @@ local-audio = [
 ]
 galileo = [
     "galileo>=1.0",
-    "opentelemetry-api>=1.20",
-    "opentelemetry-sdk>=1.20",
     "opentelemetry-exporter-otlp>=1.20",
-    "openinference-instrumentation-openai>=0.1",
 ]
 
 [build-system]
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
index d2cfc7df..6973dffd 100644
--- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
+++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py
@@ -73,6 +73,7 @@ def get_job_status_from_rq(job: Job) -> str:
 
     return status_str
 
+
 # Queue name constants
 TRANSCRIPTION_QUEUE = "transcription"
MEMORY_QUEUE = "memory" @@ -86,9 +87,13 @@ def get_job_status_from_rq(job: Job) -> str: JOB_RESULT_TTL = int(os.getenv("RQ_RESULT_TTL", 86400)) # 24 hour default # Create queues with custom result TTL -transcription_queue = Queue(TRANSCRIPTION_QUEUE, connection=redis_conn, default_timeout=86400) # 24 hours for streaming jobs +transcription_queue = Queue( + TRANSCRIPTION_QUEUE, connection=redis_conn, default_timeout=86400 +) # 24 hours for streaming jobs memory_queue = Queue(MEMORY_QUEUE, connection=redis_conn, default_timeout=300) -audio_queue = Queue(AUDIO_QUEUE, connection=redis_conn, default_timeout=86400) # 24 hours for all-day sessions +audio_queue = Queue( + AUDIO_QUEUE, connection=redis_conn, default_timeout=86400 +) # 24 hours for all-day sessions default_queue = Queue(DEFAULT_QUEUE, connection=redis_conn, default_timeout=300) @@ -123,7 +128,9 @@ def get_job_stats() -> Dict[str, Any]: canceled_jobs += len(queue.canceled_job_registry) deferred_jobs += len(queue.deferred_job_registry) - total_jobs = queued_jobs + started_jobs + finished_jobs + failed_jobs + canceled_jobs + deferred_jobs + total_jobs = ( + queued_jobs + started_jobs + finished_jobs + failed_jobs + canceled_jobs + deferred_jobs + ) return { "total_jobs": total_jobs, @@ -133,7 +140,7 @@ def get_job_stats() -> Dict[str, Any]: "failed_jobs": failed_jobs, "canceled_jobs": canceled_jobs, "deferred_jobs": deferred_jobs, - "timestamp": datetime.utcnow().isoformat() + "timestamp": datetime.utcnow().isoformat(), } @@ -142,7 +149,7 @@ def get_jobs( offset: int = 0, queue_name: str = None, job_type: str = None, - client_id: str = None + client_id: str = None, ) -> Dict[str, Any]: """ Get jobs from a specific queue or all queues with optional filtering. @@ -157,7 +164,9 @@ def get_jobs( Returns: Dict with jobs list and pagination metadata matching frontend expectations """ - logger.info(f"🔍 DEBUG get_jobs: Filtering - queue_name={queue_name}, job_type={job_type}, client_id={client_id}") + logger.info( + f"🔍 DEBUG get_jobs: Filtering - queue_name={queue_name}, job_type={job_type}, client_id={client_id}" + ) all_jobs = [] seen_job_ids = set() # Track which job IDs we've already processed to avoid duplicates @@ -173,7 +182,10 @@ def get_jobs( (queue.started_job_registry.get_job_ids(), "started"), # RQ standard, not "processing" (queue.finished_job_registry.get_job_ids(), "finished"), # RQ standard, not "completed" (queue.failed_job_registry.get_job_ids(), "failed"), - (queue.deferred_job_registry.get_job_ids(), "deferred"), # Jobs waiting for dependencies + ( + queue.deferred_job_registry.get_job_ids(), + "deferred", + ), # Jobs waiting for dependencies ] for job_ids, status in registries: @@ -190,46 +202,58 @@ def get_jobs( user_id = job.kwargs.get("user_id", "") if job.kwargs else "" # Extract just the function name (e.g., "listen_for_speech_job" from "module.listen_for_speech_job") - func_name = job.func_name.split('.')[-1] if job.func_name else "unknown" + func_name = job.func_name.split(".")[-1] if job.func_name else "unknown" # Debug: Log job details before filtering - logger.debug(f"🔍 DEBUG get_jobs: Job {job_id} - func_name={func_name}, full_func_name={job.func_name}, meta_client_id={job.meta.get('client_id', '') if job.meta else ''}, status={status}") + logger.debug( + f"🔍 DEBUG get_jobs: Job {job_id} - func_name={func_name}, full_func_name={job.func_name}, meta_client_id={job.meta.get('client_id', '') if job.meta else ''}, status={status}" + ) # Apply job_type filter if job_type and job_type not in func_name: - 
logger.debug(f"🔍 DEBUG get_jobs: Filtered out {job_id} - job_type '{job_type}' not in func_name '{func_name}'") + logger.debug( + f"🔍 DEBUG get_jobs: Filtered out {job_id} - job_type '{job_type}' not in func_name '{func_name}'" + ) continue # Apply client_id filter (partial match in meta) if client_id: job_client_id = job.meta.get("client_id", "") if job.meta else "" if client_id not in job_client_id: - logger.debug(f"🔍 DEBUG get_jobs: Filtered out {job_id} - client_id '{client_id}' not in job_client_id '{job_client_id}'") + logger.debug( + f"🔍 DEBUG get_jobs: Filtered out {job_id} - client_id '{client_id}' not in job_client_id '{job_client_id}'" + ) continue logger.debug(f"🔍 DEBUG get_jobs: Including job {job_id} in results") - all_jobs.append({ - "job_id": job.id, - "job_type": func_name, - "user_id": user_id, - "status": status, - "priority": "normal", # RQ doesn't track priority in metadata - "data": { - "description": job.description or "", - "queue": qname, - }, - "result": job.result if hasattr(job, 'result') else None, - "meta": job.meta if job.meta else {}, # Include job metadata - "error_message": str(job.exc_info) if job.exc_info else None, - "created_at": job.created_at.isoformat() if job.created_at else None, - "started_at": job.started_at.isoformat() if job.started_at else None, - "completed_at": job.ended_at.isoformat() if job.ended_at else None, - "retry_count": job.retries_left if hasattr(job, 'retries_left') else 0, - "max_retries": 3, # Default max retries - "progress_percent": (job.meta or {}).get("batch_progress", {}).get("percent", 0), - "progress_message": (job.meta or {}).get("batch_progress", {}).get("message", ""), - }) + all_jobs.append( + { + "job_id": job.id, + "job_type": func_name, + "user_id": user_id, + "status": status, + "priority": "normal", # RQ doesn't track priority in metadata + "data": { + "description": job.description or "", + "queue": qname, + }, + "result": job.result if hasattr(job, "result") else None, + "meta": job.meta if job.meta else {}, # Include job metadata + "error_message": str(job.exc_info) if job.exc_info else None, + "created_at": job.created_at.isoformat() if job.created_at else None, + "started_at": job.started_at.isoformat() if job.started_at else None, + "completed_at": job.ended_at.isoformat() if job.ended_at else None, + "retry_count": job.retries_left if hasattr(job, "retries_left") else 0, + "max_retries": 3, # Default max retries + "progress_percent": (job.meta or {}) + .get("batch_progress", {}) + .get("percent", 0), + "progress_message": (job.meta or {}) + .get("batch_progress", {}) + .get("message", ""), + } + ) except Exception as e: logger.error(f"Error fetching job {job_id}: {e}") @@ -238,10 +262,12 @@ def get_jobs( # Paginate total_jobs = len(all_jobs) - paginated_jobs = all_jobs[offset:offset + limit] + paginated_jobs = all_jobs[offset : offset + limit] has_more = (offset + limit) < total_jobs - logger.info(f"🔍 DEBUG get_jobs: Found {total_jobs} matching jobs (returning {len(paginated_jobs)} after pagination)") + logger.info( + f"🔍 DEBUG get_jobs: Found {total_jobs} matching jobs (returning {len(paginated_jobs)} after pagination)" + ) return { "jobs": paginated_jobs, @@ -250,7 +276,7 @@ def get_jobs( "limit": limit, "offset": offset, "has_more": has_more, - } + }, } @@ -281,7 +307,7 @@ def is_job_complete(job): return False # Check dependent jobs - for dep_id in (job.dependent_ids or []): + for dep_id in job.dependent_ids or []: try: dep_job = Job.fetch(dep_id, connection=redis_conn) if not 
is_job_complete(dep_job): @@ -310,7 +336,7 @@ def is_job_complete(job): job = Job.fetch(job_id, connection=redis_conn) # Only check jobs with client_id in meta - if job.meta and job.meta.get('client_id') == client_id: + if job.meta and job.meta.get("client_id") == client_id: if not is_job_complete(job): return False except Exception as e: @@ -319,11 +345,7 @@ def is_job_complete(job): return True -def start_streaming_jobs( - session_id: str, - user_id: str, - client_id: str -) -> Dict[str, str]: +def start_streaming_jobs(session_id: str, user_id: str, client_id: str) -> Dict[str, str]: """ Enqueue jobs for streaming audio session (initial session setup). @@ -351,7 +373,7 @@ def start_streaming_jobs( # Read always_persist from global config NOW (backend process has fresh config) misc_settings = get_misc_settings() - always_persist = misc_settings.get('always_persist_enabled', False) + always_persist = misc_settings.get("always_persist_enabled", False) # Enqueue speech detection job speech_job = transcription_queue.enqueue( @@ -363,9 +385,9 @@ def start_streaming_jobs( ttl=None, # No pre-run expiry (job can wait indefinitely in queue) result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion failure_ttl=86400, # Cleanup failed jobs after 24h - job_id=f"speech-detect_{session_id[:12]}", + job_id=f"speech-detect_{session_id}", description=f"Listening for speech...", - meta={'client_id': client_id, 'session_level': True} + meta={"client_id": client_id, "session_level": True}, ) # Log job enqueue with TTL information for debugging actual_ttl = redis_conn.ttl(f"rq:job:{speech_job.id}") @@ -397,9 +419,9 @@ def start_streaming_jobs( ttl=None, # No pre-run expiry (job can wait indefinitely in queue) result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion failure_ttl=86400, # Cleanup failed jobs after 24h - job_id=f"audio-persist_{session_id[:12]}", - description=f"Audio persistence for session {session_id[:12]}", - meta={'client_id': client_id, 'session_level': True} # Mark as session-level job + job_id=f"audio-persist_{session_id}", + description=f"Audio persistence for session {session_id}", + meta={"client_id": client_id, "session_level": True}, # Mark as session-level job ) # Log job enqueue with TTL information for debugging actual_ttl = redis_conn.ttl(f"rq:job:{audio_job.id}") @@ -411,19 +433,16 @@ def start_streaming_jobs( f"queue_length={audio_queue.count}, client_id={client_id}" ) - return { - 'speech_detection': speech_job.id, - 'audio_persistence': audio_job.id - } + return {"speech_detection": speech_job.id, "audio_persistence": audio_job.id} def start_post_conversation_jobs( conversation_id: str, user_id: str, transcript_version_id: Optional[str] = None, - depends_on_job = None, + depends_on_job=None, client_id: Optional[str] = None, - end_reason: str = "file_upload" + end_reason: str = "file_upload", ) -> Dict[str, str]: """ Start post-conversation processing jobs after conversation is created. 
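The hunks below only reformat this chaining logic, so its shape is easy to lose in the noise: each stage is enqueued with RQ's depends_on pointing at the previous stage, and the final event job takes a list of dependencies. A minimal standalone sketch of the pattern, assuming a local Redis and placeholder stage functions (the real handlers live in the workers modules and must be importable by the RQ worker):

from redis import Redis
from rq import Queue

redis_conn = Redis()  # assumes Redis on localhost:6379
transcription_queue = Queue("transcription", connection=redis_conn)
memory_queue = Queue("memory", connection=redis_conn)
default_queue = Queue("default", connection=redis_conn)

# Placeholder stage functions, standing in for the real worker jobs.
def recognise_speakers_job(conversation_id: str) -> None: ...
def process_memory_job(conversation_id: str) -> None: ...
def generate_title_summary_job(conversation_id: str) -> None: ...
def dispatch_complete_event_job(conversation_id: str) -> None: ...

conversation_id = "abc123"
meta = {"conversation_id": conversation_id}

speaker = transcription_queue.enqueue(
    recognise_speakers_job, conversation_id,
    job_id=f"speaker_{conversation_id}", meta=meta,
)
memory = memory_queue.enqueue(
    process_memory_job, conversation_id,
    depends_on=speaker,  # runs only after speaker recognition succeeds
    job_id=f"memory_{conversation_id}", meta=meta,
)
title = default_queue.enqueue(
    generate_title_summary_job, conversation_id,
    depends_on=memory,  # serialized so the two jobs never write the conversation doc at once
    job_id=f"title_summary_{conversation_id}", meta=meta,
)
event = default_queue.enqueue(
    dispatch_complete_event_job, conversation_id,
    depends_on=[memory, title],  # a list waits for every listed job
    job_id=f"event_complete_{conversation_id}", meta=meta,
)

RQ parks a dependent job in the deferred registry until its dependencies finish, which is why deferred counts appear in the queue stats elsewhere in this patch; by default a failed dependency keeps its dependents from running at all.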
@@ -458,21 +477,27 @@ def start_post_conversation_jobs( version_id = transcript_version_id or str(uuid.uuid4()) # Build job metadata (include client_id if provided for UI tracking) - job_meta = {'conversation_id': conversation_id} + job_meta = {"conversation_id": conversation_id} if client_id: - job_meta['client_id'] = client_id + job_meta["client_id"] = client_id # Check if speaker recognition is enabled - speaker_config = get_service_config('speaker_recognition') - speaker_enabled = speaker_config.get('enabled', True) # Default to True for backward compatibility + speaker_config = get_service_config("speaker_recognition") + speaker_enabled = speaker_config.get( + "enabled", True + ) # Default to True for backward compatibility # Step 1: Speaker recognition job (conditional - only if enabled) - speaker_dependency = depends_on_job # Start with upstream dependency (transcription if file upload) + speaker_dependency = ( + depends_on_job # Start with upstream dependency (transcription if file upload) + ) speaker_job = None if speaker_enabled: speaker_job_id = f"speaker_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}" + ) speaker_job = transcription_queue.enqueue( recognise_speakers_job, @@ -483,26 +508,36 @@ def start_post_conversation_jobs( depends_on=speaker_dependency, job_id=speaker_job_id, description=f"Speaker recognition for conversation {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) speaker_dependency = speaker_job # Chain for next jobs if depends_on_job: - logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {depends_on_job.id})") + logger.info( + f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {depends_on_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (no dependencies, starts immediately)" + ) else: - logger.info(f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}") + logger.info( + f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}" + ) # Step 2: Memory extraction job (conditional - only if enabled) # Check if memory extraction is enabled - memory_config = get_service_config('memory.extraction') - memory_enabled = memory_config.get('enabled', True) # Default to True for backward compatibility + memory_config = get_service_config("memory.extraction") + memory_enabled = memory_config.get( + "enabled", True + ) # Default to True for backward compatibility memory_job = None if memory_enabled: # Depends on speaker job if it was created, otherwise depends on upstream (transcription or nothing) memory_job_id = f"memory_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}" + ) memory_job = memory_queue.enqueue( process_memory_job, @@ -512,23 +547,33 @@ def start_post_conversation_jobs( depends_on=speaker_dependency, # Either speaker_job or upstream dependency job_id=memory_job_id, 
description=f"Memory extraction for conversation {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) if speaker_job: - logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on speaker job {speaker_job.id})") + logger.info( + f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on speaker job {speaker_job.id})" + ) elif depends_on_job: - logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {depends_on_job.id})") + logger.info( + f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {depends_on_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (no dependencies, starts immediately)" + ) else: - logger.info(f"⏭️ Memory extraction disabled, skipping memory job for conversation {conversation_id[:8]}") + logger.info( + f"⏭️ Memory extraction disabled, skipping memory job for conversation {conversation_id[:8]}" + ) # Step 3: Title/summary generation job # Depends on memory job to avoid race condition (both jobs save the conversation document) # and to ensure fresh memories are available for context-enriched summaries title_dependency = memory_job if memory_job else speaker_dependency title_job_id = f"title_summary_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}" + ) title_summary_job = default_queue.enqueue( generate_title_summary_job, @@ -538,21 +583,31 @@ def start_post_conversation_jobs( depends_on=title_dependency, job_id=title_job_id, description=f"Generate title and summary for conversation {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) if memory_job: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on memory job {memory_job.id})") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on memory job {memory_job.id})" + ) elif speaker_job: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on speaker job {speaker_job.id})") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on speaker job {speaker_job.id})" + ) elif depends_on_job: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {depends_on_job.id})") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {depends_on_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (no dependencies, starts immediately)" + ) # Step 5: Dispatch conversation.complete event (runs after both memory and title/summary complete) # This ensures plugins receive the event after all processing is done event_job_id = f"event_complete_{conversation_id[:12]}" - logger.info(f"🔍 DEBUG: Creating 
conversation complete event job with job_id={event_job_id}, conversation_id={conversation_id[:12]}") + logger.info( + f"🔍 DEBUG: Creating conversation complete event job with job_id={event_job_id}, conversation_id={conversation_id[:12]}" + ) # Event job depends on memory and title/summary jobs that were actually enqueued # Build dependency list excluding None values @@ -571,29 +626,33 @@ def start_post_conversation_jobs( end_reason, # Use the end_reason parameter (defaults to 'file_upload' for backward compatibility) job_timeout=120, # 2 minutes result_ttl=JOB_RESULT_TTL, - depends_on=event_dependencies if event_dependencies else None, # Wait for jobs that were enqueued + depends_on=( + event_dependencies if event_dependencies else None + ), # Wait for jobs that were enqueued job_id=event_job_id, description=f"Dispatch conversation complete event ({end_reason}) for {conversation_id[:8]}", - meta=job_meta + meta=job_meta, ) # Log event dispatch dependencies if event_dependencies: dep_ids = [job.id for job in event_dependencies] - logger.info(f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (depends on {', '.join(dep_ids)})") + logger.info( + f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (depends on {', '.join(dep_ids)})" + ) else: - logger.info(f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (no dependencies, starts immediately)") + logger.info( + f"📥 RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (no dependencies, starts immediately)" + ) return { - 'speaker_recognition': speaker_job.id if speaker_job else None, - 'memory': memory_job.id if memory_job else None, - 'title_summary': title_summary_job.id, - 'event_dispatch': event_dispatch_job.id + "speaker_recognition": speaker_job.id if speaker_job else None, + "memory": memory_job.id if memory_job else None, + "title_summary": title_summary_job.id, + "event_dispatch": event_dispatch_job.id, } - - def get_queue_health() -> Dict[str, Any]: """Get health status of all queues and workers.""" health = { @@ -637,15 +696,18 @@ def get_queue_health() -> Dict[str, Any]: else: health["idle_workers"] += 1 - health["workers"].append({ - "name": worker.name, - "state": state, - "queues": [q.name for q in worker.queues], - "current_job": current_job, - }) + health["workers"].append( + { + "name": worker.name, + "state": state, + "queues": [q.name for q in worker.queues], + "current_job": current_job, + } + ) return health + # needs tidying but works for now async def cleanup_stuck_stream_workers(request): """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" @@ -660,7 +722,7 @@ async def cleanup_stuck_stream_workers(request): if not redis_client: return JSONResponse( status_code=503, - content={"error": "Redis client for audio streaming not initialized"} + content={"error": "Redis client for audio streaming not initialized"}, ) cleanup_results = {} @@ -677,13 +739,17 @@ async def cleanup_stuck_stream_workers(request): try: # First check stream age - delete old streams (>1 hour) immediately - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + stream_info = await redis_client.execute_command("XINFO", "STREAM", stream_name) # Parse stream info info_dict = {} for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], 
bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] + key_name = ( + stream_info[i].decode() + if isinstance(stream_info[i], bytes) + else str(stream_info[i]) + ) + info_dict[key_name] = stream_info[i + 1] stream_length = int(info_dict.get("length", 0)) last_entry = info_dict.get("last-entry") @@ -700,7 +766,7 @@ async def cleanup_stuck_stream_workers(request): last_id = last_entry[0] if isinstance(last_id, bytes): last_id = last_id.decode() - last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_ms = int(last_id.split("-")[0]) last_timestamp_s = last_timestamp_ms / 1000 stream_age = current_time - last_timestamp_s @@ -718,15 +784,19 @@ async def cleanup_stuck_stream_workers(request): "cleaned": 0, "deleted_consumers": 0, "deleted_stream": True, - "stream_age": stream_age + "stream_age": stream_age, } continue # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + groups = await redis_client.execute_command("XINFO", "GROUPS", stream_name) if not groups: - cleanup_results[stream_name] = {"message": "No consumer groups found", "cleaned": 0, "deleted_stream": False} + cleanup_results[stream_name] = { + "message": "No consumer groups found", + "cleaned": 0, + "deleted_stream": False, + } continue # Parse first group @@ -734,7 +804,7 @@ async def cleanup_stuck_stream_workers(request): group = groups[0] for i in range(0, len(group), 2): key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] + value = group[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -749,7 +819,9 @@ async def cleanup_stuck_stream_workers(request): pending_count = int(group_dict.get("pending", 0)) # Get consumers for this group to check per-consumer pending - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + consumers = await redis_client.execute_command( + "XINFO", "CONSUMERS", stream_name, group_name + ) cleaned_count = 0 total_consumer_pending = 0 @@ -759,8 +831,12 @@ async def cleanup_stuck_stream_workers(request): for consumer in consumers: consumer_dict = {} for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] + key = ( + consumer[i].decode() + if isinstance(consumer[i], bytes) + else str(consumer[i]) + ) + value = consumer[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -780,12 +856,20 @@ async def cleanup_stuck_stream_workers(request): is_dead = consumer_idle_ms > 300000 if consumer_pending > 0: - logger.info(f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + logger.info( + f"Found {consumer_pending} pending messages for consumer {consumer_name} (idle: {consumer_idle_ms}ms)" + ) # Get pending messages for this specific consumer try: pending_messages = await redis_client.execute_command( - 'XPENDING', stream_name, group_name, '-', '+', str(consumer_pending), consumer_name + "XPENDING", + stream_name, + group_name, + "-", + "+", + str(consumer_pending), + consumer_name, ) # XPENDING returns flat list: [msg_id, consumer, idle_ms, delivery_count, msg_id, ...] 
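The raw execute_command('XPENDING', ...) / XCLAIM / XACK sequence that the next hunk reformats has a more readable equivalent in redis-py's high-level stream helpers. A minimal synchronous sketch of the same recover-then-ack flow; the stream and group names are illustrative, and the actual handler runs against an async client:

from redis import Redis

r = Redis(decode_responses=True)  # assumes Redis on localhost:6379
stream, group = "audio:stream:client-1", "audio-group"  # illustrative names

for consumer in r.xinfo_consumers(stream, group):
    name, pending, idle_ms = consumer["name"], consumer["pending"], consumer["idle"]
    if pending > 0:
        # Claim each stuck message for a cleanup consumer, then ack it so the
        # group's pending entries list shrinks.
        for msg in r.xpending_range(stream, group, min="-", max="+",
                                    count=pending, consumername=name):
            r.xclaim(stream, group, "cleanup-worker",
                     min_idle_time=0, message_ids=[msg["message_id"]])
            r.xack(stream, group, msg["message_id"])
    elif idle_ms > 300000:
        # No pending messages and idle for over 5 minutes: treat as dead.
        r.xgroup_delconsumer(stream, group, name)

Passing min_idle_time=0 to xclaim reassigns the message unconditionally, so it can be acknowledged even though the original consumer never finished it.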
@@ -799,31 +883,49 @@ async def cleanup_stuck_stream_workers(request): # Claim the message to a cleanup worker try: await redis_client.execute_command( - 'XCLAIM', stream_name, group_name, 'cleanup-worker', '0', msg_id + "XCLAIM", + stream_name, + group_name, + "cleanup-worker", + "0", + msg_id, ) # Acknowledge it immediately await redis_client.xack(stream_name, group_name, msg_id) cleaned_count += 1 except Exception as claim_error: - logger.warning(f"Failed to claim/ack message {msg_id}: {claim_error}") + logger.warning( + f"Failed to claim/ack message {msg_id}: {claim_error}" + ) except Exception as consumer_error: - logger.error(f"Error processing consumer {consumer_name}: {consumer_error}") + logger.error( + f"Error processing consumer {consumer_name}: {consumer_error}" + ) # Delete dead consumers (idle > 5 minutes with no pending messages) if is_dead and consumer_pending == 0: try: await redis_client.execute_command( - 'XGROUP', 'DELCONSUMER', stream_name, group_name, consumer_name + "XGROUP", "DELCONSUMER", stream_name, group_name, consumer_name ) deleted_consumers += 1 - logger.info(f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)") + logger.info( + f"🧹 Deleted dead consumer {consumer_name} (idle: {consumer_idle_ms}ms)" + ) except Exception as delete_error: - logger.warning(f"Failed to delete consumer {consumer_name}: {delete_error}") + logger.warning( + f"Failed to delete consumer {consumer_name}: {delete_error}" + ) if total_consumer_pending == 0 and deleted_consumers == 0: - cleanup_results[stream_name] = {"message": "No pending messages or dead consumers", "cleaned": 0, "deleted_consumers": 0, "deleted_stream": False} + cleanup_results[stream_name] = { + "message": "No pending messages or dead consumers", + "cleaned": 0, + "deleted_consumers": 0, + "deleted_stream": False, + } continue total_cleaned += cleaned_count @@ -833,14 +935,11 @@ async def cleanup_stuck_stream_workers(request): "cleaned": cleaned_count, "deleted_consumers": deleted_consumers, "deleted_stream": False, - "original_pending": pending_count + "original_pending": pending_count, } except Exception as e: - cleanup_results[stream_name] = { - "error": str(e), - "cleaned": 0 - } + cleanup_results[stream_name] = {"error": str(e), "cleaned": 0} return { "success": True, @@ -849,7 +948,7 @@ async def cleanup_stuck_stream_workers(request): "total_deleted_streams": total_deleted_streams, "streams": cleanup_results, # New key for per-stream results "providers": cleanup_results, # Keep for backward compatibility with frontend - "timestamp": time.time() + "timestamp": time.time(), } except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py index 9b3a2de9..6a96883b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -24,7 +24,7 @@ async def mark_session_complete( "user_stopped", "inactivity_timeout", "max_duration", - "all_jobs_complete" + "all_jobs_complete", ], ) -> None: """ @@ -57,11 +57,10 @@ async def mark_session_complete( """ session_key = f"audio:session:{session_id}" mark_time = time.time() - await redis_client.hset(session_key, mapping={ - "status": "finished", - "completed_at": str(mark_time), - "completion_reason": reason - }) + await redis_client.hset( + session_key, + mapping={"status": "finished", "completed_at": str(mark_time), 
"completion_reason": reason}, + ) logger.info(f"✅ Session {session_id[:12]} marked finished: {reason} [TIME: {mark_time:.3f}]") @@ -117,7 +116,9 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: # Get conversation count for this session conversation_count_key = f"session:conversation_count:{session_id}" conversation_count_bytes = await redis_client.get(conversation_count_key) - conversation_count = int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + conversation_count = ( + int(conversation_count_bytes.decode()) if conversation_count_bytes else 0 + ) started_at = float(session_data.get(b"started_at", b"0")) last_chunk_at = float(session_data.get(b"last_chunk_at", b"0")) @@ -129,6 +130,9 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: "provider": session_data.get(b"provider", b"").decode(), "mode": session_data.get(b"mode", b"").decode(), "status": session_data.get(b"status", b"").decode(), + "websocket_connected": session_data.get(b"websocket_connected", b"false").decode() + == "true", + "completion_reason": session_data.get(b"completion_reason", b"").decode(), "chunks_published": int(session_data.get(b"chunks_published", b"0")), "started_at": started_at, "last_chunk_at": last_chunk_at, @@ -139,7 +143,7 @@ async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: "last_event": session_data.get(b"last_event", b"").decode(), "speech_detected_at": session_data.get(b"speech_detected_at", b"").decode(), "speaker_check_status": session_data.get(b"speaker_check_status", b"").decode(), - "identified_speakers": session_data.get(b"identified_speakers", b"").decode() + "identified_speakers": session_data.get(b"identified_speakers", b"").decode(), } except Exception as e: @@ -163,10 +167,8 @@ async def get_all_sessions(redis_client, limit: int = 100) -> List[Dict]: session_keys = [] cursor = b"0" while cursor and len(session_keys) < limit: - cursor, keys = await redis_client.scan( - cursor, match="audio:session:*", count=limit - ) - session_keys.extend(keys[:limit - len(session_keys)]) + cursor, keys = await redis_client.scan(cursor, match="audio:session:*", count=limit) + session_keys.extend(keys[: limit - len(session_keys)]) # Get info for each session sessions = [] @@ -241,7 +243,7 @@ async def get_streaming_status(request): if not redis_client: return JSONResponse( status_code=503, - content={"error": "Redis client for audio streaming not initialized"} + content={"error": "Redis client for audio streaming not initialized"}, ) # Get all sessions (both active and completed) @@ -261,40 +263,56 @@ async def get_streaming_status(request): # Separate active and completed sessions # Check if all jobs are complete (including failed jobs) - # Note: session_id == client_id in streaming context, but using client_id explicitly all_jobs_done = all_jobs_complete_for_client(session_obj.get("client_id")) - # Session is finished if: - # 1. Redis status says finished AND all jobs done, OR - # 2. 
All jobs are done (even if status isn't finished yet) - # This ensures sessions with failed jobs move to finished - if status == "finished" or all_jobs_done: - if all_jobs_done: - # All jobs finished - this is truly a finished session - # Update Redis status if it wasn't already marked finished - if status != "finished": - await mark_session_complete(redis_client, session_id, "all_jobs_complete") - - # Get additional session data for completed sessions - session_key = f"audio:session:{session_id}" - session_data = await redis_client.hgetall(session_key) - - completed_sessions_from_redis.append({ + # Session is completed ONLY when: + # 1. Status was already set to "finished" by an authoritative source + # (WebSocket disconnect handler or job handler), AND + # 2. All RQ jobs are in terminal state + # + # IMPORTANT: Do NOT mark sessions as finished here. Between conversations + # (after open_conversation_job finishes, before speech detection restarts), + # all jobs are briefly terminal. Writing "finished" during this gap kills + # the session permanently. + if status == "finished" and all_jobs_done: + # Get additional session data for completed sessions + session_key = f"audio:session:{session_id}" + session_data = await redis_client.hgetall(session_key) + + completed_sessions_from_redis.append( + { "session_id": session_id, "client_id": session_obj.get("client_id", ""), - "conversation_id": session_data.get(b"conversation_id", b"").decode() if session_data and b"conversation_id" in session_data else None, - "has_conversation": bool(session_data and session_data.get(b"conversation_id", b"")), - "action": session_data.get(b"action", b"finished").decode() if session_data and b"action" in session_data else "finished", - "reason": session_data.get(b"reason", b"").decode() if session_data and b"reason" in session_data else "", + "conversation_id": ( + session_data.get(b"conversation_id", b"").decode() + if session_data and b"conversation_id" in session_data + else None + ), + "has_conversation": bool( + session_data and session_data.get(b"conversation_id", b"") + ), + "action": ( + session_data.get(b"action", b"finished").decode() + if session_data and b"action" in session_data + else "finished" + ), + "reason": ( + session_data.get(b"reason", b"").decode() + if session_data and b"reason" in session_data + else "" + ), "completed_at": session_obj.get("last_chunk_at", 0), - "audio_file": session_data.get(b"audio_file", b"").decode() if session_data and b"audio_file" in session_data else "", - "conversation_count": session_obj.get("conversation_count", 0) - }) - else: - # Status says complete but jobs still processing - keep in active - active_sessions.append(session_obj) + "audio_file": ( + session_data.get(b"audio_file", b"").decode() + if session_data and b"audio_file" in session_data + else "" + ), + "conversation_count": session_obj.get("conversation_count", 0), + } + ) else: - # This is an active session + # Active session (including inter-conversation gaps where all jobs + # are temporarily terminal but status is still "active") active_sessions.append(session_obj) # Get stream health for all streams (per-client streams) @@ -317,13 +335,17 @@ async def get_streaming_status(request): stream_name = stream_key.decode() if isinstance(stream_key, bytes) else stream_key try: # Check if stream exists - stream_info = await redis_client.execute_command('XINFO', 'STREAM', stream_name) + stream_info = await redis_client.execute_command("XINFO", "STREAM", stream_name) # Parse stream info (returns flat 
list of key-value pairs) info_dict = {} for i in range(0, len(stream_info), 2): - key = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - value = stream_info[i+1] + key = ( + stream_info[i].decode() + if isinstance(stream_info[i], bytes) + else str(stream_info[i]) + ) + value = stream_info[i + 1] # Skip complex binary structures like first-entry and last-entry # which contain message data that can't be JSON serialized @@ -351,7 +373,7 @@ async def get_streaming_status(request): if last_entry_id: try: # Redis Stream IDs format: "milliseconds-sequence" - last_timestamp_ms = int(last_entry_id.split('-')[0]) + last_timestamp_ms = int(last_entry_id.split("-")[0]) last_timestamp_s = last_timestamp_ms / 1000 stream_age_seconds = current_time - last_timestamp_s except (ValueError, IndexError, AttributeError): @@ -369,7 +391,7 @@ async def get_streaming_status(request): session_idle_seconds = session_data.get("idle_seconds", 0) # Get consumer groups - groups = await redis_client.execute_command('XINFO', 'GROUPS', stream_name) + groups = await redis_client.execute_command("XINFO", "GROUPS", stream_name) stream_data = { "stream_length": info_dict.get("length", 0), @@ -378,19 +400,19 @@ async def get_streaming_status(request): "session_age_seconds": session_age_seconds, # Age since session started "session_idle_seconds": session_idle_seconds, # Time since last audio chunk "client_id": client_id, # Include client_id for reference - "consumer_groups": [] + "consumer_groups": [], } # Track if stream has any active consumers has_active_consumer = False - min_consumer_idle_ms = float('inf') + min_consumer_idle_ms = float("inf") # Parse consumer groups for group in groups: group_dict = {} for i in range(0, len(group), 2): key = group[i].decode() if isinstance(group[i], bytes) else str(group[i]) - value = group[i+1] + value = group[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -403,15 +425,21 @@ async def get_streaming_status(request): group_name = group_name.decode() # Get consumers for this group - consumers = await redis_client.execute_command('XINFO', 'CONSUMERS', stream_name, group_name) + consumers = await redis_client.execute_command( + "XINFO", "CONSUMERS", stream_name, group_name + ) consumer_list = [] consumer_pending_total = 0 for consumer in consumers: consumer_dict = {} for i in range(0, len(consumer), 2): - key = consumer[i].decode() if isinstance(consumer[i], bytes) else str(consumer[i]) - value = consumer[i+1] + key = ( + consumer[i].decode() + if isinstance(consumer[i], bytes) + else str(consumer[i]) + ) + value = consumer[i + 1] if isinstance(value, bytes): try: value = value.decode() @@ -434,11 +462,13 @@ async def get_streaming_status(request): if consumer_idle_ms < 300000: has_active_consumer = True - consumer_list.append({ - "name": consumer_name, - "pending": consumer_pending, - "idle_ms": consumer_idle_ms - }) + consumer_list.append( + { + "name": consumer_name, + "pending": consumer_pending, + "idle_ms": consumer_idle_ms, + } + ) # Get group-level pending count (may be 0 even if consumers have pending) try: @@ -451,20 +481,22 @@ async def get_streaming_status(request): # (Sometimes group pending is 0 but consumers still have pending messages) effective_pending = max(group_pending_count, consumer_pending_total) - stream_data["consumer_groups"].append({ - "name": str(group_name), - "consumers": consumer_list, - "pending": int(effective_pending) - }) + stream_data["consumer_groups"].append( + { + "name": str(group_name), + 
"consumers": consumer_list, + "pending": int(effective_pending), + } + ) # Determine if stream is active or completed # Active: has active consumers OR pending messages OR recent activity (< 5 min) # Completed: no active consumers and idle > 5 minutes but < 1 hour total_pending = sum(group["pending"] for group in stream_data["consumer_groups"]) is_active = ( - has_active_consumer or - total_pending > 0 or - stream_age_seconds < 300 # Less than 5 minutes old + has_active_consumer + or total_pending > 0 + or stream_age_seconds < 300 # Less than 5 minutes old ) if is_active: @@ -487,7 +519,7 @@ async def get_streaming_status(request): "finished": len(transcription_queue.finished_job_registry), "failed": len(transcription_queue.failed_job_registry), "canceled": len(transcription_queue.canceled_job_registry), - "deferred": len(transcription_queue.deferred_job_registry) + "deferred": len(transcription_queue.deferred_job_registry), }, "memory_queue": { "queued": memory_queue.count, @@ -495,7 +527,7 @@ async def get_streaming_status(request): "finished": len(memory_queue.finished_job_registry), "failed": len(memory_queue.failed_job_registry), "canceled": len(memory_queue.canceled_job_registry), - "deferred": len(memory_queue.deferred_job_registry) + "deferred": len(memory_queue.deferred_job_registry), }, "default_queue": { "queued": default_queue.count, @@ -503,8 +535,8 @@ async def get_streaming_status(request): "finished": len(default_queue.finished_job_registry), "failed": len(default_queue.failed_job_registry), "canceled": len(default_queue.canceled_job_registry), - "deferred": len(default_queue.deferred_job_registry) - } + "deferred": len(default_queue.deferred_job_registry), + }, } return { @@ -514,14 +546,13 @@ async def get_streaming_status(request): "completed_streams": completed_streams, "stream_health": active_streams, # Backward compatibility - use active_streams "rq_queues": rq_stats, - "timestamp": time.time() + "timestamp": time.time(), } except Exception as e: logger.error(f"Error getting streaming status: {e}", exc_info=True) return JSONResponse( - status_code=500, - content={"error": f"Failed to get streaming status: {str(e)}"} + status_code=500, content={"error": f"Failed to get streaming status: {str(e)}"} ) @@ -538,7 +569,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): if not redis_client: return JSONResponse( status_code=503, - content={"error": "Redis client for audio streaming not initialized"} + content={"error": "Redis client for audio streaming not initialized"}, ) # Get all session keys @@ -560,17 +591,14 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): age_seconds = current_time - started_at # Clean up sessions older than max_age or stuck in "finalizing" - should_clean = ( - age_seconds > max_age_seconds or - (status == "finalizing" and age_seconds > 300) # Finalizing for more than 5 minutes - ) + should_clean = age_seconds > max_age_seconds or ( + status == "finalizing" and age_seconds > 300 + ) # Finalizing for more than 5 minutes if should_clean: - old_sessions.append({ - "session_id": session_id, - "age_seconds": age_seconds, - "status": status - }) + old_sessions.append( + {"session_id": session_id, "age_seconds": age_seconds, "status": status} + ) await redis_client.delete(key) cleaned_sessions += 1 @@ -584,13 +612,17 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): try: # Check stream info to get last activity - stream_info = await redis_client.execute_command('XINFO', 'STREAM', 
stream_name) + stream_info = await redis_client.execute_command("XINFO", "STREAM", stream_name) # Parse stream info info_dict = {} for i in range(0, len(stream_info), 2): - key_name = stream_info[i].decode() if isinstance(stream_info[i], bytes) else str(stream_info[i]) - info_dict[key_name] = stream_info[i+1] + key_name = ( + stream_info[i].decode() + if isinstance(stream_info[i], bytes) + else str(stream_info[i]) + ) + info_dict[key_name] = stream_info[i + 1] stream_length = int(info_dict.get("length", 0)) last_entry = info_dict.get("last-entry") @@ -611,7 +643,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): # Redis Stream IDs format: "milliseconds-sequence" try: - last_timestamp_ms = int(last_id.split('-')[0]) + last_timestamp_ms = int(last_id.split("-")[0]) last_timestamp_s = last_timestamp_ms / 1000 age_seconds = current_time - last_timestamp_s @@ -627,7 +659,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): first_id = first_entry[0] if isinstance(first_id, bytes): first_id = first_id.decode() - first_timestamp_ms = int(first_id.split('-')[0]) + first_timestamp_ms = int(first_id.split("-")[0]) first_timestamp_s = first_timestamp_ms / 1000 age_seconds = current_time - first_timestamp_s @@ -640,12 +672,14 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): if should_delete: await redis_client.delete(stream_name) cleaned_streams += 1 - old_streams.append({ - "stream_name": stream_name, - "reason": reason, - "age_seconds": age_seconds, - "length": stream_length - }) + old_streams.append( + { + "stream_name": stream_name, + "reason": reason, + "age_seconds": age_seconds, + "length": stream_length, + } + ) except Exception as e: logger.debug(f"Error checking stream {stream_name}: {e}") @@ -657,7 +691,7 @@ async def cleanup_old_sessions(request, max_age_seconds: int = 3600): "cleaned_streams": cleaned_streams, "cleaned_session_details": old_sessions, "cleaned_stream_details": old_streams, - "timestamp": time.time() + "timestamp": time.time(), } except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index bf3ce1b1..274861c8 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -7,30 +7,29 @@ import logging import os import re -import signal import shutil +import signal import time import warnings from datetime import UTC, datetime +from io import StringIO from pathlib import Path from typing import Optional -from io import StringIO - -from ruamel.yaml import YAML from fastapi import HTTPException +from ruamel.yaml import YAML from advanced_omi_backend.config import ( get_diarization_settings as load_diarization_settings, ) from advanced_omi_backend.config import get_misc_settings as load_misc_settings -from advanced_omi_backend.config import ( - save_diarization_settings, - save_misc_settings, +from advanced_omi_backend.config import save_diarization_settings, save_misc_settings +from advanced_omi_backend.config_loader import get_plugins_yml_path, save_config_section +from advanced_omi_backend.model_registry import ( + _find_config_path, + get_models_registry, + load_models_config, ) -from advanced_omi_backend.config_loader import get_plugins_yml_path -from advanced_omi_backend.config_loader import save_config_section -from advanced_omi_backend.model_registry import 
_find_config_path, get_models_registry, load_models_config from advanced_omi_backend.models.user import User logger = logging.getLogger(__name__) @@ -43,7 +42,7 @@ async def get_config_diagnostics(): """ Get comprehensive configuration diagnostics. - + Returns warnings, errors, and status for all configuration components. """ diagnostics = { @@ -52,9 +51,9 @@ async def get_config_diagnostics(): "issues": [], "warnings": [], "info": [], - "components": {} + "components": {}, } - + # Test OmegaConf configuration loading try: from advanced_omi_backend.config_loader import load_config @@ -63,7 +62,7 @@ async def get_config_diagnostics(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") config = load_config(force_reload=True) - + # Check for OmegaConf warnings for warning in w: warning_msg = str(warning.message) @@ -71,148 +70,168 @@ async def get_config_diagnostics(): # Extract the variable name from warning if "variable '" in warning_msg.lower(): var_name = warning_msg.split("'")[1] - diagnostics["warnings"].append({ - "component": "OmegaConf", - "severity": "warning", - "message": f"Environment variable '{var_name}' not set (using empty default)", - "resolution": f"Set {var_name} in .env file if needed" - }) - + diagnostics["warnings"].append( + { + "component": "OmegaConf", + "severity": "warning", + "message": f"Environment variable '{var_name}' not set (using empty default)", + "resolution": f"Set {var_name} in .env file if needed", + } + ) + diagnostics["components"]["omegaconf"] = { "status": "healthy", - "message": "Configuration loaded successfully" + "message": "Configuration loaded successfully", } except Exception as e: diagnostics["overall_status"] = "unhealthy" - diagnostics["issues"].append({ - "component": "OmegaConf", - "severity": "error", - "message": f"Failed to load configuration: {str(e)}", - "resolution": "Check config/defaults.yml and config/config.yml syntax" - }) - diagnostics["components"]["omegaconf"] = { - "status": "unhealthy", - "message": str(e) - } - + diagnostics["issues"].append( + { + "component": "OmegaConf", + "severity": "error", + "message": f"Failed to load configuration: {str(e)}", + "resolution": "Check config/defaults.yml and config/config.yml syntax", + } + ) + diagnostics["components"]["omegaconf"] = {"status": "unhealthy", "message": str(e)} + # Test model registry try: from advanced_omi_backend.model_registry import get_models_registry - + with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") registry = get_models_registry() - + # Capture model loading warnings for warning in w: warning_msg = str(warning.message) - diagnostics["warnings"].append({ - "component": "Model Registry", - "severity": "warning", - "message": warning_msg, - "resolution": "Check model definitions in config/defaults.yml" - }) - + diagnostics["warnings"].append( + { + "component": "Model Registry", + "severity": "warning", + "message": warning_msg, + "resolution": "Check model definitions in config/defaults.yml", + } + ) + if registry: diagnostics["components"]["model_registry"] = { "status": "healthy", "message": f"Loaded {len(registry.models)} models", "details": { "total_models": len(registry.models), - "defaults": dict(registry.defaults) if registry.defaults else {} - } + "defaults": dict(registry.defaults) if registry.defaults else {}, + }, } - + # Check critical models stt = registry.get_default("stt") stt_stream = registry.get_default("stt_stream") llm = registry.get_default("llm") - + # STT check if stt: if 
stt.api_key: - diagnostics["info"].append({ - "component": "STT (Batch)", - "message": f"Configured: {stt.name} ({stt.model_provider}) - API key present" - }) + diagnostics["info"].append( + { + "component": "STT (Batch)", + "message": f"Configured: {stt.name} ({stt.model_provider}) - API key present", + } + ) else: - diagnostics["warnings"].append({ - "component": "STT (Batch)", - "severity": "warning", - "message": f"{stt.name} ({stt.model_provider}) - No API key configured", - "resolution": "Transcription can fail without API key" - }) + diagnostics["warnings"].append( + { + "component": "STT (Batch)", + "severity": "warning", + "message": f"{stt.name} ({stt.model_provider}) - No API key configured", + "resolution": "Transcription can fail without API key", + } + ) else: - diagnostics["issues"].append({ - "component": "STT (Batch)", - "severity": "error", - "message": "No batch STT model configured", - "resolution": "Set defaults.stt in config.yml" - }) + diagnostics["issues"].append( + { + "component": "STT (Batch)", + "severity": "error", + "message": "No batch STT model configured", + "resolution": "Set defaults.stt in config.yml", + } + ) diagnostics["overall_status"] = "partial" - + # Streaming STT check if stt_stream: if stt_stream.api_key: - diagnostics["info"].append({ - "component": "STT (Streaming)", - "message": f"Configured: {stt_stream.name} ({stt_stream.model_provider}) - API key present" - }) + diagnostics["info"].append( + { + "component": "STT (Streaming)", + "message": f"Configured: {stt_stream.name} ({stt_stream.model_provider}) - API key present", + } + ) else: - diagnostics["warnings"].append({ + diagnostics["warnings"].append( + { + "component": "STT (Streaming)", + "severity": "warning", + "message": f"{stt_stream.name} ({stt_stream.model_provider}) - No API key configured", + "resolution": "Real-time transcription can fail without API key", + } + ) + else: + diagnostics["warnings"].append( + { "component": "STT (Streaming)", "severity": "warning", - "message": f"{stt_stream.name} ({stt_stream.model_provider}) - No API key configured", - "resolution": "Real-time transcription can fail without API key" - }) - else: - diagnostics["warnings"].append({ - "component": "STT (Streaming)", - "severity": "warning", - "message": "No streaming STT model configured - streaming worker disabled", - "resolution": "Set defaults.stt_stream in config.yml for WebSocket transcription" - }) - + "message": "No streaming STT model configured - streaming worker disabled", + "resolution": "Set defaults.stt_stream in config.yml for WebSocket transcription", + } + ) + # LLM check if llm: if llm.api_key: - diagnostics["info"].append({ - "component": "LLM", - "message": f"Configured: {llm.name} ({llm.model_provider}) - API key present" - }) + diagnostics["info"].append( + { + "component": "LLM", + "message": f"Configured: {llm.name} ({llm.model_provider}) - API key present", + } + ) else: - diagnostics["warnings"].append({ - "component": "LLM", - "severity": "warning", - "message": f"{llm.name} ({llm.model_provider}) - No API key configured", - "resolution": "Memory extraction can fail without API key" - }) - + diagnostics["warnings"].append( + { + "component": "LLM", + "severity": "warning", + "message": f"{llm.name} ({llm.model_provider}) - No API key configured", + "resolution": "Memory extraction can fail without API key", + } + ) + else: diagnostics["overall_status"] = "unhealthy" - diagnostics["issues"].append({ - "component": "Model Registry", - "severity": "error", - "message": 
"Failed to load model registry", - "resolution": "Check config/defaults.yml for syntax errors" - }) + diagnostics["issues"].append( + { + "component": "Model Registry", + "severity": "error", + "message": "Failed to load model registry", + "resolution": "Check config/defaults.yml for syntax errors", + } + ) diagnostics["components"]["model_registry"] = { "status": "unhealthy", - "message": "Registry failed to load" + "message": "Registry failed to load", } except Exception as e: diagnostics["overall_status"] = "partial" - diagnostics["issues"].append({ - "component": "Model Registry", - "severity": "error", - "message": f"Error loading registry: {str(e)}", - "resolution": "Check logs for detailed error information" - }) - diagnostics["components"]["model_registry"] = { - "status": "unhealthy", - "message": str(e) - } - + diagnostics["issues"].append( + { + "component": "Model Registry", + "severity": "error", + "message": f"Error loading registry: {str(e)}", + "resolution": "Check logs for detailed error information", + } + ) + diagnostics["components"]["model_registry"] = {"status": "unhealthy", "message": str(e)} + # Check environment variables (only warn about keys relevant to configured providers) env_checks = [ ("AUTH_SECRET_KEY", "Required for authentication"), @@ -235,18 +254,22 @@ async def get_config_diagnostics(): if provider == "deepgram": env_checks.append(("DEEPGRAM_API_KEY", "Required for Deepgram transcription")) elif provider == "smallest": - env_checks.append(("SMALLEST_API_KEY", "Required for Smallest.ai Pulse transcription")) - + env_checks.append( + ("SMALLEST_API_KEY", "Required for Smallest.ai Pulse transcription") + ) + for env_var, description in env_checks: value = os.getenv(env_var) if not value or value == "": - diagnostics["warnings"].append({ - "component": "Environment Variables", - "severity": "warning", - "message": f"{env_var} not set - {description}", - "resolution": f"Set {env_var} in .env file" - }) - + diagnostics["warnings"].append( + { + "component": "Environment Variables", + "severity": "warning", + "message": f"{env_var} not set - {description}", + "resolution": f"Set {env_var} in .env file", + } + ) + return diagnostics @@ -288,7 +311,7 @@ async def get_observability_config(): Returns non-secret data only (enabled status and browser URL). 
""" - from advanced_omi_backend.openai_factory import is_langfuse_enabled + from advanced_omi_backend.observability.otel_setup import is_langfuse_enabled enabled = is_langfuse_enabled() session_base_url = None @@ -321,10 +344,7 @@ async def get_diarization_settings(): try: # Get settings using OmegaConf settings = load_diarization_settings() - return { - "settings": settings, - "status": "success" - } + return {"settings": settings, "status": "success"} except Exception as e: logger.exception("Error getting diarization settings") raise e @@ -335,8 +355,13 @@ async def save_diarization_settings_controller(settings: dict): try: # Validate settings valid_keys = { - "diarization_source", "similarity_threshold", "min_duration", "collar", - "min_duration_off", "min_speakers", "max_speakers" + "diarization_source", + "similarity_threshold", + "min_duration", + "collar", + "min_duration_off", + "min_speakers", + "max_speakers", } # Filter to only valid keys (allow round-trip GET→POST) @@ -348,13 +373,20 @@ async def save_diarization_settings_controller(settings: dict): # Type validation for known keys only if key in ["min_speakers", "max_speakers"]: if not isinstance(value, int) or value < 1 or value > 20: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be integer 1-20") + raise HTTPException( + status_code=400, detail=f"Invalid value for {key}: must be integer 1-20" + ) elif key == "diarization_source": if not isinstance(value, str) or value not in ["pyannote", "deepgram"]: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be 'pyannote' or 'deepgram'") + raise HTTPException( + status_code=400, + detail=f"Invalid value for {key}: must be 'pyannote' or 'deepgram'", + ) else: if not isinstance(value, (int, float)) or value < 0: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be positive number") + raise HTTPException( + status_code=400, detail=f"Invalid value for {key}: must be positive number" + ) filtered_settings[key] = value @@ -373,14 +405,14 @@ async def save_diarization_settings_controller(settings: dict): return { "message": "Diarization settings saved successfully", "settings": current_settings, - "status": "success" + "status": "success", } else: logger.warning("Settings save failed") return { "message": "Settings save failed", "settings": current_settings, - "status": "error" + "status": "error", } except Exception as e: @@ -393,10 +425,7 @@ async def get_misc_settings(): try: # Get settings using OmegaConf settings = load_misc_settings() - return { - "settings": settings, - "status": "success" - } + return {"settings": settings, "status": "success"} except Exception as e: logger.exception("Error getting misc settings") raise e @@ -406,7 +435,12 @@ async def save_misc_settings_controller(settings: dict): """Save miscellaneous settings.""" try: # Validate settings - boolean_keys = {"always_persist_enabled", "use_provider_segments", "per_segment_speaker_id", "always_batch_retranscribe"} + boolean_keys = { + "always_persist_enabled", + "use_provider_segments", + "per_segment_speaker_id", + "always_batch_retranscribe", + } integer_keys = {"transcription_job_timeout_seconds"} valid_keys = boolean_keys | integer_keys @@ -419,10 +453,15 @@ async def save_misc_settings_controller(settings: dict): # Type validation if key in boolean_keys: if not isinstance(value, bool): - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be boolean") + raise HTTPException( + status_code=400, 
detail=f"Invalid value for {key}: must be boolean" + ) elif key == "transcription_job_timeout_seconds": if not isinstance(value, int) or value < 60 or value > 7200: - raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be integer between 60 and 7200") + raise HTTPException( + status_code=400, + detail=f"Invalid value for {key}: must be integer between 60 and 7200", + ) filtered_settings[key] = value @@ -439,14 +478,14 @@ async def save_misc_settings_controller(settings: dict): return { "message": "Miscellaneous settings saved successfully", "settings": updated_settings, - "status": "success" + "status": "success", } else: logger.warning("Settings save failed") return { "message": "Settings save failed", "settings": load_misc_settings(), - "status": "error" + "status": "error", } except HTTPException: @@ -472,9 +511,7 @@ async def get_cleanup_settings_controller(user: User) -> dict: async def save_cleanup_settings_controller( - auto_cleanup_enabled: bool, - retention_days: int, - user: User + auto_cleanup_enabled: bool, retention_days: int, user: User ) -> dict: """ Save cleanup settings (admin only). @@ -504,19 +541,20 @@ async def save_cleanup_settings_controller( # Create settings object settings = CleanupSettings( - auto_cleanup_enabled=auto_cleanup_enabled, - retention_days=retention_days + auto_cleanup_enabled=auto_cleanup_enabled, retention_days=retention_days ) # Save using OmegaConf save_cleanup_settings(settings) - logger.info(f"Admin {user.email} updated cleanup settings: auto_cleanup={auto_cleanup_enabled}, retention={retention_days}d") + logger.info( + f"Admin {user.email} updated cleanup settings: auto_cleanup={auto_cleanup_enabled}, retention={retention_days}d" + ) return { "auto_cleanup_enabled": settings.auto_cleanup_enabled, "retention_days": settings.retention_days, - "message": "Cleanup settings saved successfully" + "message": "Cleanup settings saved successfully", } @@ -526,7 +564,7 @@ async def get_speaker_configuration(user: User): return { "primary_speakers": user.primary_speakers, "user_id": user.user_id, - "status": "success" + "status": "success", } except Exception as e: logger.exception(f"Error getting speaker configuration for user {user.user_id}") @@ -540,30 +578,32 @@ async def update_speaker_configuration(user: User, primary_speakers: list[dict]) for speaker in primary_speakers: if not isinstance(speaker, dict): raise ValueError("Each speaker must be a dictionary") - + required_fields = ["speaker_id", "name", "user_id"] for field in required_fields: if field not in speaker: raise ValueError(f"Missing required field: {field}") - + # Enforce server-side user_id and add timestamp to each speaker for speaker in primary_speakers: speaker["user_id"] = user.user_id # Override client-supplied user_id speaker["selected_at"] = datetime.now(UTC).isoformat() - + # Update user model user.primary_speakers = primary_speakers await user.save() - - logger.info(f"Updated primary speakers configuration for user {user.user_id}: {len(primary_speakers)} speakers") - + + logger.info( + f"Updated primary speakers configuration for user {user.user_id}: {len(primary_speakers)} speakers" + ) + return { "message": "Primary speakers configuration updated successfully", "primary_speakers": primary_speakers, "count": len(primary_speakers), - "status": "success" + "status": "success", } - + except Exception as e: logger.exception(f"Error updating speaker configuration for user {user.user_id}") raise e @@ -578,25 +618,25 @@ async def get_enrolled_speakers(user: 
User): # Initialize speaker recognition client speaker_client = SpeakerRecognitionClient() - + if not speaker_client.enabled: return { "speakers": [], "service_available": False, "message": "Speaker recognition service is not configured or disabled", - "status": "success" + "status": "success", } - + # Get enrolled speakers - using hardcoded user_id=1 for now (as noted in speaker_recognition_client.py) speakers = await speaker_client.get_enrolled_speakers(user_id="1") - + return { "speakers": speakers.get("speakers", []) if speakers else [], "service_available": True, "message": "Successfully retrieved enrolled speakers", - "status": "success" + "status": "success", } - + except Exception as e: logger.exception(f"Error getting enrolled speakers for user {user.user_id}") raise e @@ -611,25 +651,25 @@ async def get_speaker_service_status(): # Initialize speaker recognition client speaker_client = SpeakerRecognitionClient() - + if not speaker_client.enabled: return { "service_available": False, "healthy": False, "message": "Speaker recognition service is not configured or disabled", - "status": "disabled" + "status": "disabled", } - + # Perform health check health_result = await speaker_client.health_check() - + if health_result: return { "service_available": True, "healthy": True, "message": "Speaker recognition service is healthy", "service_url": speaker_client.service_url, - "status": "healthy" + "status": "healthy", } else: return { @@ -637,17 +677,17 @@ async def get_speaker_service_status(): "healthy": False, "message": "Speaker recognition service is not responding", "service_url": speaker_client.service_url, - "status": "unhealthy" + "status": "unhealthy", } - + except Exception as e: logger.exception("Error checking speaker service status") raise e - # Memory Configuration Management Functions + async def get_memory_config_raw(): """Get current memory configuration (memory section of config.yml) as YAML.""" try: @@ -655,7 +695,7 @@ async def get_memory_config_raw(): if not os.path.exists(cfg_path): raise FileNotFoundError(f"Config file not found: {cfg_path}") - with open(cfg_path, 'r') as f: + with open(cfg_path, "r") as f: data = _yaml.load(f) or {} memory_section = data.get("memory", {}) stream = StringIO() @@ -691,10 +731,10 @@ async def update_memory_config_raw(config_yaml: str): shutil.copy2(cfg_path, backup_path) # Update memory section and write file - with open(cfg_path, 'r') as f: + with open(cfg_path, "r") as f: data = _yaml.load(f) or {} data["memory"] = new_mem - with open(cfg_path, 'w') as f: + with open(cfg_path, "w") as f: _yaml.dump(data, f) # Reload registry @@ -736,7 +776,11 @@ async def reload_memory_config(): try: cfg_path = _find_config_path() load_models_config(force_reload=True) - return {"message": "Configuration reloaded", "config_path": str(cfg_path), "status": "success"} + return { + "message": "Configuration reloaded", + "config_path": str(cfg_path), + "status": "success", + } except Exception as e: logger.exception("Error reloading config") raise e @@ -758,7 +802,7 @@ async def delete_all_user_memories(user: User): "message": f"Successfully deleted {deleted_count} memories", "deleted_count": deleted_count, "user_id": user.user_id, - "status": "success" + "status": "success", } except Exception as e: @@ -768,6 +812,7 @@ async def delete_all_user_memories(user: User): # Memory Provider Configuration Functions + async def get_memory_provider(): """Get current memory provider configuration.""" try: @@ -782,7 +827,7 @@ async def get_memory_provider(): return { 
"current_provider": current_provider, "available_providers": available_providers, - "status": "success" + "status": "success", } except Exception as e: @@ -798,7 +843,9 @@ async def set_memory_provider(provider: str): valid_providers = ["chronicle", "openmemory_mcp"] if provider not in valid_providers: - raise ValueError(f"Invalid provider '{provider}'. Valid providers: {', '.join(valid_providers)}") + raise ValueError( + f"Invalid provider '{provider}'. Valid providers: {', '.join(valid_providers)}" + ) # Path to .env file (assuming we're running from backends/advanced/) env_path = os.path.join(os.getcwd(), ".env") @@ -807,7 +854,7 @@ async def set_memory_provider(provider: str): raise FileNotFoundError(f".env file not found at {env_path}") # Read current .env file - with open(env_path, 'r') as file: + with open(env_path, "r") as file: lines = file.readlines() # Update or add MEMORY_PROVIDER line @@ -831,7 +878,7 @@ async def set_memory_provider(provider: str): logger.info(f"Created .env backup at {backup_path}") # Write updated .env file - with open(env_path, 'w') as file: + with open(env_path, "w") as file: file.writelines(updated_lines) # Update environment variable for current process @@ -845,7 +892,7 @@ async def set_memory_provider(provider: str): "env_path": env_path, "backup_created": True, "requires_restart": True, - "status": "success" + "status": "success", } except Exception as e: @@ -855,6 +902,7 @@ async def set_memory_provider(provider: str): # LLM Operations Configuration Functions + async def get_llm_operations(): """Get LLM operation configurations and available models.""" try: @@ -906,25 +954,36 @@ async def save_llm_operations(operations: dict): extra_keys = set(op_value.keys()) - valid_keys if extra_keys: - raise HTTPException(status_code=400, detail=f"Invalid keys for '{op_name}': {extra_keys}") + raise HTTPException( + status_code=400, detail=f"Invalid keys for '{op_name}': {extra_keys}" + ) if "temperature" in op_value and op_value["temperature"] is not None: t = op_value["temperature"] if not isinstance(t, (int, float)) or t < 0 or t > 2: - raise HTTPException(status_code=400, detail=f"Invalid temperature for '{op_name}': must be 0-2") + raise HTTPException( + status_code=400, detail=f"Invalid temperature for '{op_name}': must be 0-2" + ) if "max_tokens" in op_value and op_value["max_tokens"] is not None: mt = op_value["max_tokens"] if not isinstance(mt, int) or mt <= 0: - raise HTTPException(status_code=400, detail=f"Invalid max_tokens for '{op_name}': must be positive int") + raise HTTPException( + status_code=400, + detail=f"Invalid max_tokens for '{op_name}': must be positive int", + ) if "model" in op_value and op_value["model"] is not None: if not registry.get_by_name(op_value["model"]): - raise HTTPException(status_code=400, detail=f"Model '{op_value['model']}' not found in registry") + raise HTTPException( + status_code=400, detail=f"Model '{op_value['model']}' not found in registry" + ) if "response_format" in op_value and op_value["response_format"] is not None: if op_value["response_format"] != "json": - raise HTTPException(status_code=400, detail=f"response_format must be 'json' or null") + raise HTTPException( + status_code=400, detail=f"response_format must be 'json' or null" + ) if save_config_section("llm_operations", operations): load_models_config(force_reload=True) @@ -958,11 +1017,21 @@ async def test_llm_model(model_name: Optional[str]): if model_name: model_def = registry.get_by_name(model_name) if not model_def: - return {"success": False, 
"model_name": model_name, "error": f"Model '{model_name}' not found", "status": "error"} + return { + "success": False, + "model_name": model_name, + "error": f"Model '{model_name}' not found", + "status": "error", + } else: model_def = registry.get_default("llm") if not model_def: - return {"success": False, "model_name": None, "error": "No default LLM configured", "status": "error"} + return { + "success": False, + "model_name": None, + "error": "No default LLM configured", + "status": "error", + } client = create_openai_client( api_key=model_def.api_key or "", @@ -998,6 +1067,7 @@ async def test_llm_model(model_name: Optional[str]): # Chat Configuration Management Functions + async def get_chat_config_yaml() -> str: """Get chat system prompt as plain text.""" try: @@ -1012,11 +1082,11 @@ async def get_chat_config_yaml() -> str: if not os.path.exists(config_path): return default_prompt - with open(config_path, 'r') as f: + with open(config_path, "r") as f: full_config = _yaml.load(f) or {} - chat_config = full_config.get('chat', {}) - system_prompt = chat_config.get('system_prompt', default_prompt) + chat_config = full_config.get("chat", {}) + system_prompt = chat_config.get("system_prompt", default_prompt) # Return just the prompt text, not the YAML structure return system_prompt @@ -1042,26 +1112,26 @@ async def save_chat_config_yaml(prompt_text: str) -> dict: raise ValueError("Prompt too long (maximum 10000 characters)") # Create chat config dict - chat_config = {'system_prompt': prompt_text} + chat_config = {"system_prompt": prompt_text} # Load full config if os.path.exists(config_path): - with open(config_path, 'r') as f: + with open(config_path, "r") as f: full_config = _yaml.load(f) or {} else: full_config = {} # Backup existing config if os.path.exists(config_path): - backup_path = str(config_path) + '.backup' + backup_path = str(config_path) + ".backup" shutil.copy2(config_path, backup_path) logger.info(f"Created config backup at {backup_path}") # Update chat section - full_config['chat'] = chat_config + full_config["chat"] = chat_config # Save - with open(config_path, 'w') as f: + with open(config_path, "w") as f: _yaml.dump(full_config, f) # Reload config in memory (hot-reload) @@ -1098,6 +1168,7 @@ async def validate_chat_config_yaml(prompt_text: str) -> dict: # Plugin Configuration Management Functions + async def get_plugins_config_yaml() -> str: """Get plugins configuration as YAML text.""" try: @@ -1120,7 +1191,7 @@ async def get_plugins_config_yaml() -> str: if not plugins_yml_path.exists(): return default_config - with open(plugins_yml_path, 'r') as f: + with open(plugins_yml_path, "r") as f: yaml_content = f.read() return yaml_content @@ -1142,7 +1213,7 @@ async def save_plugins_config_yaml(yaml_content: str) -> dict: raise ValueError("Configuration must be a YAML dictionary") # Validate has 'plugins' key - if 'plugins' not in parsed_config: + if "plugins" not in parsed_config: raise ValueError("Configuration must contain 'plugins' key") except ValueError: @@ -1155,12 +1226,12 @@ async def save_plugins_config_yaml(yaml_content: str) -> dict: # Backup existing config if plugins_yml_path.exists(): - backup_path = str(plugins_yml_path) + '.backup' + backup_path = str(plugins_yml_path) + ".backup" shutil.copy2(plugins_yml_path, backup_path) logger.info(f"Created plugins config backup at {backup_path}") # Save new config - with open(plugins_yml_path, 'w') as f: + with open(plugins_yml_path, "w") as f: f.write(yaml_content) # Hot-reload plugins and signal worker restart @@ 
-1201,35 +1272,50 @@ async def validate_plugins_config_yaml(yaml_content: str) -> dict: if not isinstance(parsed_config, dict): return {"valid": False, "error": "Configuration must be a YAML dictionary"} - if 'plugins' not in parsed_config: + if "plugins" not in parsed_config: return {"valid": False, "error": "Configuration must contain 'plugins' key"} - plugins = parsed_config['plugins'] + plugins = parsed_config["plugins"] if not isinstance(plugins, dict): return {"valid": False, "error": "'plugins' must be a dictionary"} # Validate each plugin - valid_access_levels = ['transcript', 'conversation', 'memory'] - valid_trigger_types = ['wake_word', 'always', 'conditional'] + valid_access_levels = ["transcript", "conversation", "memory"] + valid_trigger_types = ["wake_word", "always", "conditional"] for plugin_id, plugin_config in plugins.items(): if not isinstance(plugin_config, dict): - return {"valid": False, "error": f"Plugin '{plugin_id}' config must be a dictionary"} + return { + "valid": False, + "error": f"Plugin '{plugin_id}' config must be a dictionary", + } # Check required fields - if 'enabled' in plugin_config and not isinstance(plugin_config['enabled'], bool): + if "enabled" in plugin_config and not isinstance(plugin_config["enabled"], bool): return {"valid": False, "error": f"Plugin '{plugin_id}': 'enabled' must be boolean"} - if 'access_level' in plugin_config and plugin_config['access_level'] not in valid_access_levels: - return {"valid": False, "error": f"Plugin '{plugin_id}': invalid access_level (must be one of {valid_access_levels})"} + if ( + "access_level" in plugin_config + and plugin_config["access_level"] not in valid_access_levels + ): + return { + "valid": False, + "error": f"Plugin '{plugin_id}': invalid access_level (must be one of {valid_access_levels})", + } - if 'trigger' in plugin_config: - trigger = plugin_config['trigger'] + if "trigger" in plugin_config: + trigger = plugin_config["trigger"] if not isinstance(trigger, dict): - return {"valid": False, "error": f"Plugin '{plugin_id}': 'trigger' must be a dictionary"} + return { + "valid": False, + "error": f"Plugin '{plugin_id}': 'trigger' must be a dictionary", + } - if 'type' in trigger and trigger['type'] not in valid_trigger_types: - return {"valid": False, "error": f"Plugin '{plugin_id}': invalid trigger type (must be one of {valid_trigger_types})"} + if "type" in trigger and trigger["type"] not in valid_trigger_types: + return { + "valid": False, + "error": f"Plugin '{plugin_id}': invalid trigger type (must be one of {valid_trigger_types})", + } return {"valid": True, "message": "Configuration is valid"} @@ -1314,9 +1400,11 @@ async def reload_plugins_controller(app=None) -> dict: return { "success": reload_result.get("success", False), - "message": "Plugins reloaded and worker restart signaled" - if worker_signal_sent - else "Plugins reloaded but worker restart signal failed", + "message": ( + "Plugins reloaded and worker restart signaled" + if worker_signal_sent + else "Plugins reloaded but worker restart signal failed" + ), "reload": reload_result, "worker_signal_sent": worker_signal_sent, } @@ -1324,6 +1412,7 @@ async def reload_plugins_controller(app=None) -> dict: # Structured Plugin Configuration Management Functions (Form-based UI) + async def get_plugins_metadata() -> dict: """Get plugin metadata for form-based configuration UI. 
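
Note on the validation hunk above: a minimal sketch of the plugins.yml structure it accepts. This uses PyYAML for brevity (the backend itself loads config via ruamel.yaml), and the plugin id and values are illustrative only:

    import yaml  # sketch only; PyYAML assumed, not the backend's ruamel.yaml

    VALID_ACCESS_LEVELS = {"transcript", "conversation", "memory"}
    VALID_TRIGGER_TYPES = {"wake_word", "always", "conditional"}

    config_text = """
    plugins:
      my_plugin:            # hypothetical plugin id
        enabled: true
        access_level: conversation
        trigger:
          type: wake_word
    """

    parsed = yaml.safe_load(config_text)
    assert isinstance(parsed, dict) and "plugins" in parsed
    for plugin_id, cfg in parsed["plugins"].items():
        assert isinstance(cfg, dict)
        # Mirrors the checks in validate_plugins_config_yaml above
        assert isinstance(cfg.get("enabled", False), bool)
        assert cfg.get("access_level", "transcript") in VALID_ACCESS_LEVELS
        trigger = cfg.get("trigger", {})
        assert isinstance(trigger, dict)
        assert trigger.get("type", "always") in VALID_TRIGGER_TYPES
    print("plugins.yml structure is valid")

Unlike this sketch's asserts, the endpoint itself never raises on bad input: each failed check returns a {"valid": False, "error": ...} dict so the form UI can surface the message.
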
@@ -1350,19 +1439,17 @@ async def get_plugins_metadata() -> dict: orchestration_configs = {} if plugins_yml_path.exists(): - with open(plugins_yml_path, 'r') as f: + with open(plugins_yml_path, "r") as f: plugins_data = _yaml.load(f) or {} - orchestration_configs = plugins_data.get('plugins', {}) + orchestration_configs = plugins_data.get("plugins", {}) # Build metadata for each plugin plugins_metadata = [] for plugin_id, plugin_class in discovered_plugins.items(): # Get orchestration config (or empty dict if not configured) - orchestration_config = orchestration_configs.get(plugin_id, { - 'enabled': False, - 'events': [], - 'condition': {'type': 'always'} - }) + orchestration_config = orchestration_configs.get( + plugin_id, {"enabled": False, "events": [], "condition": {"type": "always"}} + ) # Get complete metadata including schema metadata = get_plugin_metadata(plugin_id, plugin_class, orchestration_config) @@ -1370,10 +1457,7 @@ async def get_plugins_metadata() -> dict: logger.info(f"Retrieved metadata for {len(plugins_metadata)} plugins") - return { - "plugins": plugins_metadata, - "status": "success" - } + return {"plugins": plugins_metadata, "status": "success"} except Exception as e: logger.exception("Error getting plugins metadata") @@ -1396,7 +1480,10 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: Success message with list of updated files """ try: - from advanced_omi_backend.services.plugin_service import _get_plugins_dir, discover_plugins + from advanced_omi_backend.services.plugin_service import ( + _get_plugins_dir, + discover_plugins, + ) # Validate plugin exists discovered_plugins = discover_plugins() @@ -1406,84 +1493,83 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: updated_files = [] # 1. Update config/plugins.yml (orchestration) - if 'orchestration' in config: + if "orchestration" in config: plugins_yml_path = get_plugins_yml_path() # Load current plugins.yml if plugins_yml_path.exists(): - with open(plugins_yml_path, 'r') as f: + with open(plugins_yml_path, "r") as f: plugins_data = _yaml.load(f) or {} else: plugins_data = {} - if 'plugins' not in plugins_data: - plugins_data['plugins'] = {} + if "plugins" not in plugins_data: + plugins_data["plugins"] = {} # Update orchestration config - orchestration = config['orchestration'] - plugins_data['plugins'][plugin_id] = { - 'enabled': orchestration.get('enabled', False), - 'events': orchestration.get('events', []), - 'condition': orchestration.get('condition', {'type': 'always'}) + orchestration = config["orchestration"] + plugins_data["plugins"][plugin_id] = { + "enabled": orchestration.get("enabled", False), + "events": orchestration.get("events", []), + "condition": orchestration.get("condition", {"type": "always"}), } # Create backup if plugins_yml_path.exists(): - backup_path = str(plugins_yml_path) + '.backup' + backup_path = str(plugins_yml_path) + ".backup" shutil.copy2(plugins_yml_path, backup_path) # Create config directory if needed plugins_yml_path.parent.mkdir(parents=True, exist_ok=True) # Write updated plugins.yml - with open(plugins_yml_path, 'w') as f: + with open(plugins_yml_path, "w") as f: _yaml.dump(plugins_data, f) updated_files.append(str(plugins_yml_path)) logger.info(f"Updated orchestration config for '{plugin_id}' in {plugins_yml_path}") # 2. 
Update plugins/{plugin_id}/config.yml (settings with env var references) - if 'settings' in config: + if "settings" in config: plugins_dir = _get_plugins_dir() plugin_config_path = plugins_dir / plugin_id / "config.yml" # Load current config.yml if plugin_config_path.exists(): - with open(plugin_config_path, 'r') as f: + with open(plugin_config_path, "r") as f: plugin_config_data = _yaml.load(f) or {} else: plugin_config_data = {} # Update settings (preserve ${ENV_VAR} references) - settings = config['settings'] + settings = config["settings"] plugin_config_data.update(settings) # Create backup if plugin_config_path.exists(): - backup_path = str(plugin_config_path) + '.backup' + backup_path = str(plugin_config_path) + ".backup" shutil.copy2(plugin_config_path, backup_path) # Write updated config.yml - with open(plugin_config_path, 'w') as f: + with open(plugin_config_path, "w") as f: _yaml.dump(plugin_config_data, f) updated_files.append(str(plugin_config_path)) logger.info(f"Updated settings for '{plugin_id}' in {plugin_config_path}") # 3. Update per-plugin .env (only changed env vars) - if 'env_vars' in config and config['env_vars']: + if "env_vars" in config and config["env_vars"]: from advanced_omi_backend.services.plugin_service import save_plugin_env # Filter out masked values (unchanged secrets) - changed_vars = { - k: v for k, v in config['env_vars'].items() - if v != '••••••••••••' - } + changed_vars = {k: v for k, v in config["env_vars"].items() if v != "••••••••••••"} if changed_vars: env_path = save_plugin_env(plugin_id, changed_vars) updated_files.append(str(env_path)) - logger.info(f"Saved {len(changed_vars)} env var(s) to per-plugin .env for '{plugin_id}'") + logger.info( + f"Saved {len(changed_vars)} env var(s) to per-plugin .env for '{plugin_id}'" + ) # Update os.environ so hot-reload picks up changes immediately for k, v in changed_vars.items(): @@ -1505,7 +1591,7 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: "message": message, "updated_files": updated_files, "reload": reload_result, - "status": "success" + "status": "success", } except Exception as e: @@ -1541,29 +1627,29 @@ async def test_plugin_connection(plugin_id: str, config: dict) -> dict: plugin_class = discovered_plugins[plugin_id] # Check if plugin supports testing - if not hasattr(plugin_class, 'test_connection'): + if not hasattr(plugin_class, "test_connection"): return { "success": False, "message": f"Plugin '{plugin_id}' does not support connection testing", - "status": "unsupported" + "status": "unsupported", } # Build complete config from provided data test_config = {} # Merge settings - if 'settings' in config: - test_config.update(config['settings']) + if "settings" in config: + test_config.update(config["settings"]) # Load per-plugin env for resolving masked values plugin_env = load_plugin_env(plugin_id) # Add env vars (expand any ${ENV_VAR} references with test values) - if 'env_vars' in config: - for key, value in config['env_vars'].items(): + if "env_vars" in config: + for key, value in config["env_vars"].items(): # For masked values, resolve from per-plugin .env then os.environ - if value == '••••••••••••': - value = plugin_env.get(key) or os.getenv(key, '') + if value == "••••••••••••": + value = plugin_env.get(key) or os.getenv(key, "") test_config[key.lower()] = value # Expand any remaining env var references @@ -1578,15 +1664,12 @@ async def test_plugin_connection(plugin_id: str, config: dict) -> dict: except Exception as e: logger.exception(f"Error testing 
connection for plugin '{plugin_id}'") - return { - "success": False, - "message": f"Connection test failed: {str(e)}", - "status": "error" - } + return {"success": False, "message": f"Connection test failed: {str(e)}", "status": "error"} # Plugin Lifecycle Management Functions (create / write-code / delete) + def _snake_to_pascal(snake_str: str) -> str: """Convert snake_case to PascalCase.""" return "".join(word.capitalize() for word in snake_str.split("_")) @@ -1615,14 +1698,20 @@ async def create_plugin( Returns: Success dict with plugin_id and created_files list """ - from advanced_omi_backend.services.plugin_service import _get_plugins_dir, discover_plugins + from advanced_omi_backend.services.plugin_service import ( + _get_plugins_dir, + discover_plugins, + ) # Validate name if not plugin_name.replace("_", "").isalnum(): return {"success": False, "error": "Plugin name must be alphanumeric with underscores only"} if not re.match(r"^[a-z][a-z0-9_]*$", plugin_name): - return {"success": False, "error": "Plugin name must be lowercase snake_case starting with a letter"} + return { + "success": False, + "error": "Plugin name must be lowercase snake_case starting with a letter", + } plugins_dir = _get_plugins_dir() plugin_dir = plugins_dir / plugin_name @@ -1650,8 +1739,12 @@ async def create_plugin( (plugin_dir / "plugin.py").write_text(plugin_code, encoding="utf-8") else: # Write standard boilerplate - events_str = ", ".join(f'"{e}"' for e in events) if events else '"conversation.complete"' - boilerplate = inspect.cleandoc(f''' + events_str = ( + ", ".join(f'"{e}"' for e in events) if events else '"conversation.complete"' + ) + boilerplate = ( + inspect.cleandoc( + f''' """ {class_name} implementation. @@ -1688,7 +1781,10 @@ async def cleanup(self): async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: logger.info(f"Processing conversation for user: {{context.user_id}}") return PluginResult(success=True, message="OK") - ''') + "\n" + ''' + ) + + "\n" + ) (plugin_dir / "plugin.py").write_text(boilerplate, encoding="utf-8") created_files.append("plugin.py") @@ -1699,7 +1795,7 @@ async def on_conversation_complete(self, context: PluginContext) -> Optional[Plu # config.yml config_yml = {"description": description} - with open(plugin_dir / "config.yml", 'w', encoding="utf-8") as f: + with open(plugin_dir / "config.yml", "w", encoding="utf-8") as f: _yaml.dump(config_yml, f) created_files.append("config.yml") @@ -1848,7 +1944,10 @@ async def delete_plugin(plugin_id: str, remove_files: bool = False) -> dict: logger.info(f"Removed plugin directory: {plugin_dir}") if not removed_from_yml and not files_removed: - return {"success": False, "error": f"Plugin '{plugin_id}' not found in plugins.yml or on disk"} + return { + "success": False, + "error": f"Plugin '{plugin_id}' not found in plugins.yml or on disk", + } logger.info(f"Deleted plugin '{plugin_id}' (yml={removed_from_yml}, files={files_removed})") return { diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index 96ccc77b..8b5f2d43 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -11,7 +11,7 @@ from typing import Any, Dict, Optional from advanced_omi_backend.model_registry import get_models_registry -from advanced_omi_backend.openai_factory import create_openai_client, is_langfuse_enabled +from advanced_omi_backend.openai_factory import 
create_openai_client from advanced_omi_backend.services.memory.config import ( load_config_yml as _load_root_config, ) @@ -62,7 +62,9 @@ def __init__( self.base_url = base_url self.model = model if not self.api_key or not self.base_url or not self.model: - raise ValueError(f"LLM configuration incomplete: api_key={'set' if self.api_key else 'MISSING'}, base_url={'set' if self.base_url else 'MISSING'}, model={'set' if self.model else 'MISSING'}") + raise ValueError( + f"LLM configuration incomplete: api_key={'set' if self.api_key else 'MISSING'}, base_url={'set' if self.base_url else 'MISSING'}, model={'set' if self.model else 'MISSING'}" + ) # Initialize OpenAI client with optional Langfuse tracing try: @@ -78,31 +80,32 @@ def __init__( raise def generate( - self, prompt: str, model: str | None = None, temperature: float | None = None, - **langfuse_kwargs, + self, + prompt: str, + model: str | None = None, + temperature: float | None = None, ) -> str: """Generate text completion using OpenAI-compatible API.""" try: model_name = model or self.model temp = temperature if temperature is not None else self.temperature - params = { - "model": model_name, - "messages": [{"role": "user", "content": prompt}], - "temperature": temp, - } - if is_langfuse_enabled(): - params.update(langfuse_kwargs) - - response = self.client.chat.completions.create(**params) + response = self.client.chat.completions.create( + model=model_name, + messages=[{"role": "user", "content": prompt}], + temperature=temp, + ) return response.choices[0].message.content.strip() except Exception as e: self.logger.error(f"Error generating completion: {e}") raise def chat_with_tools( - self, messages: list, tools: list | None = None, model: str | None = None, - temperature: float | None = None, **langfuse_kwargs, + self, + messages: list, + tools: list | None = None, + model: str | None = None, + temperature: float | None = None, ): """Chat completion with tool/function calling support. 
Returns raw response object.""" model_name = model or self.model @@ -113,8 +116,6 @@ def chat_with_tools( } if tools: params["tools"] = tools - if is_langfuse_enabled(): - params.update(langfuse_kwargs) return self.client.chat.completions.create(**params) def health_check(self) -> Dict: @@ -157,11 +158,13 @@ class LLMClientFactory: def create_client() -> LLMClient: """Create an LLM client based on model registry configuration (config.yml).""" registry = get_models_registry() - + if registry: llm_def = registry.get_default("llm") if llm_def: - logger.info(f"Creating LLM client from registry: {llm_def.name} ({llm_def.model_provider})") + logger.info( + f"Creating LLM client from registry: {llm_def.name} ({llm_def.model_provider})" + ) params = llm_def.model_params or {} return OpenAILLMClient( api_key=llm_def.api_key, @@ -169,7 +172,7 @@ def create_client() -> LLMClient: model=llm_def.model_name, temperature=params.get("temperature", 0.1), ) - + raise ValueError("No default LLM defined in config.yml") @staticmethod @@ -196,20 +199,12 @@ def reset_llm_client(): _llm_client = None -def _langfuse_metadata(session_id: str | None) -> dict: - """Return metadata dict with langfuse_session_id if Langfuse is enabled.""" - if session_id and is_langfuse_enabled(): - return {"langfuse_session_id": session_id} - return {} - - # Async wrapper for blocking LLM operations async def async_generate( prompt: str, model: str | None = None, temperature: float | None = None, operation: str | None = None, - langfuse_session_id: str | None = None, ) -> str: """Async wrapper for LLM text generation. @@ -218,9 +213,8 @@ async def async_generate( The resolved config determines model, temperature, max_tokens, etc. Explicit ``model``/``temperature`` kwargs still override the resolved values. - When ``langfuse_session_id`` is provided and Langfuse is enabled, - the session ID is set on the current Langfuse trace to group all - LLM calls for a conversation. + Tracing is handled automatically by the OTEL instrumentor; use + ``set_otel_session()`` at job boundaries to group calls by session. """ if operation: registry = get_models_registry() @@ -233,16 +227,13 @@ async def async_generate( if model is not None: api_params["model"] = model api_params["messages"] = [{"role": "user", "content": prompt}] - api_params["metadata"] = _langfuse_metadata(langfuse_session_id) response = await client.chat.completions.create(**api_params) return response.choices[0].message.content.strip() # Fallback: use singleton client client = get_llm_client() loop = asyncio.get_running_loop() - return await loop.run_in_executor( - None, lambda: client.generate(prompt, model, temperature) - ) + return await loop.run_in_executor(None, lambda: client.generate(prompt, model, temperature)) async def async_chat_with_tools( @@ -251,11 +242,11 @@ async def async_chat_with_tools( model: str | None = None, temperature: float | None = None, operation: str | None = None, - langfuse_session_id: str | None = None, ): """Async wrapper for chat completion with tool calling. When ``operation`` is provided, parameters are resolved from config. + Tracing is handled automatically by the OTEL instrumentor. 
""" if operation: registry = get_models_registry() @@ -270,7 +261,6 @@ async def async_chat_with_tools( api_params["messages"] = messages if tools: api_params["tools"] = tools - api_params["metadata"] = _langfuse_metadata(langfuse_session_id) return await client.chat.completions.create(**api_params) # Fallback: use singleton client diff --git a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py index 488dcb0d..dffa4f1e 100644 --- a/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py +++ b/backends/advanced/src/advanced_omi_backend/observability/otel_setup.py @@ -26,6 +26,16 @@ def is_galileo_enabled() -> bool: return bool(os.getenv("GALILEO_API_KEY")) +@lru_cache(maxsize=1) +def is_langfuse_enabled() -> bool: + """Check if Langfuse OTEL is configured.""" + return bool( + os.getenv("LANGFUSE_PUBLIC_KEY") + and os.getenv("LANGFUSE_SECRET_KEY") + and os.getenv("LANGFUSE_HOST") + ) + + def is_otel_enabled() -> bool: """Check if any OTel exporter has been initialised.""" return _otel_initialised @@ -66,38 +76,83 @@ def clear_otel_session() -> None: def init_otel() -> None: - """Initialize OTEL with Galileo exporter and OpenAI instrumentor. + """Initialize OTEL with configured exporters and OpenAI instrumentor. - Call once at app startup. Safe to call if Galileo is not configured (no-op). + Supports multiple backends simultaneously: + - Galileo: if GALILEO_API_KEY is set + - Langfuse: if LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST are set + + Call once at app startup. No-op if no backends are configured. """ - if not is_galileo_enabled(): - logger.info("Galileo not configured, skipping OTEL initialization") + galileo = is_galileo_enabled() + langfuse = is_langfuse_enabled() + + if not galileo and not langfuse: + logger.info("No OTEL backends configured (Galileo/Langfuse), skipping initialization") return try: - from galileo import otel - from openinference.instrumentation.openai import OpenAIInstrumentor from opentelemetry.sdk import trace as trace_sdk - project = os.getenv("GALILEO_PROJECT", "chronicle") - logstream = os.getenv("GALILEO_LOG_STREAM", "default") - tracer_provider = trace_sdk.TracerProvider() - galileo_processor = otel.GalileoSpanProcessor( - project=project, logstream=logstream - ) - tracer_provider.add_span_processor(galileo_processor) - - # Auto-instrument all OpenAI SDK calls - OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) + backends = [] + + # --- Galileo backend --- + if galileo: + try: + from galileo import otel + + project = os.getenv("GALILEO_PROJECT", "chronicle") + logstream = os.getenv("GALILEO_LOG_STREAM", "default") + galileo_processor = otel.GalileoSpanProcessor(project=project, logstream=logstream) + tracer_provider.add_span_processor(galileo_processor) + backends.append("Galileo") + except ImportError: + logger.warning( + "Galileo packages not installed. " "Install with: uv pip install '.[galileo]'" + ) + except Exception as e: + logger.error(f"Failed to add Galileo span processor: {e}") + + # --- Langfuse backend --- + if langfuse: + try: + from langfuse.opentelemetry import LangfuseSpanProcessor + + langfuse_processor = LangfuseSpanProcessor() + tracer_provider.add_span_processor(langfuse_processor) + backends.append("Langfuse") + except ImportError: + logger.warning( + "Langfuse OTEL packages not installed. " "Ensure langfuse>=3.13.0 is installed." 
+ ) + except Exception as e: + logger.error(f"Failed to add Langfuse span processor: {e}") + + if not backends: + logger.warning("No OTEL span processors were successfully added") + return + + # Auto-instrument all OpenAI SDK calls (backend-agnostic) + try: + from openinference.instrumentation.openai import OpenAIInstrumentor + + OpenAIInstrumentor().instrument(tracer_provider=tracer_provider) + except ImportError: + logger.warning( + "OpenAI OTEL instrumentor not installed. " + "Install with: uv pip install '.[galileo]'" + ) + return global _otel_initialised _otel_initialised = True - logger.info("OTEL initialized with Galileo exporter + OpenAI instrumentor") + logger.info( + f"OTEL initialized with {' + '.join(backends)} exporter(s) + OpenAI instrumentor" + ) except ImportError: logger.warning( - "Galileo/OTEL packages not installed. " - "Install with: uv pip install '.[galileo]'" + "OTEL SDK packages not installed. " "Install opentelemetry-api and opentelemetry-sdk." ) except Exception as e: logger.error(f"Failed to initialize OTEL: {e}") diff --git a/backends/advanced/src/advanced_omi_backend/openai_factory.py b/backends/advanced/src/advanced_omi_backend/openai_factory.py index 17f6eba1..b10f72f0 100644 --- a/backends/advanced/src/advanced_omi_backend/openai_factory.py +++ b/backends/advanced/src/advanced_omi_backend/openai_factory.py @@ -1,29 +1,22 @@ -"""Centralized OpenAI client factory with optional LangFuse tracing. +"""Centralized OpenAI client factory. Single source of truth for creating OpenAI/AsyncOpenAI clients. All other modules that need an OpenAI client should use this factory instead of -duplicating LangFuse detection logic. +creating clients directly. + +Tracing is handled by the OTEL instrumentor (see observability/otel_setup.py), +which auto-instruments all OpenAI calls at startup. No per-client wrapping needed. """ import logging -import os -from functools import lru_cache - -logger = logging.getLogger(__name__) +import openai -@lru_cache(maxsize=1) -def is_langfuse_enabled() -> bool: - """Check if LangFuse is properly configured (cached).""" - return bool( - os.getenv("LANGFUSE_PUBLIC_KEY") - and os.getenv("LANGFUSE_SECRET_KEY") - and os.getenv("LANGFUSE_HOST") - ) +logger = logging.getLogger(__name__) def create_openai_client(api_key: str, base_url: str, is_async: bool = False): - """Create an OpenAI client with optional LangFuse tracing. + """Create an OpenAI client. 
Args: api_key: OpenAI API key @@ -31,18 +24,9 @@ def create_openai_client(api_key: str, base_url: str, is_async: bool = False): is_async: Whether to return AsyncOpenAI or sync OpenAI client Returns: - OpenAI or AsyncOpenAI client instance (with or without LangFuse wrapping) + OpenAI or AsyncOpenAI client instance """ - if is_langfuse_enabled(): - import langfuse.openai as openai_module - - logger.debug("Creating OpenAI client with LangFuse tracing") - else: - import openai as openai_module - - logger.debug("Creating OpenAI client without tracing") - if is_async: - return openai_module.AsyncOpenAI(api_key=api_key, base_url=base_url) + return openai.AsyncOpenAI(api_key=api_key, base_url=base_url) else: - return openai_module.OpenAI(api_key=api_key, base_url=base_url) + return openai.OpenAI(api_key=api_key, base_url=base_url) diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 29719566..cb0f7137 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -382,6 +382,14 @@ async def clear_jobs( for job_id in job_ids: try: job = Job.fetch(job_id, connection=redis_conn) + # Skip jobs that are currently running (their ID may have been + # reused by a new session's job with the same ID) + if job.get_status() in ("started", "queued", "deferred"): + logger.debug( + f"Skipping {registry_name} job {job_id}: currently {job.get_status()}" + ) + registry.remove(job_id) # Remove stale registry entry only + continue job.delete() total_removed += 1 except Exception: diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/base.py b/backends/advanced/src/advanced_omi_backend/services/memory/base.py index bae18e56..9eddddbc 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/base.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/base.py @@ -341,8 +341,10 @@ class LLMProviderBase(ABC): @abstractmethod async def extract_memories( - self, text: str, prompt: str, user_id: Optional[str] = None, - langfuse_session_id: Optional[str] = None, + self, + text: str, + prompt: str, + user_id: Optional[str] = None, ) -> List[str]: """Extract meaningful fact memories from text using an LLM. @@ -350,7 +352,6 @@ async def extract_memories( text: Input text to extract memories from prompt: System prompt to guide the extraction process user_id: Optional user ID for per-user prompt override resolution - langfuse_session_id: Optional session ID for Langfuse trace grouping Returns: List of extracted fact memory strings @@ -358,7 +359,10 @@ async def extract_memories( pass @abstractmethod - async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: + async def generate_embeddings( + self, + texts: List[str], + ) -> List[List[float]]: """Generate vector embeddings for the given texts. Args: @@ -375,7 +379,6 @@ async def propose_memory_actions( retrieved_old_memory: List[Dict[str, str]], new_facts: List[str], custom_prompt: Optional[str] = None, - langfuse_session_id: Optional[str] = None, ) -> Dict[str, Any]: """Propose memory management actions based on existing and new information. 
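
To illustrate the interface change in the hunks above (the langfuse_session_id parameter is removed from the abstract methods), here is a minimal sketch of a provider conforming to the updated signatures; the class and its bodies are hypothetical, not part of the patch:

    import asyncio
    from typing import List, Optional

    class EchoLLMProvider:  # stands in for an LLMProviderBase subclass
        async def extract_memories(
            self, text: str, prompt: str, user_id: Optional[str] = None
        ) -> List[str]:
            # No session id is threaded through call sites any more; the
            # OTEL instrumentor traces the underlying OpenAI calls itself.
            return [line.strip() for line in text.splitlines() if line.strip()]

        async def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
            # Fixed-size zero vectors, just to satisfy the interface shape.
            return [[0.0] * 8 for _ in texts]

    facts = asyncio.run(EchoLLMProvider().extract_memories("a\nb", "extract facts"))

Concrete providers such as the Chronicle one below now pass only text, prompt, and user_id; grouping calls by conversation happens via set_otel_session() at job boundaries instead of per-call kwargs.
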
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/base.py b/backends/advanced/src/advanced_omi_backend/services/memory/base.py
index bae18e56..9eddddbc 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/base.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/base.py
@@ -341,8 +341,10 @@ class LLMProviderBase(ABC):
 
     @abstractmethod
     async def extract_memories(
-        self, text: str, prompt: str, user_id: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
+        self,
+        text: str,
+        prompt: str,
+        user_id: Optional[str] = None,
     ) -> List[str]:
         """Extract meaningful fact memories from text using an LLM.
 
@@ -350,7 +352,6 @@ class LLMProviderBase(ABC):
             text: Input text to extract memories from
             prompt: System prompt to guide the extraction process
             user_id: Optional user ID for per-user prompt override resolution
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             List of extracted fact memory strings
@@ -358,7 +359,10 @@ class LLMProviderBase(ABC):
         pass
 
     @abstractmethod
-    async def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
+    async def generate_embeddings(
+        self,
+        texts: List[str],
+    ) -> List[List[float]]:
         """Generate vector embeddings for the given texts.
 
         Args:
@@ -375,7 +379,6 @@ class LLMProviderBase(ABC):
     async def propose_memory_actions(
         self,
         retrieved_old_memory: List[Dict[str, str]],
         new_facts: List[str],
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Propose memory management actions based on existing and new information.
 
@@ -401,7 +404,6 @@ class LLMProviderBase(ABC):
     async def propose_reprocess_actions(
         self,
         diff_context: str,
         new_transcript: str,
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Propose memory updates after transcript reprocessing (e.g., speaker changes).
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py
index d1f51775..2363e5a8 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py
@@ -155,8 +155,9 @@ async def add_memory(
         if self.config.extraction_enabled and self.config.extraction_prompt:
             fact_memories_text = await asyncio.wait_for(
                 self.llm_provider.extract_memories(
-                    transcript, self.config.extraction_prompt, user_id=user_id,
-                    langfuse_session_id=source_id,
+                    transcript,
+                    self.config.extraction_prompt,
+                    user_id=user_id,
                 ),
                 timeout=self.config.timeout_seconds,
             )
@@ -194,8 +195,12 @@ async def add_memory(
         if allow_update and fact_memories_text:
             memory_logger.info(f"🔍 Allowing update for {source_id}")
             created_ids = await self._process_memory_updates(
-                fact_memories_text, embeddings, user_id, client_id, source_id, user_email,
-                langfuse_session_id=source_id,
+                fact_memories_text,
+                embeddings,
+                user_id,
+                client_id,
+                source_id,
+                user_email,
             )
         else:
             memory_logger.info(f"🔍 Not allowing update for {source_id}")
@@ -533,9 +538,7 @@ async def reprocess_memory(
         try:
             # 1. Get existing memories for this conversation
-            existing_memories = await self.vector_store.get_memories_by_source(
-                user_id, source_id
-            )
+            existing_memories = await self.vector_store.get_memories_by_source(user_id, source_id)
 
             # 2. If no existing memories, fall back to normal extraction
             if not existing_memories:
@@ -544,7 +547,11 @@ async def reprocess_memory(
                     f"falling back to normal extraction"
                 )
                 return await self.add_memory(
-                    transcript, client_id, source_id, user_id, user_email,
+                    transcript,
+                    client_id,
+                    source_id,
+                    user_id,
+                    user_email,
                     allow_update=True,
                 )
 
@@ -555,7 +562,11 @@ async def reprocess_memory(
                     f"falling back to normal extraction"
                 )
                 return await self.add_memory(
-                    transcript, client_id, source_id, user_id, user_email,
+                    transcript,
+                    client_id,
+                    source_id,
+                    user_id,
+                    user_email,
                     allow_update=True,
                 )
 
@@ -580,24 +591,29 @@ async def reprocess_memory(
                 existing_memories=existing_memory_dicts,
                 diff_context=diff_text,
                 new_transcript=transcript,
-                langfuse_session_id=source_id,
-            )
-            memory_logger.info(
-                f"🔄 Reprocess LLM returned actions: {actions_obj}"
             )
+            memory_logger.info(f"🔄 Reprocess LLM returned actions: {actions_obj}")
         except NotImplementedError:
             memory_logger.warning(
                 "LLM provider does not support propose_reprocess_actions, "
                 "falling back to normal extraction"
             )
             return await self.add_memory(
-                transcript, client_id, source_id, user_id, user_email,
+                transcript,
+                client_id,
+                source_id,
+                user_id,
+                user_email,
                 allow_update=True,
             )
         except Exception as e:
             memory_logger.error(f"Reprocess LLM call failed: {e}")
             return await self.add_memory(
-                transcript, client_id, source_id, user_id, user_email,
+                transcript,
+                client_id,
+                source_id,
+                user_id,
+                user_email,
                 allow_update=True,
             )
 
@@ -619,13 +635,9 @@ async def reprocess_memory(
                     self.llm_provider.generate_embeddings(texts_needing_embeddings),
                     timeout=self.config.timeout_seconds,
                 )
-                text_to_embedding = dict(
-                    zip(texts_needing_embeddings, embeddings, strict=True)
-                )
+                text_to_embedding = dict(zip(texts_needing_embeddings, embeddings, strict=True))
             except Exception as e:
-                memory_logger.warning(
-                    f"Batch embedding generation failed for reprocess: {e}"
-                )
+                memory_logger.warning(f"Batch embedding generation failed for reprocess: {e}")
 
             # 8. Apply the actions (reuses existing infrastructure)
             created_ids = await self._apply_memory_actions(
@@ -639,21 +651,20 @@ async def reprocess_memory(
             )
 
             memory_logger.info(
-                f"✅ Reprocess complete for {source_id}: "
-                f"{len(created_ids)} memories affected"
+                f"✅ Reprocess complete for {source_id}: " f"{len(created_ids)} memories affected"
             )
             return True, created_ids
 
         except Exception as e:
-            memory_logger.error(
-                f"❌ Reprocess memory failed for {source_id}: {e}"
-            )
+            memory_logger.error(f"❌ Reprocess memory failed for {source_id}: {e}")
             # Fall back to normal extraction on any unexpected error
-            memory_logger.info(
-                f"🔄 Falling back to normal extraction after reprocess error"
-            )
+            memory_logger.info(f"🔄 Falling back to normal extraction after reprocess error")
             return await self.add_memory(
-                transcript, client_id, source_id, user_id, user_email,
+                transcript,
+                client_id,
+                source_id,
+                user_id,
+                user_email,
                 allow_update=True,
             )
 
@@ -688,8 +699,7 @@ def _format_speaker_diff(transcript_diff: list) -> str:
             )
         elif change_type == "new_segment":
             lines.append(
-                f"- New segment: {change.get('speaker', '?')}: "
-                f"\"{change.get('text', '')}\""
+                f"- New segment: {change.get('speaker', '?')}: " f"\"{change.get('text', '')}\""
             )
 
     return "\n".join(lines)
@@ -789,7 +799,6 @@ async def _process_memory_updates(
         client_id: str,
         source_id: str,
         user_email: str,
-        langfuse_session_id: Optional[str] = None,
     ) -> List[str]:
         """Process memory updates using LLM-driven action proposals.
 
@@ -852,7 +861,6 @@ async def _process_memory_updates(
                 retrieved_old_memory=retrieved_old_memory,
                 new_facts=memories_text,
                 custom_prompt=None,
-                langfuse_session_id=langfuse_session_id,
             )
             memory_logger.info(f"📝 UpdateMemory LLM returned: {type(actions_obj)} - {actions_obj}")
         except Exception as e_actions:
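The reprocess path above is a "propose targeted actions, fall back to full
re-extraction" pattern: every failure mode funnels back into add_memory with
allow_update=True. A condensed sketch of that shape (names and signatures are
hypothetical, not the provider's real API):

    async def reprocess(provider, store, transcript, user_id, source_id, add_memory):
        existing = await store.get_memories_by_source(user_id, source_id)
        if not existing:
            # Nothing to diff against - treat it as a fresh extraction.
            return await add_memory(transcript, allow_update=True)
        try:
            actions = await provider.propose_reprocess_actions(
                existing_memories=existing,
                diff_context="<speaker diff>",  # placeholder for the formatted diff
                new_transcript=transcript,
            )
        except Exception:
            # NotImplementedError or any LLM failure: degrade gracefully.
            return await add_memory(transcript, allow_update=True)
        return actions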
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py
index 3a81b53e..4d440fba 100644
--- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py
+++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py
@@ -15,10 +15,7 @@
 from typing import Any, Dict, List, Optional
 
 from advanced_omi_backend.model_registry import ModelDef, get_models_registry
-from advanced_omi_backend.openai_factory import (
-    create_openai_client,
-    is_langfuse_enabled,
-)
+from advanced_omi_backend.openai_factory import create_openai_client
 from advanced_omi_backend.prompt_registry import get_prompt_registry
 from advanced_omi_backend.utils.text_chunking import semantic_chunk_text
 
@@ -42,13 +39,6 @@
 memory_logger = logging.getLogger("memory_service")
 
 
-def _langfuse_metadata(session_id: str | None) -> dict:
-    """Return metadata dict with langfuse_session_id if Langfuse is enabled."""
-    if session_id and is_langfuse_enabled():
-        return {"langfuse_session_id": session_id}
-    return {}
-
-
 def _get_openai_client(api_key: str, base_url: str, is_async: bool = False):
     """Get OpenAI client with optional Langfuse tracing.
 
@@ -75,10 +65,7 @@ async def generate_openai_embeddings(
         base_url=base_url,
         is_async=True,
     )
-    response = await client.embeddings.create(
-        model=model,
-        input=texts,
-    )
+    response = await client.embeddings.create(model=model, input=texts)
     return [data.embedding for data in response.data]
 
 
@@ -160,9 +147,7 @@ def __init__(self, config: Dict[str, Any]):
         # Ignore provider-specific envs; use registry as single source of truth
         registry = get_models_registry()
         if not registry:
-            raise RuntimeError(
-                "config.yml not found or invalid; cannot initialize model registry"
-            )
+            raise RuntimeError("config.yml not found or invalid; cannot initialize model registry")
         self._registry = registry
 
@@ -182,12 +167,8 @@ def __init__(self, config: Dict[str, Any]):
         self.embedding_model = (
             self.embed_def.model_name if self.embed_def else self.llm_def.model_name
         )
-        self.embedding_api_key = (
-            self.embed_def.api_key if self.embed_def else self.api_key
-        )
-        self.embedding_base_url = (
-            self.embed_def.model_url if self.embed_def else self.base_url
-        )
+        self.embedding_api_key = self.embed_def.api_key if self.embed_def else self.api_key
+        self.embedding_base_url = self.embed_def.model_url if self.embed_def else self.base_url
 
         # CRITICAL: Validate API keys are present - fail fast instead of hanging
         if not self.api_key or self.api_key.strip() == "":
@@ -197,9 +178,7 @@ def __init__(self, config: Dict[str, Any]):
                 f"Cannot proceed without valid API credentials."
             )
 
-        if self.embed_def and (
-            not self.embedding_api_key or self.embedding_api_key.strip() == ""
-        ):
+        if self.embed_def and (not self.embedding_api_key or self.embedding_api_key.strip() == ""):
             raise RuntimeError(
                 f"API key is missing or empty for embedding provider '{self.embed_def.model_provider}' (model: {self.embedding_model}). "
                 f"Please set the API key in config.yml or environment variables."
@@ -213,7 +192,6 @@ async def extract_memories(
         text: str,
         prompt: str,
         user_id: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> List[str]:
         """Extract memories using OpenAI API with the enhanced fact retrieval prompt.
 
@@ -221,7 +199,6 @@ async def extract_memories(
             text: Input text to extract memories from
             prompt: System prompt to guide extraction (uses default if empty)
             user_id: Optional user ID for per-user prompt override resolution
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             List of extracted memory strings
@@ -248,9 +225,7 @@ async def extract_memories(
                 model=self.embedding_model,
             )
 
-        chunking_config = self._registry.memory.get("extraction", {}).get(
-            "chunking", {}
-        )
+        chunking_config = self._registry.memory.get("extraction", {}).get("chunking", {})
         dialogue_turns = [line for line in text.split("\n") if line.strip()]
         text_chunks = await semantic_chunk_text(
             text,
@@ -266,9 +241,7 @@ async def extract_memories(
 
         # Process all chunks in sequence, not concurrently
         results = [
-            await self._process_chunk(
-                system_prompt, chunk, i, langfuse_session_id=langfuse_session_id
-            )
+            await self._process_chunk(system_prompt, chunk, i)
             for i, chunk in enumerate(text_chunks)
         ]
 
@@ -289,7 +262,6 @@ async def _process_chunk(
         system_prompt: str,
         chunk: str,
         index: int,
-        langfuse_session_id: Optional[str] = None,
     ) -> List[str]:
         """Process a single text chunk to extract memories using OpenAI API.
 
@@ -301,7 +273,6 @@ async def _process_chunk(
             system_prompt: System prompt that guides the memory extraction behavior
             chunk: Individual text chunk to process for memory extraction
             index: Index of the chunk for logging and error tracking purposes
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             List of extracted memory fact strings from the chunk. Returns empty list
@@ -320,7 +291,6 @@ async def _process_chunk(
                     {"role": "system", "content": system_prompt},
                     {"role": "user", "content": chunk},
                 ],
-                metadata=_langfuse_metadata(langfuse_session_id),
             )
             facts = (response.choices[0].message.content or "").strip()
             if not facts:
@@ -332,7 +302,10 @@ async def _process_chunk(
             memory_logger.error(f"Error processing chunk {index}: {e}")
             return []
 
-    async def generate_embeddings(self, texts: List[str]) -> List[List[float]]:
+    async def generate_embeddings(
+        self,
+        texts: List[str],
+    ) -> List[List[float]]:
         """Generate embeddings using OpenAI API.
 
         Args:
@@ -381,7 +354,6 @@ async def propose_memory_actions(
         retrieved_old_memory: List[Dict[str, str]] | List[str],
         new_facts: List[str],
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Use OpenAI chat completion with enhanced prompt to propose memory actions.
 
@@ -389,7 +361,6 @@ async def propose_memory_actions(
             retrieved_old_memory: List of existing memories for context
             new_facts: List of new facts to process
             custom_prompt: Optional custom prompt to override default
-            langfuse_session_id: Optional session ID for Langfuse trace grouping
 
         Returns:
             Dictionary containing proposed memory actions
@@ -409,7 +380,6 @@ async def propose_memory_actions(
             response = await client.chat.completions.create(
                 **op.to_api_params(),
                 messages=update_memory_messages,
-                metadata=_langfuse_metadata(langfuse_session_id),
             )
             content = (response.choices[0].message.content or "").strip()
             if not content:
@@ -434,7 +404,6 @@ async def propose_reprocess_actions(
         diff_context: str,
         new_transcript: str,
         custom_prompt: Optional[str] = None,
-        langfuse_session_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Propose memory updates after speaker re-identification.
 
@@ -464,9 +433,7 @@ async def propose_reprocess_actions(
         else:
             try:
                 registry = get_prompt_registry()
-                system_prompt = await registry.get_prompt(
-                    "memory.reprocess_speaker_update"
-                )
+                system_prompt = await registry.get_prompt("memory.reprocess_speaker_update")
             except Exception as e:
                 memory_logger.debug(
                     f"Registry prompt fetch failed for "
@@ -497,7 +464,6 @@ async def propose_reprocess_actions(
             response = await client.chat.completions.create(
                 **op.to_api_params(),
                 messages=messages,
-                metadata=_langfuse_metadata(langfuse_session_id),
             )
 
             content = (response.choices[0].message.content or "").strip()
@@ -553,16 +519,12 @@ def _parse_memories_content(content: str) -> List[str]:
             for key in ("facts", "preferences"):
                 value = parsed.get(key)
                 if isinstance(value, list):
-                    collected.extend(
-                        [str(item).strip() for item in value if str(item).strip()]
-                    )
+                    collected.extend([str(item).strip() for item in value if str(item).strip()])
             # If the dict didn't contain expected keys, try to flatten any list values
             if not collected:
                 for value in parsed.values():
                     if isinstance(value, list):
-                        collected.extend(
-                            [str(item).strip() for item in value if str(item).strip()]
-                        )
+                        collected.extend([str(item).strip() for item in value if str(item).strip()])
             if collected:
                 return collected
         except Exception:
@@ -597,17 +559,13 @@ def _try_parse_list_or_object(text: str) -> List[str] | None:
             for key in ("facts", "preferences"):
                 value = data.get(key)
                 if isinstance(value, list):
-                    collected.extend(
-                        [str(item).strip() for item in value if str(item).strip()]
-                    )
+                    collected.extend([str(item).strip() for item in value if str(item).strip()])
             if collected:
                 return collected
             # As a last attempt, flatten any list values
             for value in data.values():
                 if isinstance(value, list):
-                    collected.extend(
-                        [str(item).strip() for item in value if str(item).strip()]
-                    )
+                    collected.extend([str(item).strip() for item in value if str(item).strip()])
             return collected if collected else None
         except Exception:
             return None
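extract_memories above splits long transcripts with semantic_chunk_text and processes
the chunks sequentially. A simplified sketch of that flow (the helper names here are
hypothetical stand-ins; the real chunker lives in utils/text_chunking.py):

    async def extract_all(llm, transcript: str, system_prompt: str) -> list[str]:
        chunks = await chunk_transcript(transcript)  # hypothetical chunking helper
        results = []
        for i, chunk in enumerate(chunks):
            # Sequential on purpose: avoids hammering the LLM API with parallel calls.
            results.append(await llm.process_chunk(system_prompt, chunk, i))
        # Flatten per-chunk fact lists into one list.
        return [fact for chunk_facts in results for fact in chunk_facts]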
diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
index 63036ce1..96c52f57 100644
--- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
+++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py
@@ -118,7 +118,9 @@ def analyze_speech(transcript_data: dict) -> dict:
     else:
         # Check minimum duration threshold when we have timing data
         min_duration = settings.get("min_duration", 10.0)
-        logger.info(f"📏 Comparing duration {speech_duration:.1f}s vs threshold {min_duration:.1f}s")
+        logger.info(
+            f"📏 Comparing duration {speech_duration:.1f}s vs threshold {min_duration:.1f}s"
+        )
         if speech_duration < min_duration:
             return {
                 "has_speech": False,
@@ -164,7 +166,6 @@ async def generate_title_and_summary(
     text: str,
     segments: Optional[list] = None,
     user_id: Optional[str] = None,
-    langfuse_session_id: Optional[str] = None,
 ) -> tuple[str, str]:
     """
     Generate title and short summary in a single LLM call using full conversation context.
@@ -222,7 +223,7 @@ async def generate_title_and_summary(
     "{conversation_text}"
     """
 
-        response = await async_generate(prompt, operation="title_summary", langfuse_session_id=langfuse_session_id)
+        response = await async_generate(prompt, operation="title_summary")
 
         # Parse response for Title: and Summary: lines
         title = None
@@ -249,12 +250,10 @@ async def generate_title_and_summary(
         return fallback_title or "Conversation", fallback_summary or "No content"
 
 
-
 async def generate_detailed_summary(
     text: str,
     segments: Optional[list] = None,
     memory_context: Optional[str] = None,
-    langfuse_session_id: Optional[str] = None,
 ) -> str:
     """
     Generate a comprehensive, detailed summary of the conversation.
@@ -330,7 +329,7 @@ async def generate_detailed_summary(
     "{conversation_text}"
     """
 
-        summary = await async_generate(prompt, operation="detailed_summary", langfuse_session_id=langfuse_session_id)
+        summary = await async_generate(prompt, operation="detailed_summary")
 
         return summary.strip().strip('"').strip("'") or "No meaningful content to summarize"
 
     except Exception as e:
@@ -350,7 +349,6 @@ async def generate_detailed_summary(
 # ============================================================================
 
 
-
 def extract_speakers_from_segments(segments: list) -> List[str]:
     """
     Extract unique speaker names from segments.
@@ -364,7 +362,11 @@ def extract_speakers_from_segments(segments: list) -> List[str]:
     speakers = []
     if segments:
         for seg in segments:
-            speaker = seg.get("speaker", "Unknown") if isinstance(seg, dict) else (seg.speaker or "Unknown")
+            speaker = (
+                seg.get("speaker", "Unknown")
+                if isinstance(seg, dict)
+                else (seg.speaker or "Unknown")
+            )
             if speaker and speaker != "Unknown" and speaker not in speakers:
                 speakers.append(speaker)
     return speakers
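generate_title_and_summary asks for both values in one completion and then parses
labeled lines out of the response. A minimal sketch of that parsing, with the same
fallbacks (the example response text is illustrative):

    def parse_title_summary(response: str) -> tuple[str, str]:
        title, summary = None, None
        for line in response.splitlines():
            lowered = line.lower()
            if lowered.startswith("title:"):
                title = line.split(":", 1)[1].strip()
            elif lowered.startswith("summary:"):
                summary = line.split(":", 1)[1].strip()
        # Fall back to safe defaults if the model ignored the format.
        return title or "Conversation", summary or "No content"

    print(parse_title_summary("Title: Planning dinner\nSummary: Two speakers plan a menu."))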
diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
index 34285062..ba2a4ee0 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py
@@ -109,9 +109,7 @@ async def handle_end_of_conversation(
 
     from advanced_omi_backend.models.conversation import Conversation
 
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if conversation:
         # Convert string to enum
         try:
@@ -126,9 +124,7 @@ async def handle_end_of_conversation(
             f"💾 Saved conversation {conversation_id[:12]} end_reason: {conversation.end_reason}"
         )
     else:
-        logger.warning(
-            f"⚠️ Conversation {conversation_id} not found for end reason tracking"
-        )
+        logger.warning(f"⚠️ Conversation {conversation_id} not found for end reason tracking")
 
     # Increment conversation count for this session
     conversation_count_key = f"session:conversation_count:{session_id}"
@@ -138,15 +134,36 @@ async def handle_end_of_conversation(
 
     # Check if session is still active (user still recording) and restart listening jobs
     session_key = f"audio:session:{session_id}"
-    session_status = await redis_client.hget(session_key, "status")
-    if session_status:
-        status_str = (
-            session_status.decode()
-            if isinstance(session_status, bytes)
-            else session_status
-        )
+    # Fetch both status and websocket_connected in one Redis call
+    status_raw, ws_connected_raw = await redis_client.hmget(
+        session_key, "status", "websocket_connected"
+    )
+    if status_raw:
+        status_str = status_raw.decode() if isinstance(status_raw, bytes) else status_raw
+        ws_connected = (
+            ws_connected_raw.decode()
+            if isinstance(ws_connected_raw, bytes)
+            else (ws_connected_raw or "false")
+        ) == "true"
+
+        # Determine if we should restart speech detection
+        should_restart = False
         if status_str == "active":
+            should_restart = True
+        elif ws_connected:
+            # Race condition recovery: WebSocket is still connected but status got
+            # corrupted (e.g., status endpoint polling set "finished" during the
+            # inter-conversation gap). Reset status and restart anyway.
+            logger.warning(
+                f"⚠️ Race condition recovery for session {session_id[:12]}: "
+                f"status={status_str} but websocket_connected=true. "
+                f"Resetting status to 'active' and restarting speech detection."
+            )
+            await redis_client.hset(session_key, "status", "active")
+            should_restart = True
+
+        if should_restart:
             # Session still active - enqueue new speech detection for next conversation
             logger.info(
                 f"🔄 Enqueueing new speech detection (conversation #{conversation_count + 1})"
@@ -175,7 +192,7 @@ async def handle_end_of_conversation(
                 client_id,
                 job_timeout=86400,  # 24 hours to match max_runtime in stream_speech_detection_job
                 result_ttl=JOB_RESULT_TTL,
-                job_id=f"speech-detect_{session_id[:12]}_{conversation_count}",
+                job_id=f"speech-detect_{session_id}_{conversation_count}",
                 description=f"Listening for speech (conversation #{conversation_count + 1})",
                 meta={"client_id": client_id, "session_level": True},
             )
@@ -192,7 +209,8 @@ async def handle_end_of_conversation(
             logger.info(f"✅ Enqueued speech detection job {speech_job.id}")
         else:
             logger.info(
-                f"Session {session_id} status={status_str}, not restarting (user stopped recording)"
+                f"Session {session_id} status={status_str}, ws_connected={ws_connected}, "
+                f"not restarting (user stopped recording)"
             )
     else:
         logger.info(f"Session {session_id} not found, not restarting (session ended)")
@@ -246,9 +264,7 @@ def _validate_segments(segments: list) -> list:
         start = seg.get("start", 0.0)
         end = seg.get("end", 0.0)
         if end <= start:
-            logger.debug(
-                f"Segment {i} has invalid timing (start={start}, end={end}), correcting"
-            )
+            logger.debug(f"Segment {i} has invalid timing (start={start}, end={end}), correcting")
             estimated_duration = len(text.split()) * 0.5  # ~0.5 seconds per word
             seg["end"] = start + estimated_duration
 
@@ -297,9 +313,7 @@ async def _initialize_conversation(
     conversation = None
     if existing_conversation_id_bytes:
         existing_conversation_id = existing_conversation_id_bytes.decode()
-        logger.info(
-            f"🔍 Found Redis key with conversation_id={existing_conversation_id}"
-        )
+        logger.info(f"🔍 Found Redis key with conversation_id={existing_conversation_id}")
 
         # Try to fetch the existing conversation by conversation_id
         conversation = await Conversation.find_one(
@@ -314,16 +328,13 @@ async def _initialize_conversation(
                 f"processing_status={processing_status}"
             )
         else:
-            logger.warning(
-                f"⚠️ Conversation {existing_conversation_id} not found in database!"
-            )
+            logger.warning(f"⚠️ Conversation {existing_conversation_id} not found in database!")
 
         # Verify it's a placeholder conversation (always_persist=True, processing_status='pending_transcription')
         if (
             conversation
             and getattr(conversation, "always_persist", False)
-            and getattr(conversation, "processing_status", None)
-            == "pending_transcription"
+            and getattr(conversation, "processing_status", None) == "pending_transcription"
         ):
             logger.info(
                 f"🔄 Reusing placeholder conversation {conversation.conversation_id} for session {session_id}"
@@ -342,9 +353,7 @@ async def _initialize_conversation(
             )
             conversation = None
     else:
-        logger.info(
-            f"🔍 No Redis key found for {conversation_key}, creating new conversation"
-        )
+        logger.info(f"🔍 No Redis key found for {conversation_key}, creating new conversation")
 
     # If no valid placeholder found, create new conversation
     if not conversation:
@@ -356,18 +365,14 @@ async def _initialize_conversation(
         )
         await conversation.insert()
         conversation_id = conversation.conversation_id
-        logger.info(
-            f"✅ Created streaming conversation {conversation_id} for session {session_id}"
-        )
+        logger.info(f"✅ Created streaming conversation {conversation_id} for session {session_id}")
 
     # Attach markers from Redis session (e.g., button events captured during streaming)
     session_key = f"audio:session:{session_id}"
     markers_json = await redis_client.hget(session_key, "markers")
     if markers_json:
         try:
-            markers_data = (
-                markers_json if isinstance(markers_json, str) else markers_json.decode()
-            )
+            markers_data = markers_json if isinstance(markers_json, str) else markers_json.decode()
             conversation.markers = json.loads(markers_data)
             await conversation.save()
             logger.info(
@@ -387,9 +392,7 @@ async def _initialize_conversation(
         speaker_check_job_id = speech_job.meta.get("speaker_check_job_id")
         if speaker_check_job_id:
             try:
-                speaker_check_job = Job.fetch(
-                    speaker_check_job_id, connection=redis_conn
-                )
+                speaker_check_job = Job.fetch(speaker_check_job_id, connection=redis_conn)
                 speaker_check_job.meta["conversation_id"] = conversation_id
                 speaker_check_job.save_meta()
             except Exception as e:
@@ -413,9 +416,7 @@ async def _initialize_conversation(
 
     # Signal audio persistence job to rotate to this conversation's file
     rotation_signal_key = f"conversation:current:{session_id}"
-    await redis_client.set(
-        rotation_signal_key, conversation_id, ex=86400
-    )  # 24 hour TTL
+    await redis_client.set(rotation_signal_key, conversation_id, ex=86400)  # 24 hour TTL
     logger.info(
         f"🔄 Signaled audio persistence to rotate file for conversation {conversation_id[:12]}"
     )
@@ -444,16 +445,12 @@ async def _monitor_conversation_loop(
         close_requested_reason, last_result_count, and last_word_count.
     """
     session_key = f"audio:session:{state.session_id}"
-    max_runtime = (
-        10740  # 3 hours - 60 seconds (single conversations shouldn't exceed 3 hours)
-    )
+    max_runtime = 10740  # 3 hours - 60 seconds (single conversations shouldn't exceed 3 hours)
    finalize_received = False
 
     # Inactivity timeout configuration
-    inactivity_timeout_seconds = float(
-        os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60")
-    )
+    inactivity_timeout_seconds = float(os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60"))
     inactivity_timeout_minutes = inactivity_timeout_seconds / 60
     last_inactivity_log_time = (
         time.time()
@@ -461,9 +458,7 @@ async def _monitor_conversation_loop(
 
     # Test mode: wait for audio queue to drain before timing out
     # In real usage, ambient noise keeps connection alive. In tests, chunks arrive in bursts.
-    wait_for_queue_drain = (
-        os.getenv("WAIT_FOR_AUDIO_QUEUE_DRAIN", "false").lower() == "true"
-    )
+    wait_for_queue_drain = os.getenv("WAIT_FOR_AUDIO_QUEUE_DRAIN", "false").lower() == "true"
 
     logger.info(
         f"📊 Conversation timeout configured: {inactivity_timeout_minutes} minutes ({inactivity_timeout_seconds}s)"
@@ -480,51 +475,59 @@ async def _monitor_conversation_loop(
 
         # Check if session is finalizing (set by producer when recording stops)
         if not finalize_received:
-            status = await redis_client.hget(session_key, "status")
-            status_str = status.decode() if status else None
+            # Fetch status, completion_reason, and websocket_connected in one call
+            status_raw, reason_raw, ws_raw = await redis_client.hmget(
+                session_key, "status", "completion_reason", "websocket_connected"
+            )
+            status_str = status_raw.decode() if status_raw else None
+            completion_reason_str = reason_raw.decode() if reason_raw else "unknown"
+            ws_connected = (ws_raw.decode() if ws_raw else "false") == "true"
 
             if status_str in ["finalizing", "finished"]:
-                finalize_received = True
-
-                # Get completion reason (guaranteed to exist with unified API)
-                completion_reason = await redis_client.hget(
-                    session_key, "completion_reason"
-                )
-                completion_reason_str = (
-                    completion_reason.decode() if completion_reason else "unknown"
-                )
-
-                if completion_reason_str == "websocket_disconnect":
+                # Check for spurious "finished" from status endpoint race condition:
+                # If status is "finished" but WebSocket is still connected and reason
+                # is "all_jobs_complete", this was set during the inter-conversation gap.
+                # Reset to "active" and continue monitoring.
+                if (
+                    status_str == "finished"
+                    and ws_connected
+                    and completion_reason_str == "all_jobs_complete"
+                ):
                     logger.warning(
-                        f"🔌 WebSocket disconnected for session {state.session_id[:12]} - "
-                        f"ending conversation early"
-                    )
-                    state.timeout_triggered = (
-                        False  # This is a disconnect, not a timeout
+                        f"⚠️ Ignoring spurious 'finished' for session {state.session_id[:12]}: "
+                        f"websocket_connected=true, reason=all_jobs_complete. "
+                        f"Resetting status to 'active' and continuing."
                     )
+                    await redis_client.hset(session_key, "status", "active")
+                    # Do NOT break - continue monitoring
                 else:
-                    logger.info(
-                        f"🛑 Session finalizing (reason: {completion_reason_str}), "
-                        f"waiting for audio persistence job to complete..."
-                    )
-                break  # Exit immediately when finalize signal received
+                    finalize_received = True
+
+                    if completion_reason_str == "websocket_disconnect":
+                        logger.warning(
+                            f"🔌 WebSocket disconnected for session {state.session_id[:12]} - "
+                            f"ending conversation early"
+                        )
+                        state.timeout_triggered = False  # This is a disconnect, not a timeout
+                    else:
+                        logger.info(
+                            f"🛑 Session finalizing (reason: {completion_reason_str}), "
+                            f"waiting for audio persistence job to complete..."
+                        )
+                    break  # Exit immediately when finalize signal received
 
         # Check for conversation close request (set by API, plugins, button press)
         if not finalize_received:
-            close_reason = await redis_client.hget(
-                session_key, "conversation_close_requested"
-            )
+            close_reason = await redis_client.hget(session_key, "conversation_close_requested")
             if close_reason:
                 await redis_client.hdel(session_key, "conversation_close_requested")
                 state.close_requested_reason = (
-                    close_reason.decode()
-                    if isinstance(close_reason, bytes)
-                    else close_reason
+                    close_reason.decode() if isinstance(close_reason, bytes) else close_reason
                 )
-                logger.info(
-                    f"🔒 Conversation close requested: {state.close_requested_reason}"
+                logger.info(f"🔒 Conversation close requested: {state.close_requested_reason}")
+                state.timeout_triggered = (
+                    True  # Session stays active (same restart behavior as inactivity timeout)
                 )
-                state.timeout_triggered = True  # Session stays active (same restart behavior as inactivity timeout)
                 finalize_received = True
                 break
@@ -583,9 +586,7 @@ async def _monitor_conversation_loop(
                 # Can't reliably detect inactivity, so skip timeout check this iteration
                 inactivity_duration = 0
                 if speech_analysis.get("fallback", False):
-                    logger.debug(
-                        "⚠️ Skipping inactivity check (no audio timestamps available)"
-                    )
+                    logger.debug("⚠️ Skipping inactivity check (no audio timestamps available)")
 
         current_time = time.time()
 
@@ -697,15 +698,11 @@ async def _save_streaming_transcript(
     """
     from advanced_omi_backend.models.conversation import Conversation
 
-    logger.info(
-        f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}"
-    )
+    logger.info(f"📝 Retrieving final streaming transcript for conversation {conversation_id[:12]}")
     final_transcript = await aggregator.get_combined_results(session_id)
 
     # Fetch conversation from database to ensure we have latest state
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         logger.error(f"❌ Conversation {conversation_id} not found in database")
         raise ValueError(f"Conversation {conversation_id} not found")
@@ -979,9 +976,7 @@ async def open_conversation_job(
         logger.info(f"📊 Using completion_reason from session: {state.end_reason}")
     elif state.close_requested_reason:
         state.end_reason = "close_requested"
-        logger.info(
-            f"📊 Conversation closed by request: {state.close_requested_reason}"
-        )
+        logger.info(f"📊 Conversation closed by request: {state.close_requested_reason}")
     elif state.timeout_triggered:
         state.end_reason = "inactivity_timeout"
     elif time.time() - state.start_time > 10740:
@@ -989,9 +984,7 @@ async def open_conversation_job(
     else:
         state.end_reason = "user_stopped"
 
-    logger.info(
-        f"📊 Conversation {conversation_id[:12]} end_reason determined: {state.end_reason}"
-    )
+    logger.info(f"📊 Conversation {conversation_id[:12]} end_reason determined: {state.end_reason}")
 
     # Phase 4-7: Post-processing (wrapped in try/finally for guaranteed cleanup)
     end_of_conversation_handled = False
@@ -1061,9 +1054,7 @@ async def open_conversation_job(
             end_reason=state.end_reason,
         )
 
-        logger.info(
-            f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}"
-        )
+        logger.info(f"📦 MongoDB audio chunks ready for conversation {conversation_id[:12]}")
 
         # Phase 6: Save streaming transcript
         version_id = await _save_streaming_transcript(
@@ -1117,9 +1108,7 @@ async def open_conversation_job(
 
 
 @async_job(redis=True, beanie=True)
-async def generate_title_summary_job(
-    conversation_id: str, *, redis_client=None
-) -> Dict[str, Any]:
+async def generate_title_summary_job(conversation_id: str, *, redis_client=None) -> Dict[str, Any]:
     """
     Generate title, short summary, and detailed summary for a conversation using LLM.
 
@@ -1143,16 +1132,12 @@ async def generate_title_summary_job(
     )
     set_otel_session(conversation_id)
 
-    logger.info(
-        f"📝 Starting title/summary generation for conversation {conversation_id}"
-    )
+    logger.info(f"📝 Starting title/summary generation for conversation {conversation_id}")
 
     start_time = time.time()
 
     # Get the conversation
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         logger.error(f"Conversation {conversation_id} not found")
         return {"success": False, "error": "Conversation not found"}
@@ -1162,9 +1147,7 @@ async def generate_title_summary_job(
     segments = conversation.segments or []
 
     if not transcript_text and (not segments or len(segments) == 0):
-        logger.warning(
-            f"⚠️ No transcript or segments available for conversation {conversation_id}"
-        )
+        logger.warning(f"⚠️ No transcript or segments available for conversation {conversation_id}")
         return {
             "success": False,
             "error": "No transcript or segments available",
@@ -1196,9 +1179,7 @@ async def generate_title_summary_job(
         else:
             logger.info(f"📚 No memories found for context enrichment")
     except Exception as mem_error:
-        logger.warning(
-            f"⚠️ Could not fetch memory context (continuing without): {mem_error}"
-        )
+        logger.warning(f"⚠️ Could not fetch memory context (continuing without): {mem_error}")
 
     # Generate title+summary (one call) and detailed summary in parallel
     import asyncio
@@ -1208,13 +1189,11 @@ async def generate_title_summary_job(
             transcript_text,
             segments=segments,
             user_id=conversation.user_id,
-            langfuse_session_id=conversation_id,
         ),
         generate_detailed_summary(
             transcript_text,
             segments=segments,
             memory_context=memory_context,
-            langfuse_session_id=conversation_id,
         ),
     )
 
@@ -1224,9 +1203,7 @@ async def generate_title_summary_job(
 
     logger.info(f"✅ Generated title: '{conversation.title}'")
     logger.info(f"✅ Generated summary: '{conversation.summary}'")
-    logger.info(
-        f"✅ Generated detailed summary: {len(conversation.detailed_summary)} chars"
-    )
+    logger.info(f"✅ Generated detailed summary: {len(conversation.detailed_summary)} chars")
 
     # Update processing status for placeholder/reprocessing conversations
     if getattr(conversation, "processing_status", None) in [
@@ -1323,16 +1300,12 @@ async def dispatch_conversation_complete_event_job(
     """
     from advanced_omi_backend.models.conversation import Conversation
 
-    logger.info(
-        f"📌 Dispatching conversation.complete event for conversation {conversation_id}"
-    )
+    logger.info(f"📌 Dispatching conversation.complete event for conversation {conversation_id}")
 
     start_time = time.time()
 
     # Get the conversation to include in event data
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         logger.error(f"Conversation {conversation_id} not found")
         return {"success": False, "error": "Conversation not found"}
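The recovery logic above hinges on reading status and websocket_connected together in
one round trip. A distilled sketch of the decision (an async Redis client is assumed;
key and field names match the diff):

    async def should_restart_listening(redis_client, session_id: str) -> bool:
        session_key = f"audio:session:{session_id}"
        status_raw, ws_raw = await redis_client.hmget(
            session_key, "status", "websocket_connected"
        )
        status = status_raw.decode() if isinstance(status_raw, bytes) else status_raw
        ws_connected = (
            ws_raw.decode() if isinstance(ws_raw, bytes) else (ws_raw or "false")
        ) == "true"
        if status == "active":
            return True
        if ws_connected:
            # Status was corrupted during the inter-conversation gap: repair and restart.
            await redis_client.hset(session_key, "status", "active")
            return True
        return False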
diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
index a9e98c5f..6cbf5af3 100644
--- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
+++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py
@@ -79,9 +79,7 @@ async def apply_speaker_recognition(
     speaker_client = SpeakerRecognitionClient()
 
     if not speaker_client.enabled:
-        logger.info(
-            f"🎤 Speaker recognition disabled, using original speaker labels"
-        )
+        logger.info(f"🎤 Speaker recognition disabled, using original speaker labels")
         return segments
 
     logger.info(
@@ -122,9 +120,7 @@ def get_speaker_at_time(timestamp: float, speaker_segments: list) -> str:
     updated_count = 0
     for seg in segments:
         seg_mid = (seg.start + seg.end) / 2.0
-        identified_speaker = get_speaker_at_time(
-            seg_mid, speaker_identified_segments
-        )
+        identified_speaker = get_speaker_at_time(seg_mid, speaker_identified_segments)
 
         if identified_speaker and identified_speaker != "Unknown":
             original_speaker = seg.speaker
@@ -187,9 +183,7 @@ async def transcribe_full_audio_job(
     start_time = time.time()
 
     # Get the conversation
-    conversation = await Conversation.find_one(
-        Conversation.conversation_id == conversation_id
-    )
+    conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id)
     if not conversation:
         raise ValueError(f"Conversation {conversation_id} not found")
 
@@ -206,23 +200,18 @@ async def transcribe_full_audio_job(
     logger.info(f"Using transcription provider: {provider_name}")
 
     # Reconstruct audio from MongoDB chunks
-    logger.info(
-        f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}"
-    )
+    logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}")
 
     try:
         # Reconstruct WAV from MongoDB chunks (already in memory as bytes)
         wav_data = await reconstruct_wav_from_conversation(conversation_id)
         logger.info(
-            f"📦 Reconstructed audio from MongoDB chunks: "
-            f"{len(wav_data) / 1024 / 1024:.2f} MB"
+            f"📦 Reconstructed audio from MongoDB chunks: " f"{len(wav_data) / 1024 / 1024:.2f} MB"
         )
     except ValueError as e:
         # No chunks found for conversation
-        raise FileNotFoundError(
-            f"No audio chunks found for conversation {conversation_id}: {e}"
-        )
+        raise FileNotFoundError(f"No audio chunks found for conversation {conversation_id}: {e}")
     except Exception as e:
         logger.error(f"Failed to reconstruct audio from MongoDB: {e}", exc_info=True)
         raise RuntimeError(f"Audio reconstruction failed: {e}")
@@ -305,9 +294,7 @@ def _on_batch_progress(event: dict) -> None:
                     description=f"conversation={conversation_id[:12]}, words={len(words)}",
                 )
             except Exception as e:
-                logger.exception(
-                    f"⚠️ Error triggering transcript plugins in batch mode: {e}"
-                )
+                logger.exception(f"⚠️ Error triggering transcript plugins in batch mode: {e}")
 
     logger.info(f"🔍 DEBUG: Plugin processing complete, moving to speech validation")
 
@@ -362,9 +349,7 @@ def _on_batch_progress(event: dict) -> None:
                         f"Job {job_id} hash not found (likely already completed or expired)"
                     )
                 else:
-                    logger.debug(
-                        f"Job {job_id} not found or already completed: {e}"
-                    )
+                    logger.debug(f"Job {job_id} not found or already completed: {e}")
 
     if cancelled_jobs:
         logger.info(
@@ -594,9 +579,7 @@ async def create_audio_only_conversation(
         # Update status to show batch transcription is starting
         placeholder_conversation.processing_status = "batch_transcription"
         placeholder_conversation.title = "Audio Recording (Batch Transcription...)"
-        placeholder_conversation.summary = (
-            "Processing audio with offline transcription..."
-        )
+        placeholder_conversation.summary = "Processing audio with offline transcription..."
         await placeholder_conversation.save()
 
         # Audio chunks are already linked to this conversation_id
@@ -623,9 +606,7 @@ async def create_audio_only_conversation(
     )
     await conversation.insert()
 
-    logger.info(
-        f"✅ Created batch transcription conversation {session_id[:12]} for fallback"
-    )
+    logger.info(f"✅ Created batch transcription conversation {session_id[:12]} for fallback")
 
     return conversation
 
@@ -771,18 +752,14 @@ async def transcription_fallback_check_job(
             sample_rate, channels, sample_width = 16000, 1, 2
             session_key = f"audio:session:{session_id}"
             try:
-                audio_format_raw = await redis_client.hget(
-                    session_key, "audio_format"
-                )
+                audio_format_raw = await redis_client.hget(session_key, "audio_format")
                 if audio_format_raw:
                     audio_format = json.loads(audio_format_raw)
                     sample_rate = int(audio_format.get("rate", 16000))
                     channels = int(audio_format.get("channels", 1))
                     sample_width = int(audio_format.get("width", 2))
             except Exception as e:
-                logger.warning(
-                    f"Failed to read audio_format from Redis for {session_id}: {e}"
-                )
+                logger.warning(f"Failed to read audio_format from Redis for {session_id}: {e}")
 
             bytes_per_second = sample_rate * channels * sample_width
             logger.info(
@@ -791,9 +768,7 @@ async def transcription_fallback_check_job(
             )
 
             # Create conversation placeholder
-            conversation = await create_audio_only_conversation(
-                session_id, user_id, client_id
-            )
+            conversation = await create_audio_only_conversation(session_id, user_id, client_id)
 
             # Save audio to MongoDB chunks for batch transcription
             num_chunks = await convert_audio_to_chunks(
@@ -810,9 +785,7 @@ async def transcription_fallback_check_job(
             )
 
         except Exception as e:
-            logger.error(
-                f"❌ Failed to extract audio from Redis stream: {e}", exc_info=True
-            )
+            logger.error(f"❌ Failed to extract audio from Redis stream: {e}", exc_info=True)
             raise
     else:
         logger.info(
@@ -821,9 +794,7 @@ async def transcription_fallback_check_job(
         )
 
         # Create conversation placeholder for batch transcription
-        conversation = await create_audio_only_conversation(
-            session_id, user_id, client_id
-        )
+        conversation = await create_audio_only_conversation(session_id, user_id, client_id)
 
     # Enqueue batch transcription job
     version_id = f"batch_fallback_{session_id[:12]}"
@@ -919,14 +890,10 @@ async def stream_speech_detection_job(
     # Get conversation count
     conversation_count_key = f"session:conversation_count:{session_id}"
     conversation_count_bytes = await redis_client.get(conversation_count_key)
-    conversation_count = (
-        int(conversation_count_bytes) if conversation_count_bytes else 0
-    )
+    conversation_count = int(conversation_count_bytes) if conversation_count_bytes else 0
 
     # Check if speaker filtering is enabled
-    speaker_filter_enabled = (
-        os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true"
-    )
+    speaker_filter_enabled = os.getenv("RECORD_ONLY_ENROLLED_SPEAKERS", "false").lower() == "true"
     logger.info(
         f"📊 Conversation #{conversation_count + 1}, Speaker filter: {'enabled' if speaker_filter_enabled else 'disabled'}"
     )
@@ -969,24 +936,17 @@ async def stream_speech_detection_job(
             )
 
             # Exit if grace period expired without speech
-            if (
-                session_closed_at
-                and (time.time() - session_closed_at) > final_check_grace_period
-            ):
+            if session_closed_at and (time.time() - session_closed_at) > final_check_grace_period:
                 logger.info(f"✅ Session ended without speech (grace period expired)")
                 break
 
             # Consume any stale conversation close request (defensive — shouldn't normally
             # appear since services.py gates on conversation:current, but handles race conditions)
-            close_reason = await redis_client.hget(
-                session_key, "conversation_close_requested"
-            )
+            close_reason = await redis_client.hget(session_key, "conversation_close_requested")
             if close_reason:
                 await redis_client.hdel(session_key, "conversation_close_requested")
                 close_reason_str = (
-                    close_reason.decode()
-                    if isinstance(close_reason, bytes)
-                    else close_reason
+                    close_reason.decode() if isinstance(close_reason, bytes) else close_reason
                 )
                 logger.info(
                     f"🔒 Conversation close requested ({close_reason_str}) during speech detection — "
@@ -1003,15 +963,11 @@ async def stream_speech_detection_job(
             # Health check: detect transcription errors early during grace period
             if session_closed_at:
                 # Check for streaming consumer errors in session metadata
-                error_status = await redis_client.hget(
-                    session_key, "transcription_error"
-                )
+                error_status = await redis_client.hget(session_key, "transcription_error")
                 if error_status:
                     error_msg = error_status.decode()
                     logger.error(f"❌ Transcription service error: {error_msg}")
-                    logger.error(
-                        f"❌ Session failed - transcription service unavailable"
-                    )
+                    logger.error(f"❌ Session failed - transcription service unavailable")
                     break
 
                 # Check if we've been waiting too long with no results at all
@@ -1021,9 +977,7 @@ async def stream_speech_detection_job(
                     logger.error(
                         f"❌ No transcription activity after {grace_elapsed:.1f}s - possible API key or connectivity issue"
                    )
-                    logger.error(
-                        f"❌ Session failed - check transcription service configuration"
-                    )
+                    logger.error(f"❌ Session failed - check transcription service configuration")
                     break
 
             await asyncio.sleep(2)
@@ -1063,9 +1017,7 @@ async def stream_speech_detection_job(
         "last_event",
         f"speech_detected:{datetime.utcnow().isoformat()}",
     )
-    await redis_client.hset(
-        session_key, "speech_detected_at", datetime.utcnow().isoformat()
-    )
+    await redis_client.hset(session_key, "speech_detected_at", datetime.utcnow().isoformat())
 
     # Step 2: If speaker filter enabled, check for enrolled speakers
     identified_speakers = []
@@ -1090,7 +1042,7 @@ async def stream_speech_detection_job(
             client_id,
             job_timeout=300,  # 5 minutes for speaker recognition
             result_ttl=600,
-            job_id=f"speaker-check_{session_id[:12]}_{conversation_count}",
+            job_id=f"speaker-check_{session_id}_{conversation_count}",
             description=f"Speaker check for conversation #{conversation_count+1}",
             meta={"client_id": client_id},
         )
@@ -1117,9 +1069,7 @@ async def stream_speech_detection_job(
                     result = speaker_check_job.result
                     enrolled_present = result.get("enrolled_present", False)
                     identified_speakers = result.get("identified_speakers", [])
-                    logger.info(
-                        f"✅ Speaker check completed: enrolled={enrolled_present}"
-                    )
+                    logger.info(f"✅ Speaker check completed: enrolled={enrolled_present}")
 
                     # Update session event for speaker check complete
                     await redis_client.hset(
@@ -1148,9 +1098,7 @@ async def stream_speech_detection_job(
                         "last_event",
                         f"speaker_check_failed:{datetime.utcnow().isoformat()}",
                     )
-                    await redis_client.hset(
-                        session_key, "speaker_check_status", "failed"
-                    )
+                    await redis_client.hset(session_key, "speaker_check_status", "failed")
                     break
                 await asyncio.sleep(poll_interval)
                 waited += poll_interval
@@ -1197,15 +1145,13 @@ async def stream_speech_detection_job(
         speech_job_id,  # Pass speech detection job ID
         job_timeout=10800,  # 3 hours to match max_runtime in open_conversation_job
         result_ttl=JOB_RESULT_TTL,  # Use configured TTL (24 hours) instead of 10 minutes
-        job_id=f"open-conv_{session_id[:12]}_{conversation_count}",
-        description=f"Conversation #{conversation_count+1} for {session_id[:12]}",
+        job_id=f"open-conv_{session_id}_{conversation_count}",
+        description=f"Conversation #{conversation_count+1} for {session_id}",
         meta={"client_id": client_id},
     )
 
     # Track the job
-    await redis_client.set(
-        open_job_key, open_job.id, ex=10800
-    )  # 3 hours to match job timeout
+    await redis_client.set(open_job_key, open_job.id, ex=10800)  # 3 hours to match job timeout
 
     # Store metadata in speech detection job
     if current_job:
@@ -1218,31 +1164,23 @@ async def stream_speech_detection_job(
         current_job.meta.update(
             {
                 "conversation_job_id": open_job.id,
-                "speaker_check_job_id": (
-                    speaker_check_job.id if speaker_check_job else None
-                ),
+                "speaker_check_job_id": (speaker_check_job.id if speaker_check_job else None),
                 "detected_speakers": identified_speakers,
-                "speech_detected_at": datetime.fromtimestamp(
-                    speech_detected_at
-                ).isoformat(),
+                "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(),
                 "session_id": session_id,
                 "client_id": client_id,  # For job grouping
             }
         )
         current_job.save_meta()
 
-    logger.info(
-        f"✅ Started conversation job {open_job.id}, exiting speech detection"
-    )
+    logger.info(f"✅ Started conversation job {open_job.id}, exiting speech detection")
 
     return {
         "session_id": session_id,
         "user_id": user_id,
         "client_id": client_id,
         "conversation_job_id": open_job.id,
-        "speech_detected_at": datetime.fromtimestamp(
-            speech_detected_at
-        ).isoformat(),
+        "speech_detected_at": datetime.fromtimestamp(speech_detected_at).isoformat(),
        "runtime_seconds": time.time() - start_time,
     }
 
@@ -1270,9 +1208,7 @@ async def stream_speech_detection_job(
     # Check if this is an always_persist conversation that needs to be marked as failed
     # NOTE: We check MongoDB directly because the conversation:current Redis key might have been
     # deleted by the audio persistence job cleanup (which runs in parallel).
-    logger.info(
-        f"🔍 Checking MongoDB for always_persist conversation with client_id: {client_id}"
-    )
+    logger.info(f"🔍 Checking MongoDB for always_persist conversation with client_id: {client_id}")
 
     # Find conversation by client_id that matches this session
     # session_id == client_id for streaming sessions (set in _initialize_streaming_session)
@@ -1312,7 +1248,7 @@ async def stream_speech_detection_job(
         client_id,
         timeout_seconds=config_timeout,
         job_timeout=config_timeout + 300,  # Extra 5 min overhead for fallback check
-        job_id=f"fallback_check_{session_id[:12]}",
+        job_id=f"fallback_check_{session_id}",
         description=f"Transcription fallback check for {session_id[:8]} (no speech)",
         meta={"session_id": session_id, "client_id": client_id, "no_speech": True},
    )
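apply_speaker_recognition maps each transcript segment to the identified speaker whose
span covers the segment's midpoint. A self-contained sketch of that mapping using plain
dicts (the job itself works on segment objects, not dicts):

    def relabel_segments(segments: list[dict], speaker_spans: list[dict]) -> int:
        """Return how many segments were relabeled from speaker-identified spans."""
        updated = 0
        for seg in segments:
            midpoint = (seg["start"] + seg["end"]) / 2.0
            for span in speaker_spans:
                if span["start"] <= midpoint <= span["end"]:
                    speaker = span.get("speaker", "Unknown")
                    if speaker != "Unknown" and seg["speaker"] != speaker:
                        seg["speaker"] = speaker
                        updated += 1
                    break
        return updated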
diff --git a/backends/advanced/webui/src/pages/ConnectApp.tsx b/backends/advanced/webui/src/pages/ConnectApp.tsx
new file mode 100644
index 00000000..768cdb6e
--- /dev/null
+++ b/backends/advanced/webui/src/pages/ConnectApp.tsx
@@ -0,0 +1,120 @@
+import { useState } from 'react'
+import { QRCodeSVG } from 'qrcode.react'
+import { Smartphone, Copy, Check } from 'lucide-react'
+import { useTheme } from '../contexts/ThemeContext'
+
+function getBackendHttpUrl(): string {
+  const { protocol, hostname, port } = window.location
+
+  const isStandardPort =
+    (protocol === 'https:' && (port === '' || port === '443')) ||
+    (protocol === 'http:' && (port === '' || port === '80'))
+
+  const basePath = import.meta.env.BASE_URL
+  if (isStandardPort && basePath && basePath !== '/') {
+    // Caddy path-based routing — return full origin
+    return `${protocol}//${hostname}`
+  }
+
+  if (import.meta.env.VITE_BACKEND_URL) {
+    const url = import.meta.env.VITE_BACKEND_URL as string
+    // If it's a relative URL, make it absolute
+    if (url.startsWith('/') || url === '') {
+      return `${protocol}//${hostname}${port ? `:${port}` : ''}`
+    }
+    return url
+  }
+
+  if (isStandardPort) {
+    return `${protocol}//${hostname}`
+  }
+
+  if (port === '5173') {
+    return `${protocol}//${hostname}:8000`
+  }
+
+  return `${protocol}//${hostname}${port ? `:${port}` : ''}`
+}
+
+export default function ConnectApp() {
+  const { isDark } = useTheme()
+  const [copied, setCopied] = useState(false)
+  const backendUrl = getBackendHttpUrl()
+
+  const handleCopy = async () => {
+    try {
+      await navigator.clipboard.writeText(backendUrl)
+      setCopied(true)
+      setTimeout(() => setCopied(false), 2000)
+    } catch {
+      // Fallback for older browsers
+      const textArea = document.createElement('textarea')
+      textArea.value = backendUrl
+      document.body.appendChild(textArea)
+      textArea.select()
+      document.execCommand('copy')
+      document.body.removeChild(textArea)
+      setCopied(true)
+      setTimeout(() => setCopied(false), 2000)
+    }
+  }
+
+  return (
+    <div>
+      <div>
+        <Smartphone />
+        <h1>Connect App</h1>
+      </div>
+      <p>
+        Scan this QR code with the Chronicle mobile app to connect it to your backend.
+      </p>
+
+      {/* QR Code */}
+      <div>
+        <QRCodeSVG
+          value={backendUrl}
+          size={224}
+          bgColor={isDark ? '#111827' : '#ffffff'}
+          fgColor={isDark ? '#ffffff' : '#000000'}
+        />
+      </div>
+
+      {/* URL display + copy */}
+      <div>
+        <span>{backendUrl}</span>
+        <button onClick={handleCopy} aria-label="Copy backend URL">
+          {copied ? <Check /> : <Copy />}
+        </button>
+      </div>
+
+      {/* Instructions */}
+      <div>
+        <h2>How to connect</h2>
+        <ol>
+          <li>Open the Chronicle app on your phone</li>
+          <li>Go to Settings and tap Scan QR Code</li>
+          <li>Point your camera at the QR code above</li>
+          <li>The backend URL will be configured automatically</li>
+        </ol>
+      </div>
+    </div>
+  )
+}
diff --git a/backends/advanced/webui/src/pages/Queue.tsx b/backends/advanced/webui/src/pages/Queue.tsx
index f1c6a784..1812db83 100644
--- a/backends/advanced/webui/src/pages/Queue.tsx
+++ b/backends/advanced/webui/src/pages/Queue.tsx
@@ -22,7 +22,7 @@ import {
   Repeat,
   Zap
 } from 'lucide-react';
-import { queueApi } from '../services/api';
+import { queueApi, conversationsApi } from '../services/api';
 
 interface QueueStats {
   total_jobs: number;
@@ -1143,7 +1143,7 @@ const Queue: React.FC = () => {
                 className={`flex items-center justify-between p-3 cursor-pointer transition-colors ${hasFailedJob ? 'hover:bg-red-100' : 'hover:bg-cyan-100'}`}
                 onClick={() => toggleConversationExpansion(conversationId)}
               >
-                <div>
+                <div>
                   {isExpanded ? (
@@ -1169,7 +1169,7 @@ const Queue: React.FC = () => {
                   )}
                 </div>
-                <div>
+                <div>
                   Conversation: {conversationId.substring(0, 8)}... •
                   {createdAt && `Started: ${new Date(createdAt).toLocaleTimeString()} • `}
                   Words: {wordCount}
@@ -1181,6 +1181,27 @@ const Queue: React.FC = () => {
                   </div>
                 )}
               </div>
+              {/* Close Conversation Button - only for actively running conversations */}
+              {openConvJob && openConvJob.status === 'started' && (
+                <button
+                  onClick={(e) => {
+                    e.stopPropagation()
+                    conversationsApi.closeActiveConversation(openConvJob.meta?.client_id)
+                  }}
+                >
+                  Close Conversation
+                </button>
+              )}
 
             {/* Expanded Jobs Section */}
diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts
index 8929fb28..7961c285 100644
--- a/backends/advanced/webui/src/services/api.ts
+++ b/backends/advanced/webui/src/services/api.ts
@@ -150,6 +150,9 @@ export const conversationsApi = {
   activateTranscriptVersion: (conversationId: string, versionId: string) => api.post(`/api/conversations/${conversationId}/activate-transcript/${versionId}`),
   activateMemoryVersion: (conversationId: string, versionId: string) => api.post(`/api/conversations/${conversationId}/activate-memory/${versionId}`),
   getVersionHistory: (conversationId: string) => api.get(`/api/conversations/${conversationId}/versions`),
+
+  // Active conversation management
+  closeActiveConversation: (clientId: string) => api.post(`/api/conversations/${clientId}/close`),
 }
 
 export const memoriesApi = {