From f0244403012cf1eef48c9efb175b0ed5ce07454b Mon Sep 17 00:00:00 2001 From: Jay Chao Date: Sun, 9 Nov 2025 01:01:31 -0800 Subject: [PATCH 1/5] add searching function to ResCanvas --- backend/app.py | 2 + backend/requirements.txt | 4 +- backend/routes/search_ai.py | 129 ++++++++++++++++++ backend/services/search_algorithms.py | 58 ++++++++ frontend/package-lock.json | 64 +++++++-- frontend/src/api/apiClient.js | 4 +- .../src/components/Search/AISearchPanel.jsx | 98 +++++++++++++ .../components/Search/VisualSearchUpload.jsx | 45 ++++++ frontend/src/pages/Dashboard.jsx | 2 + 9 files changed, 395 insertions(+), 11 deletions(-) create mode 100644 backend/routes/search_ai.py create mode 100644 backend/services/search_algorithms.py create mode 100644 frontend/src/components/Search/AISearchPanel.jsx create mode 100644 frontend/src/components/Search/VisualSearchUpload.jsx diff --git a/backend/app.py b/backend/app.py index 583632cd..a716470b 100644 --- a/backend/app.py +++ b/backend/app.py @@ -29,6 +29,7 @@ from routes.frontend import frontend_bp from routes.analytics import analytics_bp from routes.export import export_bp +from routes.search_ai import search_ai_bp from services.db import redis_client from services.canvas_counter import get_canvas_draw_count from services.graphql_service import commit_transaction_via_graphql @@ -191,6 +192,7 @@ def handle_all_exceptions(e): app.register_blueprint(users_v1_bp) app.register_blueprint(stamps_bp, url_prefix='/api') app.register_blueprint(templates_v1_bp) +app.register_blueprint(search_ai_bp) # Frontend serving must be last to avoid route conflicts app.register_blueprint(frontend_bp) diff --git a/backend/requirements.txt b/backend/requirements.txt index 6cf11cff..11e7f4e7 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -31,7 +31,7 @@ limits==5.6.0 markdown-it-py==4.0.0 MarkupSafe==3.0.3 mdurl==0.1.2 -motor==3.7.1 +# motor==3.7.1 ordered-set==4.1.0 packaging==25.0 passlib==1.7.4 @@ -51,7 +51,7 @@ python-engineio==4.12.3 python-socketio==5.14.1 redis==6.2.0 requests==2.32.4 -resilient-python-cache==0.1.1 +# resilient-python-cache==0.1.1 rich==13.9.4 simple-websocket==1.1.0 simplejson==3.19.3 diff --git a/backend/routes/search_ai.py b/backend/routes/search_ai.py new file mode 100644 index 00000000..c8da5808 --- /dev/null +++ b/backend/routes/search_ai.py @@ -0,0 +1,129 @@ +from flask import Blueprint, request, jsonify, g +from middleware.auth import require_auth_optional +from services.db import rooms_coll, shares_coll +from bson import ObjectId +from services.search_algorithms import text_search, image_search +import logging + +search_ai_bp = Blueprint('search_ai', __name__) +logger = logging.getLogger(__name__) + + + + +@search_ai_bp.route('/api/v1/search/ai', methods=['POST']) +@require_auth_optional +def search_ai(): + payload = request.get_json(silent=True) or {} + q = (payload.get('q') or '').strip() + image_b64 = payload.get('image_b64') + user = g.current_user + claims = getattr(g, 'token_claims', None) + + # ---- Visibility: public OR owner OR shared; always exclude archived ---- + vis_or = [{"type": "public"}] + if user and claims and claims.get('sub'): + # collect shared room ObjectIds + try: + shared_cursor = shares_coll.find( + {"$or": [{"userId": claims['sub']}, {"username": claims['sub']}]}, + {"roomId": 1} + ) + oids_obj = [] + oids_str = [] + for doc in shared_cursor: + rid = doc.get("roomId") + # roomId may be stored as a string (hex) or as an ObjectId already + if isinstance(rid, str): + oids_str.append(rid) + 
try: + oids_obj.append(ObjectId(rid)) + except Exception: + pass + else: + # assume it's an ObjectId or similar + try: + oids_obj.append(ObjectId(rid)) + except Exception: + try: + # fallback: convert to str + oids_str.append(str(rid)) + except Exception: + pass + except Exception: + oids_obj = [] + oids_str = [] + + # Match ownerId stored as string or as ObjectId (legacy/varied schemas) + try: + oid_owner = ObjectId(claims['sub']) + vis_or.append({"ownerId": claims['sub']}) + vis_or.append({"ownerId": oid_owner}) + except Exception: + vis_or.append({"ownerId": claims['sub']}) + # Also match by ownerName to handle legacy documents that store owner + # as a username or when ownerId formats vary. + if claims.get('username'): + vis_or.append({"ownerName": claims.get('username')}) + # include shared rooms by _id; support both ObjectId and string representations + if oids_obj: + vis_or.append({"_id": {"$in": oids_obj}}) + if oids_str: + vis_or.append({"_id": {"$in": oids_str}}) + logger.debug("search_ai: visibility OR clauses count: owners=%s, shared_obj=%s, shared_str=%s", len([c for c in vis_or if 'ownerId' in c or 'ownerName' in c]), len(oids_obj), len(oids_str)) + + # If a text query is provided, search across all non-archived rooms (public+private) + if q: + match = {"archived": {"$ne": True}} + else: + match = {"$and": [{"archived": {"$ne": True}}, {"$or": vis_or}]} + + # ---- Limit / fields ---- + LIMIT = min(int(payload.get("limit", 50)), 100) + fields = {"name": 1, "type": 1, "ownerName": 1, "description": 1, "createdAt": 1, "updatedAt": 1} + + # ---- Fetch candidates (visibility-filtered) ---- + try: + candidates = [] + logger.debug("search_ai: using match=%s", match) + for r in rooms_coll.find(match, fields).limit(LIMIT * 5): # oversample; rank later + candidates.append({ + "id": str(r.get("_id")), + "name": r.get("name"), + "type": r.get("type"), + "ownerName": r.get("ownerName"), + "description": r.get("description"), + "createdAt": r.get("createdAt"), + "updatedAt": r.get("updatedAt"), + }) + logger.info("search_ai: fetched %d candidate rooms (limit=%s)", len(candidates), LIMIT * 5) + except Exception as e: + logger.exception("Search candidate fetch failed: %s", e) + return jsonify({"status": "ok", "results": []}), 200 + + # ---- Match + rank using your stubs ---- + try: + if image_b64: + ranked = image_search(image_b64=image_b64, rooms=candidates, q=q or None, top_n=LIMIT) + elif q: + ranked = text_search(query=q, rooms=candidates, top_n=LIMIT) + else: + # No signals → recency fallback with default score + ranked = sorted( + candidates, + key=lambda x: x.get("updatedAt") or x.get("createdAt"), + reverse=True + )[:LIMIT] + for r in ranked: + r["score"] = 1.0 + except Exception as e: + logger.exception("Search ranking failed: %s", e) + ranked = candidates[:LIMIT] # fail soft + + # ---- Presentation fields ---- + for r in ranked: + r["snippet"] = (r.get("description") or "")[:300] + if "score" not in r: + r["score"] = 1.0 + + return jsonify({"status": "ok", "results": ranked}), 200 diff --git a/backend/services/search_algorithms.py b/backend/services/search_algorithms.py new file mode 100644 index 00000000..acce7d43 --- /dev/null +++ b/backend/services/search_algorithms.py @@ -0,0 +1,58 @@ +"""Simple search algorithm stubs for ResCanvas. + +These are intentionally naive implementations used by the website prototype. +They accept a list of room dicts and return them with a uniform score so the +frontend can exercise the UI without any model or vector DB dependencies. 
+""" +from typing import List, Dict, Any +import random + +DEFAULT_TOP_N = 50 + + + +def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: + """Prototype text search: random scores + ranking.""" + rng = random.Random(seed) if seed is not None else random + scored = [{**r, "score": rng.random()} for r in rooms] + scored.sort(key=lambda x: x["score"], reverse=True) + return scored[:top_n] + +def image_search(image_b64: str, rooms: List[Dict[str, Any]], q: str | None = None, top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: + """Prototype image search: random scores + ranking.""" + rng = random.Random(seed) if seed is not None else random + scored = [{**r, "score": rng.random()} for r in rooms] + scored.sort(key=lambda x: x["score"], reverse=True) + return scored[:top_n] + + + +# def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N) -> List[Dict[str, Any]]: +# """Naive text search stub. + +# Returns the input rooms unchanged except for attaching a default score and +# a snippet field (from description) so the frontend can render results. +# """ +# out = [] +# for r in rooms[:top_n]: +# out.append({ +# **r, +# 'score': 1.0, +# 'snippet': (r.get('description') or '')[:300] +# }) +# return out + + +# def image_search(image_b64: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N) -> List[Dict[str, Any]]: +# """Naive image search stub. + +# Currently ignores the image and returns input rooms with default score. +# """ +# out = [] +# for r in rooms[:top_n]: +# out.append({ +# **r, +# 'score': 1.0, +# 'snippet': (r.get('description') or '')[:300] +# }) +# return out diff --git a/frontend/package-lock.json b/frontend/package-lock.json index f1b6a622..f64749d3 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -96,6 +96,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.0.tgz", "integrity": "sha512-i1SLeK+DzNnQ3LL/CswPCa/E5u4lh1k6IAEphON8F+cXt0t9euTshDru0q7/IqMa1PMPz5RnHuHscF8/ZJsStg==", + "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.26.0", @@ -715,6 +716,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.26.0.tgz", "integrity": "sha512-B+O2DnPc0iG+YXFqOxv2WNuNU97ToWjOomUQ78DouOENWUaM5sVrmet9mcomUGQFwpJd//gvUagXBSdzO1fRKg==", + "peer": true, "dependencies": { "@babel/helper-plugin-utils": "^7.25.9" }, @@ -1524,6 +1526,7 @@ "version": "7.25.9", "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.25.9.tgz", "integrity": "sha512-s5XwpQYCqGerXl+Pu6VDL3x0j2d82eiV77UJ8a2mDHAW7j9SWRqQ2y1fNo1Z74CdcYipl5Z41zvjj4Nfzq36rw==", + "peer": true, "dependencies": { "@babel/helper-annotate-as-pure": "^7.25.9", "@babel/helper-module-imports": "^7.25.9", @@ -2327,6 +2330,7 @@ "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.13.5.tgz", "integrity": "sha512-6zeCUxUH+EPF1s+YF/2hPVODeV/7V07YU5x+2tfuRL8MdW6rv5vb2+CBEGTGwBdux0OIERcOS+RzxeK80k2DsQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -2370,6 +2374,7 @@ "resolved": "https://registry.npmjs.org/@emotion/styled/-/styled-11.13.5.tgz", "integrity": "sha512-gnOQ+nGLPvDXgIx119JqGalys64lhMdnNQA9TMxhDA4K0Hq5+++OE20Zs5GxiCV9r814xQ2K5WmtofSpHVW6BQ==", "license": "MIT", + "peer": true, "dependencies": { 
"@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -3050,6 +3055,7 @@ "version": "6.4.7", "resolved": "https://registry.npmjs.org/@mui/material/-/material-6.4.7.tgz", "integrity": "sha512-K65StXUeGAtFJ4ikvHKtmDCO5Ab7g0FZUu2J5VpoKD+O6Y3CjLYzRi+TMlI3kaL4CL158+FccMoOd/eaddmeRQ==", + "peer": true, "dependencies": { "@babel/runtime": "^7.26.0", "@mui/core-downloads-tracker": "^6.4.7", @@ -3719,7 +3725,6 @@ "version": "10.4.0", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.0.tgz", "integrity": "sha512-pemlzrSESWbdAloYml3bAJMEfNh1Z7EduzqPKprCH5S341frlpYnUEW0H72dLxa6IsYr+mPno20GiSm+h9dEdQ==", - "peer": true, "dependencies": { "@babel/code-frame": "^7.10.4", "@babel/runtime": "^7.12.5", @@ -3738,7 +3743,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "peer": true, "dependencies": { "dequal": "^2.0.3" } @@ -4314,6 +4318,7 @@ "version": "18.3.12", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.12.tgz", "integrity": "sha512-D2wOSq/d6Agt28q7rSI3jhU7G6aiuzljDGZ2hTZHIkrTLUI+AF3WMeKkEZ9nN2fkBAlcktT6vcZjDFiIhMYEQw==", + "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -4437,6 +4442,7 @@ "version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.62.0.tgz", "integrity": "sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag==", + "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.4.0", "@typescript-eslint/scope-manager": "5.62.0", @@ -4488,6 +4494,7 @@ "version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.62.0.tgz", "integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "5.62.0", "@typescript-eslint/types": "5.62.0", @@ -4827,6 +4834,7 @@ "version": "8.14.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -4905,6 +4913,7 @@ "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -5713,6 +5722,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001669", "electron-to-chromium": "^1.5.41", @@ -7057,6 +7067,7 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -7988,6 +7999,7 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.1.tgz", "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", "deprecated": "This version is no longer supported. 
Please see https://eslint.org/version-support for other options.", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -10816,6 +10828,7 @@ "version": "27.5.1", "resolved": "https://registry.npmjs.org/jest/-/jest-27.5.1.tgz", "integrity": "sha512-Yn0mADZB89zTtjkPJEXwrac3LHudkQMR+Paqa8uxJHCBr9agxztUifWCyiYrjhMPBoUVBjyny0I7XH6ozDr7QQ==", + "peer": true, "dependencies": { "@jest/core": "^27.5.1", "import-local": "^3.0.2", @@ -13861,6 +13874,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "nanoid": "^3.3.7", "picocolors": "^1.1.0", @@ -14953,6 +14967,7 @@ "version": "6.1.2", "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", + "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -15296,6 +15311,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -15458,6 +15474,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -15506,6 +15523,7 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.11.0.tgz", "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -16296,6 +16314,7 @@ "version": "2.79.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.79.2.tgz", "integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==", + "peer": true, "bin": { "rollup": "dist/bin/rollup" }, @@ -16515,6 +16534,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -18058,6 +18078,7 @@ "version": "0.21.3", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "peer": true, "engines": { "node": ">=10" }, @@ -18597,6 +18618,7 @@ "version": "5.96.1", "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.96.1.tgz", "integrity": "sha512-l2LlBSvVZGhL4ZrPwyr8+37AunkcYj5qh8o6u2/2rzoPc8gxFJkLj1WxNgooi9pnoc06jh0BjuXnamM4qlujZA==", + "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.6", @@ -18664,6 +18686,7 @@ "version": "4.15.2", "resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-4.15.2.tgz", "integrity": "sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g==", + "peer": true, "dependencies": { "@types/bonjour": "^3.5.9", "@types/connect-history-api-fallback": "^1.3.5", @@ -19062,6 +19085,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -19477,6 +19501,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.0.tgz", "integrity": "sha512-i1SLeK+DzNnQ3LL/CswPCa/E5u4lh1k6IAEphON8F+cXt0t9euTshDru0q7/IqMa1PMPz5RnHuHscF8/ZJsStg==", + "peer": true, "requires": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.26.0", @@ -19893,6 +19918,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.26.0.tgz", "integrity": "sha512-B+O2DnPc0iG+YXFqOxv2WNuNU97ToWjOomUQ78DouOENWUaM5sVrmet9mcomUGQFwpJd//gvUagXBSdzO1fRKg==", + "peer": true, "requires": { "@babel/helper-plugin-utils": "^7.25.9" } @@ -20384,6 +20410,7 @@ "version": "7.25.9", "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.25.9.tgz", "integrity": "sha512-s5XwpQYCqGerXl+Pu6VDL3x0j2d82eiV77UJ8a2mDHAW7j9SWRqQ2y1fNo1Z74CdcYipl5Z41zvjj4Nfzq36rw==", + "peer": true, "requires": { "@babel/helper-annotate-as-pure": "^7.25.9", "@babel/helper-module-imports": "^7.25.9", @@ -20897,6 +20924,7 @@ "version": "11.13.5", "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.13.5.tgz", "integrity": "sha512-6zeCUxUH+EPF1s+YF/2hPVODeV/7V07YU5x+2tfuRL8MdW6rv5vb2+CBEGTGwBdux0OIERcOS+RzxeK80k2DsQ==", + "peer": true, "requires": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -20929,6 +20957,7 @@ "version": "11.13.5", "resolved": "https://registry.npmjs.org/@emotion/styled/-/styled-11.13.5.tgz", "integrity": "sha512-gnOQ+nGLPvDXgIx119JqGalys64lhMdnNQA9TMxhDA4K0Hq5+++OE20Zs5GxiCV9r814xQ2K5WmtofSpHVW6BQ==", + "peer": true, "requires": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -21418,6 +21447,7 @@ "version": "6.4.7", "resolved": "https://registry.npmjs.org/@mui/material/-/material-6.4.7.tgz", "integrity": "sha512-K65StXUeGAtFJ4ikvHKtmDCO5Ab7g0FZUu2J5VpoKD+O6Y3CjLYzRi+TMlI3kaL4CL158+FccMoOd/eaddmeRQ==", + "peer": true, "requires": { "@babel/runtime": "^7.26.0", "@mui/core-downloads-tracker": "^6.4.7", @@ -21793,7 +21823,6 @@ "version": "10.4.0", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.0.tgz", "integrity": "sha512-pemlzrSESWbdAloYml3bAJMEfNh1Z7EduzqPKprCH5S341frlpYnUEW0H72dLxa6IsYr+mPno20GiSm+h9dEdQ==", - "peer": true, "requires": { "@babel/code-frame": "^7.10.4", "@babel/runtime": "^7.12.5", @@ -21809,7 +21838,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "peer": true, "requires": { "dequal": "^2.0.3" } @@ -22322,6 +22350,7 @@ "version": "18.3.12", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.12.tgz", "integrity": "sha512-D2wOSq/d6Agt28q7rSI3jhU7G6aiuzljDGZ2hTZHIkrTLUI+AF3WMeKkEZ9nN2fkBAlcktT6vcZjDFiIhMYEQw==", + "peer": true, "requires": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -22442,6 +22471,7 @@ "version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.62.0.tgz", "integrity": "sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag==", + "peer": true, "requires": { "@eslint-community/regexpp": "^4.4.0", "@typescript-eslint/scope-manager": "5.62.0", @@ -22467,6 +22497,7 @@ 
"version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.62.0.tgz", "integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==", + "peer": true, "requires": { "@typescript-eslint/scope-manager": "5.62.0", "@typescript-eslint/types": "5.62.0", @@ -22723,7 +22754,8 @@ "acorn": { "version": "8.14.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", - "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==" + "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "peer": true }, "acorn-globals": { "version": "6.0.0", @@ -22778,6 +22810,7 @@ "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "peer": true, "requires": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -23356,6 +23389,7 @@ "version": "4.24.2", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.2.tgz", "integrity": "sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==", + "peer": true, "requires": { "caniuse-lite": "^1.0.30001669", "electron-to-chromium": "^1.5.41", @@ -24267,7 +24301,8 @@ "d3-selection": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true }, "d3-shape": { "version": "3.2.0", @@ -24950,6 +24985,7 @@ "version": "8.57.1", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.1.tgz", "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", + "peer": true, "requires": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -26905,6 +26941,7 @@ "version": "27.5.1", "resolved": "https://registry.npmjs.org/jest/-/jest-27.5.1.tgz", "integrity": "sha512-Yn0mADZB89zTtjkPJEXwrac3LHudkQMR+Paqa8uxJHCBr9agxztUifWCyiYrjhMPBoUVBjyny0I7XH6ozDr7QQ==", + "peer": true, "requires": { "@jest/core": "^27.5.1", "import-local": "^3.0.2", @@ -29020,6 +29057,7 @@ "version": "8.4.47", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz", "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==", + "peer": true, "requires": { "nanoid": "^3.3.7", "picocolors": "^1.1.0", @@ -29615,6 +29653,7 @@ "version": "6.1.2", "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", + "peer": true, "requires": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -29865,6 +29904,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "peer": true, "requires": { "loose-envify": "^1.1.0" } @@ -29987,6 +30027,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": 
"sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "peer": true, "requires": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -30022,7 +30063,8 @@ "react-refresh": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.11.0.tgz", - "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==" + "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==", + "peer": true }, "react-router": { "version": "7.0.2", @@ -30565,6 +30607,7 @@ "version": "2.79.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.79.2.tgz", "integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==", + "peer": true, "requires": { "fsevents": "~2.3.2" } @@ -30694,6 +30737,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "requires": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -31869,7 +31913,8 @@ "type-fest": { "version": "0.21.3", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", - "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==" + "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "peer": true }, "type-is": { "version": "1.6.18", @@ -32245,6 +32290,7 @@ "version": "5.96.1", "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.96.1.tgz", "integrity": "sha512-l2LlBSvVZGhL4ZrPwyr8+37AunkcYj5qh8o6u2/2rzoPc8gxFJkLj1WxNgooi9pnoc06jh0BjuXnamM4qlujZA==", + "peer": true, "requires": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.6", @@ -32313,6 +32359,7 @@ "version": "4.15.2", "resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-4.15.2.tgz", "integrity": "sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g==", + "peer": true, "requires": { "@types/bonjour": "^3.5.9", "@types/connect-history-api-fallback": "^1.3.5", @@ -32579,6 +32626,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "requires": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", diff --git a/frontend/src/api/apiClient.js b/frontend/src/api/apiClient.js index 6a7d454d..9cf683fc 100644 --- a/frontend/src/api/apiClient.js +++ b/frontend/src/api/apiClient.js @@ -18,7 +18,9 @@ import { globalRateLimitMonitor, } from '../utils/rateLimitHandler'; -const API_BASE = process.env.REACT_APP_API_BASE; +// Fallback to backend default (used in local development) when REACT_APP_API_BASE +// is not provided. This prevents malformed requests like "undefined/api/..." 
+const API_BASE = process.env.REACT_APP_API_BASE || 'http://localhost:10010'; console.log("API Base URL", API_BASE) diff --git a/frontend/src/components/Search/AISearchPanel.jsx b/frontend/src/components/Search/AISearchPanel.jsx new file mode 100644 index 00000000..ed0a3493 --- /dev/null +++ b/frontend/src/components/Search/AISearchPanel.jsx @@ -0,0 +1,98 @@ +import React, { useState } from 'react'; +import { Box, TextField, Button, Stack, Paper, Typography, List, ListItem, Divider, CircularProgress, Alert } from '@mui/material'; +import apiClient from '../../api/apiClient'; +import RouterLinkWrapper from '../RouterLinkWrapper'; +import VisualSearchUpload from './VisualSearchUpload'; + +export default function AISearchPanel({ auth }) { + const [imageB64, setImageB64] = useState(null); + const [query, setQuery] = useState(''); + const [uploadedFilename, setUploadedFilename] = useState(null); + const [loading, setLoading] = useState(false); + const [results, setResults] = useState([]); + const [error, setError] = useState(null); + + const doSearch = async () => { + setLoading(true); + setError(null); + try { + if (!imageB64 && (!query || !query.trim())) { + setError('Please provide a description or upload an image to search.'); + setResults([]); + return; + } + const payload = {}; + if (query && query.trim()) payload.q = query.trim(); + if (imageB64) payload.image_b64 = imageB64; + const res = await apiClient.post('/api/v1/search/ai', payload); + if (!res) { + setResults([]); + setError('No response from server'); + } else if (res.status && res.status !== 'ok') { + setResults([]); + setError(res.message || 'Search failed'); + } else { + setResults((res && res.results) || []); + } + } catch (e) { + console.error('Search failed', e); + const msg = (e && e.message) || 'Network or server error'; + setError(msg); + setResults([]); + } finally { + setLoading(false); + } + }; + + return ( + + + AI Search + + setQuery(e.target.value)} + fullWidth + multiline + minRows={2} + /> + + setImageB64(b64)} onFileName={(fn) => setUploadedFilename(fn)} /> + + + + + {uploadedFilename && ( + Uploaded: {uploadedFilename} + )} + {loading && } + {error && {error}} + + + + + Results + + {results.length === 0 && No results} + {results.map(r => ( + + + + {r.name} + {r.ownerName || ''} + + + score: {typeof r.score === 'number' ? 
r.score.toFixed(2) : '-'} + + + {r.snippet && {r.snippet}} + + ))} + + + + + ); +} diff --git a/frontend/src/components/Search/VisualSearchUpload.jsx b/frontend/src/components/Search/VisualSearchUpload.jsx new file mode 100644 index 00000000..d9878eb1 --- /dev/null +++ b/frontend/src/components/Search/VisualSearchUpload.jsx @@ -0,0 +1,45 @@ +import React from 'react'; +import { Button, Typography } from '@mui/material'; + +/** + * VisualSearchUpload + * Props: + * - onImageBase64(base64String) + * - onFileName(filename) + * - accept (string) optional file accept string + */ +export default function VisualSearchUpload({ onImageBase64, onFileName, accept = 'image/*' }) { + const fileInputRef = React.useRef(null); + const [filename, setFilename] = React.useState(null); + + const handleFile = (file) => { + if (!file) return; + setFilename(file.name || null); + if (typeof onFileName === 'function') onFileName(file.name || null); + const reader = new FileReader(); + reader.onload = (e) => { + const dataUrl = e.target.result || ''; + const b64 = dataUrl.split(',')[1] || ''; + if (typeof onImageBase64 === 'function') onImageBase64(b64); + }; + reader.readAsDataURL(file); + }; + + return ( + <> + handleFile(e.target.files && e.target.files[0])} + /> + + {filename && ( + {filename} + )} + + ); +} diff --git a/frontend/src/pages/Dashboard.jsx b/frontend/src/pages/Dashboard.jsx index c80b42f2..b4b1a6fa 100644 --- a/frontend/src/pages/Dashboard.jsx +++ b/frontend/src/pages/Dashboard.jsx @@ -11,6 +11,7 @@ import Autocomplete from '@mui/material/Autocomplete'; import TemplateGallery from '../components/TemplateGallery'; import TemplateLoader from '../services/templateLoader'; import { listRooms, createRoom, shareRoom, listInvites, acceptInvite, declineInvite, updateRoom, suggestUsers, suggestRooms, getRoomMembers } from '../api/rooms'; +import AISearchPanel from '../components/Search/AISearchPanel'; import { getUsername } from '../utils/getUsername'; import { useNavigate, Link } from 'react-router-dom'; import RouterLinkWrapper from '../components/RouterLinkWrapper'; @@ -568,6 +569,7 @@ export default function Dashboard({ auth }) { {/* Pending invites */} + Pending Invites From a0b85f5e74c2e1d9d475d1d1f4310633f8fe7572 Mon Sep 17 00:00:00 2001 From: Steven Trujillo Date: Tue, 11 Nov 2025 19:27:13 -0800 Subject: [PATCH 2/5] Add embedding and vector search services for AI features --- backend/services/embedding_service.py | 23 ++++++++ backend/services/vector_search_service.py | 0 frontend/package-lock.json | 64 ++++++++++++++++++++--- 3 files changed, 79 insertions(+), 8 deletions(-) create mode 100644 backend/services/embedding_service.py create mode 100644 backend/services/vector_search_service.py diff --git a/backend/services/embedding_service.py b/backend/services/embedding_service.py new file mode 100644 index 00000000..dd2c5f4b --- /dev/null +++ b/backend/services/embedding_service.py @@ -0,0 +1,23 @@ +# Text +import torch, numpy as np, open_clip +from PIL import Image + +device = "cuda" if torch.cuda.is_available() else "cpu" +model, preprocess, tokenizer = open_clip.create_model_and_transforms( + 'ViT-B-32', pretrained='laion2b_s34b_b79k', device=device +) +model.eval() + +def embed_text(texts: list[str]) -> np.ndarray: + with torch.no_grad(): + tok = tokenizer(texts) + feats = model.encode_text(tok.to(device)) + feats = feats / feats.norm(dim=-1, keepdim=True) + return feats.cpu().numpy().astype(np.float32) # e.g., (N, 512) + +def embed_image(png_path: str) -> np.ndarray: + img = 
preprocess(Image.open(png_path)).unsqueeze(0).to(device) + with torch.no_grad(): + feats = model.encode_image(img) + feats = feats / feats.norm(dim=-1, keepdim=True) + return feats.cpu().numpy().astype(np.float32) # (1, 512) diff --git a/backend/services/vector_search_service.py b/backend/services/vector_search_service.py new file mode 100644 index 00000000..e69de29b diff --git a/frontend/package-lock.json b/frontend/package-lock.json index f1b6a622..f64749d3 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -96,6 +96,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.0.tgz", "integrity": "sha512-i1SLeK+DzNnQ3LL/CswPCa/E5u4lh1k6IAEphON8F+cXt0t9euTshDru0q7/IqMa1PMPz5RnHuHscF8/ZJsStg==", + "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.26.0", @@ -715,6 +716,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.26.0.tgz", "integrity": "sha512-B+O2DnPc0iG+YXFqOxv2WNuNU97ToWjOomUQ78DouOENWUaM5sVrmet9mcomUGQFwpJd//gvUagXBSdzO1fRKg==", + "peer": true, "dependencies": { "@babel/helper-plugin-utils": "^7.25.9" }, @@ -1524,6 +1526,7 @@ "version": "7.25.9", "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.25.9.tgz", "integrity": "sha512-s5XwpQYCqGerXl+Pu6VDL3x0j2d82eiV77UJ8a2mDHAW7j9SWRqQ2y1fNo1Z74CdcYipl5Z41zvjj4Nfzq36rw==", + "peer": true, "dependencies": { "@babel/helper-annotate-as-pure": "^7.25.9", "@babel/helper-module-imports": "^7.25.9", @@ -2327,6 +2330,7 @@ "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.13.5.tgz", "integrity": "sha512-6zeCUxUH+EPF1s+YF/2hPVODeV/7V07YU5x+2tfuRL8MdW6rv5vb2+CBEGTGwBdux0OIERcOS+RzxeK80k2DsQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -2370,6 +2374,7 @@ "resolved": "https://registry.npmjs.org/@emotion/styled/-/styled-11.13.5.tgz", "integrity": "sha512-gnOQ+nGLPvDXgIx119JqGalys64lhMdnNQA9TMxhDA4K0Hq5+++OE20Zs5GxiCV9r814xQ2K5WmtofSpHVW6BQ==", "license": "MIT", + "peer": true, "dependencies": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -3050,6 +3055,7 @@ "version": "6.4.7", "resolved": "https://registry.npmjs.org/@mui/material/-/material-6.4.7.tgz", "integrity": "sha512-K65StXUeGAtFJ4ikvHKtmDCO5Ab7g0FZUu2J5VpoKD+O6Y3CjLYzRi+TMlI3kaL4CL158+FccMoOd/eaddmeRQ==", + "peer": true, "dependencies": { "@babel/runtime": "^7.26.0", "@mui/core-downloads-tracker": "^6.4.7", @@ -3719,7 +3725,6 @@ "version": "10.4.0", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.0.tgz", "integrity": "sha512-pemlzrSESWbdAloYml3bAJMEfNh1Z7EduzqPKprCH5S341frlpYnUEW0H72dLxa6IsYr+mPno20GiSm+h9dEdQ==", - "peer": true, "dependencies": { "@babel/code-frame": "^7.10.4", "@babel/runtime": "^7.12.5", @@ -3738,7 +3743,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "peer": true, "dependencies": { "dequal": "^2.0.3" } @@ -4314,6 +4318,7 @@ "version": "18.3.12", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.12.tgz", "integrity": "sha512-D2wOSq/d6Agt28q7rSI3jhU7G6aiuzljDGZ2hTZHIkrTLUI+AF3WMeKkEZ9nN2fkBAlcktT6vcZjDFiIhMYEQw==", + "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -4437,6 +4442,7 @@ "version": "5.62.0", 
"resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.62.0.tgz", "integrity": "sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag==", + "peer": true, "dependencies": { "@eslint-community/regexpp": "^4.4.0", "@typescript-eslint/scope-manager": "5.62.0", @@ -4488,6 +4494,7 @@ "version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.62.0.tgz", "integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==", + "peer": true, "dependencies": { "@typescript-eslint/scope-manager": "5.62.0", "@typescript-eslint/types": "5.62.0", @@ -4827,6 +4834,7 @@ "version": "8.14.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -4905,6 +4913,7 @@ "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -5713,6 +5722,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "caniuse-lite": "^1.0.30001669", "electron-to-chromium": "^1.5.41", @@ -7057,6 +7067,7 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", + "peer": true, "engines": { "node": ">=12" } @@ -7988,6 +7999,7 @@ "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.1.tgz", "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", "deprecated": "This version is no longer supported. 
Please see https://eslint.org/version-support for other options.", + "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -10816,6 +10828,7 @@ "version": "27.5.1", "resolved": "https://registry.npmjs.org/jest/-/jest-27.5.1.tgz", "integrity": "sha512-Yn0mADZB89zTtjkPJEXwrac3LHudkQMR+Paqa8uxJHCBr9agxztUifWCyiYrjhMPBoUVBjyny0I7XH6ozDr7QQ==", + "peer": true, "dependencies": { "@jest/core": "^27.5.1", "import-local": "^3.0.2", @@ -13861,6 +13874,7 @@ "url": "https://github.com/sponsors/ai" } ], + "peer": true, "dependencies": { "nanoid": "^3.3.7", "picocolors": "^1.1.0", @@ -14953,6 +14967,7 @@ "version": "6.1.2", "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", + "peer": true, "dependencies": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -15296,6 +15311,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "peer": true, "dependencies": { "loose-envify": "^1.1.0" }, @@ -15458,6 +15474,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": "sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "peer": true, "dependencies": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -15506,6 +15523,7 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.11.0.tgz", "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -16296,6 +16314,7 @@ "version": "2.79.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.79.2.tgz", "integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==", + "peer": true, "bin": { "rollup": "dist/bin/rollup" }, @@ -16515,6 +16534,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -18058,6 +18078,7 @@ "version": "0.21.3", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "peer": true, "engines": { "node": ">=10" }, @@ -18597,6 +18618,7 @@ "version": "5.96.1", "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.96.1.tgz", "integrity": "sha512-l2LlBSvVZGhL4ZrPwyr8+37AunkcYj5qh8o6u2/2rzoPc8gxFJkLj1WxNgooi9pnoc06jh0BjuXnamM4qlujZA==", + "peer": true, "dependencies": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.6", @@ -18664,6 +18686,7 @@ "version": "4.15.2", "resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-4.15.2.tgz", "integrity": "sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g==", + "peer": true, "dependencies": { "@types/bonjour": "^3.5.9", "@types/connect-history-api-fallback": "^1.3.5", @@ -19062,6 +19085,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": 
"sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -19477,6 +19501,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.26.0.tgz", "integrity": "sha512-i1SLeK+DzNnQ3LL/CswPCa/E5u4lh1k6IAEphON8F+cXt0t9euTshDru0q7/IqMa1PMPz5RnHuHscF8/ZJsStg==", + "peer": true, "requires": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.26.0", @@ -19893,6 +19918,7 @@ "version": "7.26.0", "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-flow/-/plugin-syntax-flow-7.26.0.tgz", "integrity": "sha512-B+O2DnPc0iG+YXFqOxv2WNuNU97ToWjOomUQ78DouOENWUaM5sVrmet9mcomUGQFwpJd//gvUagXBSdzO1fRKg==", + "peer": true, "requires": { "@babel/helper-plugin-utils": "^7.25.9" } @@ -20384,6 +20410,7 @@ "version": "7.25.9", "resolved": "https://registry.npmjs.org/@babel/plugin-transform-react-jsx/-/plugin-transform-react-jsx-7.25.9.tgz", "integrity": "sha512-s5XwpQYCqGerXl+Pu6VDL3x0j2d82eiV77UJ8a2mDHAW7j9SWRqQ2y1fNo1Z74CdcYipl5Z41zvjj4Nfzq36rw==", + "peer": true, "requires": { "@babel/helper-annotate-as-pure": "^7.25.9", "@babel/helper-module-imports": "^7.25.9", @@ -20897,6 +20924,7 @@ "version": "11.13.5", "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.13.5.tgz", "integrity": "sha512-6zeCUxUH+EPF1s+YF/2hPVODeV/7V07YU5x+2tfuRL8MdW6rv5vb2+CBEGTGwBdux0OIERcOS+RzxeK80k2DsQ==", + "peer": true, "requires": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -20929,6 +20957,7 @@ "version": "11.13.5", "resolved": "https://registry.npmjs.org/@emotion/styled/-/styled-11.13.5.tgz", "integrity": "sha512-gnOQ+nGLPvDXgIx119JqGalys64lhMdnNQA9TMxhDA4K0Hq5+++OE20Zs5GxiCV9r814xQ2K5WmtofSpHVW6BQ==", + "peer": true, "requires": { "@babel/runtime": "^7.18.3", "@emotion/babel-plugin": "^11.13.5", @@ -21418,6 +21447,7 @@ "version": "6.4.7", "resolved": "https://registry.npmjs.org/@mui/material/-/material-6.4.7.tgz", "integrity": "sha512-K65StXUeGAtFJ4ikvHKtmDCO5Ab7g0FZUu2J5VpoKD+O6Y3CjLYzRi+TMlI3kaL4CL158+FccMoOd/eaddmeRQ==", + "peer": true, "requires": { "@babel/runtime": "^7.26.0", "@mui/core-downloads-tracker": "^6.4.7", @@ -21793,7 +21823,6 @@ "version": "10.4.0", "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.0.tgz", "integrity": "sha512-pemlzrSESWbdAloYml3bAJMEfNh1Z7EduzqPKprCH5S341frlpYnUEW0H72dLxa6IsYr+mPno20GiSm+h9dEdQ==", - "peer": true, "requires": { "@babel/code-frame": "^7.10.4", "@babel/runtime": "^7.12.5", @@ -21809,7 +21838,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/aria-query/-/aria-query-5.3.0.tgz", "integrity": "sha512-b0P0sZPKtyu8HkeRAfCq0IfURZK+SuwMjY1UXGBU27wpAiTwQAIlq56IbIO+ytk/JjS1fMR14ee5WBBfKi5J6A==", - "peer": true, "requires": { "dequal": "^2.0.3" } @@ -22322,6 +22350,7 @@ "version": "18.3.12", "resolved": "https://registry.npmjs.org/@types/react/-/react-18.3.12.tgz", "integrity": "sha512-D2wOSq/d6Agt28q7rSI3jhU7G6aiuzljDGZ2hTZHIkrTLUI+AF3WMeKkEZ9nN2fkBAlcktT6vcZjDFiIhMYEQw==", + "peer": true, "requires": { "@types/prop-types": "*", "csstype": "^3.0.2" @@ -22442,6 +22471,7 @@ "version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-5.62.0.tgz", "integrity": "sha512-TiZzBSJja/LbhNPvk6yc0JrX9XqhQ0hdh6M2svYfsHGejaKFIAGd9MQ+ERIMzLGlN/kZoYIgdxFV0PuljTKXag==", + "peer": true, "requires": { "@eslint-community/regexpp": "^4.4.0", "@typescript-eslint/scope-manager": "5.62.0", @@ -22467,6 +22497,7 @@ 
"version": "5.62.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-5.62.0.tgz", "integrity": "sha512-VlJEV0fOQ7BExOsHYAGrgbEiZoi8D+Bl2+f6V2RrXerRSylnp+ZBHmPvaIa8cz0Ajx7WO7Z5RqfgYg7ED1nRhA==", + "peer": true, "requires": { "@typescript-eslint/scope-manager": "5.62.0", "@typescript-eslint/types": "5.62.0", @@ -22723,7 +22754,8 @@ "acorn": { "version": "8.14.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.0.tgz", - "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==" + "integrity": "sha512-cl669nCJTZBsL97OF4kUQm5g5hC2uihk0NxY3WENAC0TYdILVkAyHymAntgxGkl7K+t0cXIrH5siy5S4XkFycA==", + "peer": true }, "acorn-globals": { "version": "6.0.0", @@ -22778,6 +22810,7 @@ "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "peer": true, "requires": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -23356,6 +23389,7 @@ "version": "4.24.2", "resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.24.2.tgz", "integrity": "sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==", + "peer": true, "requires": { "caniuse-lite": "^1.0.30001669", "electron-to-chromium": "^1.5.41", @@ -24267,7 +24301,8 @@ "d3-selection": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", - "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==" + "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", + "peer": true }, "d3-shape": { "version": "3.2.0", @@ -24950,6 +24985,7 @@ "version": "8.57.1", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.1.tgz", "integrity": "sha512-ypowyDxpVSYpkXr9WPv2PAZCtNip1Mv5KTW0SCurXv/9iOpcrH9PaqUElksqEB6pChqHGDRCFTyrZlGhnLNGiA==", + "peer": true, "requires": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", @@ -26905,6 +26941,7 @@ "version": "27.5.1", "resolved": "https://registry.npmjs.org/jest/-/jest-27.5.1.tgz", "integrity": "sha512-Yn0mADZB89zTtjkPJEXwrac3LHudkQMR+Paqa8uxJHCBr9agxztUifWCyiYrjhMPBoUVBjyny0I7XH6ozDr7QQ==", + "peer": true, "requires": { "@jest/core": "^27.5.1", "import-local": "^3.0.2", @@ -29020,6 +29057,7 @@ "version": "8.4.47", "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.47.tgz", "integrity": "sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==", + "peer": true, "requires": { "nanoid": "^3.3.7", "picocolors": "^1.1.0", @@ -29615,6 +29653,7 @@ "version": "6.1.2", "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.1.2.tgz", "integrity": "sha512-Q8qQfPiZ+THO/3ZrOrO0cJJKfpYCagtMUkXbnEfmgUjwXg6z/WBeOyS9APBBPCTSiDV+s4SwQGu8yFsiMRIudg==", + "peer": true, "requires": { "cssesc": "^3.0.0", "util-deprecate": "^1.0.2" @@ -29865,6 +29904,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react/-/react-18.3.1.tgz", "integrity": "sha512-wS+hAgJShR0KhEvPJArfuPVN1+Hz1t0Y6n5jLrGQbkb4urgPE/0Rve+1kMB1v/oWgHgm4WIcV+i7F2pTVj+2iQ==", + "peer": true, "requires": { "loose-envify": "^1.1.0" } @@ -29987,6 +30027,7 @@ "version": "18.3.1", "resolved": "https://registry.npmjs.org/react-dom/-/react-dom-18.3.1.tgz", "integrity": 
"sha512-5m4nQKp+rZRb09LNH59GM4BxTh9251/ylbKIbpe7TpGxfJ+9kv6BLkLBXIjjspbgbnIBNqlI23tRnTWT0snUIw==", + "peer": true, "requires": { "loose-envify": "^1.1.0", "scheduler": "^0.23.2" @@ -30022,7 +30063,8 @@ "react-refresh": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/react-refresh/-/react-refresh-0.11.0.tgz", - "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==" + "integrity": "sha512-F27qZr8uUqwhWZboondsPx8tnC3Ct3SxZA3V5WyEvujRyyNv0VYPhoBg1gZ8/MV5tubQp76Trw8lTv9hzRBa+A==", + "peer": true }, "react-router": { "version": "7.0.2", @@ -30565,6 +30607,7 @@ "version": "2.79.2", "resolved": "https://registry.npmjs.org/rollup/-/rollup-2.79.2.tgz", "integrity": "sha512-fS6iqSPZDs3dr/y7Od6y5nha8dW1YnbgtsyotCVvoFGKbERG++CVRFv1meyGDE1SNItQA8BrnCw7ScdAhRJ3XQ==", + "peer": true, "requires": { "fsevents": "~2.3.2" } @@ -30694,6 +30737,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "requires": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -31869,7 +31913,8 @@ "type-fest": { "version": "0.21.3", "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz", - "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==" + "integrity": "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==", + "peer": true }, "type-is": { "version": "1.6.18", @@ -32245,6 +32290,7 @@ "version": "5.96.1", "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.96.1.tgz", "integrity": "sha512-l2LlBSvVZGhL4ZrPwyr8+37AunkcYj5qh8o6u2/2rzoPc8gxFJkLj1WxNgooi9pnoc06jh0BjuXnamM4qlujZA==", + "peer": true, "requires": { "@types/eslint-scope": "^3.7.7", "@types/estree": "^1.0.6", @@ -32313,6 +32359,7 @@ "version": "4.15.2", "resolved": "https://registry.npmjs.org/webpack-dev-server/-/webpack-dev-server-4.15.2.tgz", "integrity": "sha512-0XavAZbNJ5sDrCbkpWL8mia0o5WPOd2YGtxrEiZkBK9FjLppIUK2TgxK6qGD2P3hUXTJNNPVibrerKcx5WkR1g==", + "peer": true, "requires": { "@types/bonjour": "^3.5.9", "@types/connect-history-api-fallback": "^1.3.5", @@ -32579,6 +32626,7 @@ "version": "8.17.1", "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "peer": true, "requires": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", From 6341f556d173a29574735bee3c3a6697b0e25785 Mon Sep 17 00:00:00 2001 From: Steven Trujillo Date: Tue, 11 Nov 2025 20:06:47 -0800 Subject: [PATCH 3/5] Implement Qdrant vector search service and semantic search algorithms - Added vector_search_service.py with Qdrant integration * store_canvas_embedding() for storing 512-dim CLIP embeddings * search_by_embedding() for similarity search * batch operations and collection management - Updated search_algorithms.py to use real embeddings * Replaced random score stubs with CLIP-based semantic search * text_search() now uses embed_text() + vector similarity * image_search() now uses embed_image() + vector similarity * Graceful fallback to random if AI services unavailable - Added Qdrant configuration to config.py - Added dependencies: qdrant-client, torch, open_clip_torch, pillow - Added Qdrant service to docker-compose.yml - Created setup guides for local development Ready for testing - NOT pushed to remote yet --- 
backend/NEXT_STEPS.md | 361 ++++++++++++++++++++++ backend/SETUP_VECTOR_SEARCH.md | 276 +++++++++++++++++ backend/config.py | 7 + backend/docker-compose.yml | 26 ++ backend/requirements.txt | 7 + backend/services/search_algorithms.py | 216 ++++++++++--- backend/services/vector_search_service.py | 295 ++++++++++++++++++ 7 files changed, 1148 insertions(+), 40 deletions(-) create mode 100644 backend/NEXT_STEPS.md create mode 100644 backend/SETUP_VECTOR_SEARCH.md diff --git a/backend/NEXT_STEPS.md b/backend/NEXT_STEPS.md new file mode 100644 index 00000000..d66d3040 --- /dev/null +++ b/backend/NEXT_STEPS.md @@ -0,0 +1,361 @@ +# Next Steps: Complete AI Search Implementation + +## What's Done ✅ + +1. **Core Infrastructure** ✅ + - Qdrant vector database setup + - `vector_search_service.py` - Complete CRUD operations + - `search_algorithms.py` - Real semantic search (no more stubs) + - Docker Compose configuration + - All dependencies added + +2. **Integration** ✅ + - Jay's API endpoint (`search_ai.py`) → Your search logic + - Jay's frontend UI → Backend API + - Your embedding service → Vector search service + +--- + +## What's Missing ❌ + +### Priority 1: Canvas Snapshot Generation (Required for Image Embeddings) + +**Problem:** To generate embeddings for canvas content, you need to convert stroke data to images. + +**Options:** + +#### Option A: Text-Based Embeddings (Simplest - Start Here) +Use canvas name + description for embeddings instead of visual content. + +```python +# backend/scripts/populate_embeddings.py +from services.db import rooms_coll +from services.embedding_service import embed_text +from services.vector_search_service import store_canvas_embedding + +def populate_text_embeddings(): + """Generate embeddings from room metadata (name + description).""" + rooms = rooms_coll.find({"archived": {"$ne": True}}) + + for room in rooms: + room_id = str(room['_id']) + name = room.get('name', '') + desc = room.get('description', '') + + # Combine name and description for richer embedding + text = f"{name}. 
{desc}" if desc else name + + if text.strip(): + embedding = embed_text([text]) + + store_canvas_embedding( + room_id=room_id, + embedding=embedding, + metadata={ + 'name': name, + 'description': desc, + 'type': room.get('type'), + 'ownerName': room.get('ownerName') + } + ) + print(f"✓ Stored embedding for {room_id}: {name}") + +if __name__ == "__main__": + populate_text_embeddings() +``` + +**Pros:** Works immediately, no canvas rendering needed +**Cons:** Doesn't capture visual content +**Use case:** "Find rooms about trees" works, "Find rooms similar to this sketch" won't + +#### Option B: Server-Side Canvas Rendering (Better, More Complex) + +Render strokes to PNG using Pillow: + +```python +# backend/services/canvas_renderer.py +from PIL import Image, ImageDraw +from services.db import strokes_coll + +def render_canvas_to_image(room_id: str, width=800, height=600) -> str: + """Render canvas strokes to PNG file, return path.""" + # Fetch strokes + strokes = list(strokes_coll.find({"roomId": room_id}).sort("ts", 1)) + + # Create image + img = Image.new('RGB', (width, height), 'white') + draw = ImageDraw.Draw(img) + + for stroke in strokes: + points = stroke.get('points', []) + color = stroke.get('color', '#000000') + width = stroke.get('width', 2) + + # Draw lines between points + for i in range(len(points) - 1): + x1, y1 = points[i]['x'], points[i]['y'] + x2, y2 = points[i+1]['x'], points[i+1]['y'] + draw.line([(x1, y1), (x2, y2)], fill=color, width=int(width)) + + # Save to temp file + path = f"/tmp/canvas_{room_id}.png" + img.save(path) + return path +``` + +**Pros:** Captures visual content +**Cons:** Need to understand stroke data format, coordinate systems +**Recommendation:** Start with Option A, add this later + +#### Option C: Frontend Thumbnail Export (Hybrid Approach) + +Let the frontend generate thumbnails and upload them: + +1. Add endpoint: `POST /api/v1/rooms/{room_id}/snapshot` +2. Frontend captures canvas as base64 PNG +3. Backend generates embedding and stores it + +**Pros:** Frontend already knows how to render +**Cons:** Requires frontend changes, manual trigger + +--- + +### Priority 2: Background Embedding Worker + +**Current State:** Embeddings are NOT auto-generated on canvas create/update + +**Solution:** Create a periodic batch processor (simplest approach) + +```python +# backend/workers/embedding_worker.py +import time +import logging +from services.db import rooms_coll +from services.embedding_service import embed_text +from services.vector_search_service import store_canvas_embedding, get_collection_stats + +logger = logging.getLogger(__name__) + +def sync_embeddings_batch(): + """ + Sync embeddings for all canvases that don't have them yet. + Run this periodically (e.g., every 5 minutes). + """ + # Get all room IDs in Qdrant + stats = get_collection_stats() + existing_count = stats.get('points_count', 0) + + # Get all rooms from MongoDB + rooms = list(rooms_coll.find({"archived": {"$ne": True}})) + total_rooms = len(rooms) + + logger.info(f"Found {total_rooms} rooms, {existing_count} embeddings exist") + + new_embeddings = 0 + for room in rooms: + room_id = str(room['_id']) + + # Simple approach: Always regenerate (or add logic to check if exists) + name = room.get('name', '') + desc = room.get('description', '') + text = f"{name}. 
{desc}" if desc else name + + if text.strip(): + embedding = embed_text([text]) + success = store_canvas_embedding( + room_id=room_id, + embedding=embedding, + metadata={ + 'name': name, + 'description': desc, + 'type': room.get('type'), + 'ownerName': room.get('ownerName') + } + ) + if success: + new_embeddings += 1 + + logger.info(f"Synced {new_embeddings} new embeddings") + return new_embeddings + +def run_worker(interval_seconds=300): + """Run worker in loop.""" + logger.info(f"Starting embedding worker (interval={interval_seconds}s)") + while True: + try: + sync_embeddings_batch() + except Exception as e: + logger.exception(f"Worker error: {e}") + + time.sleep(interval_seconds) + +if __name__ == "__main__": + run_worker() +``` + +**How to run:** +```bash +# In separate terminal +python backend/workers/embedding_worker.py + +# Or add to supervisor/systemd/docker-compose +``` + +**Alternative (Production):** Use Celery for more robust job scheduling + +--- + +### Priority 3: Hook into Canvas Updates + +Trigger embedding regeneration when canvases change: + +```python +# In backend/routes/rooms.py (after canvas update) + +from services.embedding_service import embed_text +from services.vector_search_service import store_canvas_embedding + +@rooms_bp.route('/api/v1/rooms/', methods=['PATCH']) +@require_auth +def update_room(room_id): + # ... existing update logic ... + + # After successful update, regenerate embedding + try: + name = updated_room.get('name', '') + desc = updated_room.get('description', '') + text = f"{name}. {desc}" if desc else name + + if text.strip(): + embedding = embed_text([text]) + store_canvas_embedding( + room_id=room_id, + embedding=embedding, + metadata={ + 'name': name, + 'description': desc, + 'type': updated_room.get('type'), + 'ownerName': updated_room.get('ownerName') + } + ) + except Exception as e: + logger.warning(f"Failed to update embedding for {room_id}: {e}") + + return jsonify(updated_room) +``` + +--- + +### Priority 4: Database Indexes (Performance) + +Add indexes for faster queries: + +```python +# In backend/services/db.py (add to existing indexes) + +# For search filtering +rooms_coll.create_index([("type", 1), ("archived", 1)]) +rooms_coll.create_index([("ownerId", 1), ("archived", 1)]) +``` + +--- + +## Recommended Implementation Order + +### Week 1: Get It Working +1. ✅ Setup Qdrant (Done!) +2. ✅ Implement vector_search_service.py (Done!) +3. ✅ Update search_algorithms.py (Done!) +4. ⏳ **Create `populate_embeddings.py` script** (Option A - Text-based) +5. ⏳ **Test search from UI** + +### Week 2: Automate +6. ⏳ Create `embedding_worker.py` (periodic batch sync) +7. ⏳ Add hooks to `rooms.py` for real-time updates +8. ⏳ Add canvas deletion → embedding cleanup + +### Week 3: Visual Search +9. ⏳ Implement canvas rendering (Option B or C) +10. ⏳ Update embeddings to use visual content +11. ⏳ Test image-based search + +### Week 4: Polish +12. ⏳ Add monitoring/logging +13. ⏳ Performance tuning +14. ⏳ Error handling improvements + +--- + +## Quick Test Script + +Save as `backend/scripts/test_vector_search.py`: + +```python +#!/usr/bin/env python3 +"""Quick test script for vector search functionality.""" + +from services.embedding_service import embed_text, embed_image +from services.vector_search_service import ( + store_canvas_embedding, + search_by_embedding, + get_collection_stats +) +import numpy as np + +def test_basic_flow(): + print("🧪 Testing Vector Search...") + + # 1. 
Store test embeddings + test_data = [ + ("room1", "A beautiful landscape with mountains and trees"), + ("room2", "Abstract geometric shapes in bright colors"), + ("room3", "Portrait of a person with blue eyes"), + ("room4", "Forest scene with tall pine trees"), + ] + + print("\n📝 Storing test embeddings...") + for room_id, description in test_data: + emb = embed_text([description]) + store_canvas_embedding(room_id, emb, {"description": description}) + print(f" ✓ {room_id}: {description[:50]}...") + + # 2. Check stats + print("\n📊 Collection stats:") + stats = get_collection_stats() + print(f" Points: {stats.get('points_count')}") + print(f" Dimension: {stats.get('config', {}).get('dimension')}") + + # 3. Search + print("\n🔍 Searching for 'trees'...") + query_emb = embed_text(["trees"]) + results = search_by_embedding(query_emb, top_k=5) + + print(f"\n Found {len(results)} results:") + for i, r in enumerate(results, 1): + print(f" {i}. {r['room_id']} (score: {r['score']:.3f})") + print(f" {r.get('description', '')[:60]}...") + + print("\n✅ Test complete!") + +if __name__ == "__main__": + test_basic_flow() +``` + +Run with: +```bash +cd backend +python scripts/test_vector_search.py +``` + +--- + +## Summary + +**You have:** Complete Qdrant integration, working vector search, connected UI +**You need:** Populate embeddings (start with text-based), then add automation + +**Fastest path to demo:** +1. Run the test script above +2. Create `populate_embeddings.py` for real rooms +3. Test search in the UI +4. Show Jay it works! 🎉 diff --git a/backend/SETUP_VECTOR_SEARCH.md b/backend/SETUP_VECTOR_SEARCH.md new file mode 100644 index 00000000..e65aeb7c --- /dev/null +++ b/backend/SETUP_VECTOR_SEARCH.md @@ -0,0 +1,276 @@ +# Vector Search Setup Guide + +## ✅ Completed Integration + +The vector search functionality has been integrated with Jay's search UI. Here's what was implemented: + +### Files Modified/Created: +1. ✅ `backend/requirements.txt` - Added AI/ML dependencies +2. ✅ `backend/config.py` - Added Qdrant configuration +3. ✅ `backend/services/vector_search_service.py` - **Implemented complete Qdrant integration** +4. ✅ `backend/services/search_algorithms.py` - **Replaced stubs with real semantic search** +5. 
✅ `backend/docker-compose.yml` - Added Qdrant service + +### Your Files (Already Complete): +- ✅ `backend/services/embedding_service.py` - CLIP embeddings (text + image) + +### Jay's Files (Already Complete): +- ✅ `backend/routes/search_ai.py` - API endpoint +- ✅ `frontend/src/components/Search/AISearchPanel.jsx` - Search UI +- ✅ `frontend/src/components/Search/VisualSearchUpload.jsx` - Image upload UI +- ✅ `frontend/src/pages/Dashboard.jsx` - Integrated search panel + +--- + +## 🚀 Quick Start + +### Option 1: Using Docker Compose (Recommended) + +```bash +cd backend + +# Start Qdrant and Redis +docker-compose up -d qdrant redis + +# Verify Qdrant is running +curl http://localhost:6333/healthz +# Should return: {"title":"healthz","version":"1.x.x"} + +# Install Python dependencies (if not already done) +pip install -r requirements.txt + +# Run backend +python app.py +``` + +### Option 2: Local Qdrant Installation + +```bash +# macOS with Homebrew +brew install qdrant + +# Or using Docker standalone +docker run -p 6333:6333 -p 6334:6334 \ + -v $(pwd)/qdrant_storage:/qdrant/storage \ + qdrant/qdrant + +# Install Python dependencies +cd backend +pip install -r requirements.txt + +# Run backend +python app.py +``` + +--- + +## 🔧 Configuration + +The following environment variables can be set in `.env`: + +```bash +# Qdrant Vector Database +QDRANT_HOST=localhost +QDRANT_PORT=6333 +QDRANT_GRPC_PORT=6334 +QDRANT_COLLECTION_NAME=rescanvas_embeddings + +# These are already in your config: +# REDIS_HOST=localhost +# REDIS_PORT=6379 +# MONGO_ATLAS_URI=... +``` + +--- + +## 📊 How It Works + +### 1. **Search Flow (Already Wired Up):** +``` +User enters query → AISearchPanel.jsx + ↓ +POST /api/v1/search/ai → search_ai.py + ↓ +Filters rooms by visibility → search_algorithms.py + ↓ +text_search() or image_search() + ↓ +embedding_service.embed_text() or .embed_image() + ↓ +vector_search_service.search_by_embedding() + ↓ +Qdrant returns similar canvases + ↓ +Results ranked by similarity score + ↓ +UI displays ranked results +``` + +### 2. **Vector Search Service Functions:** + +```python +# Store canvas embedding +vector_search_service.store_canvas_embedding( + room_id="123abc", + embedding=np.array([...512 dims...]), + metadata={"name": "My Canvas", "type": "public", "owner": "user123"} +) + +# Search by text +query_emb = embedding_service.embed_text(["rooms with trees"]) +results = vector_search_service.search_by_embedding(query_emb, top_k=50) + +# Search by image +img_emb = embedding_service.embed_image("path/to/image.png") +results = vector_search_service.search_by_embedding(img_emb, top_k=50) + +# Delete embedding +vector_search_service.delete_canvas_embedding(room_id="123abc") +``` + +--- + +## 🧪 Testing the Integration + +### 1. Start Services: +```bash +# Terminal 1: Start Qdrant +cd backend +docker-compose up qdrant + +# Terminal 2: Start Backend +python app.py +``` + +### 2. Test Qdrant Connection: +```bash +# Check collection stats +curl http://localhost:6333/collections + +# Should show empty collection or newly created one +``` + +### 3. Test Search API (currently will work but return empty results): +```bash +# Text search +curl -X POST http://localhost:10010/api/v1/search/ai \ + -H "Content-Type: application/json" \ + -d '{"q": "rooms with trees"}' + +# Should return: {"status": "ok", "results": []} +# (Empty because no embeddings stored yet) +``` + +### 4. Test from Frontend: +1. Open Dashboard (http://localhost:3000) +2. Find the "AI Search" panel +3. Type a query: "rooms with trees" +4. 
Click "Search" +5. Should see results (or empty if no embeddings yet) + +--- + +## ⚠️ What's Still Missing + +### 1. **Canvas Snapshot Generation** (Not Yet Implemented) +To search by canvas content, you need to: +- Convert canvas strokes to images +- Generate embeddings for those images +- Store them in Qdrant + +**Simple approach for testing:** +```python +# Create a manual embedding for a test canvas +from services.embedding_service import embed_text +from services.vector_search_service import store_canvas_embedding + +# Text-based embedding (easiest for testing) +room_id = "YOUR_ROOM_ID_HERE" +description = "A beautiful landscape with trees and mountains" +embedding = embed_text([description]) + +store_canvas_embedding( + room_id=room_id, + embedding=embedding, + metadata={ + "name": "Test Canvas", + "description": description, + "type": "public", + "ownerName": "testuser" + } +) +``` + +### 2. **Background Worker** (`embedding_worker.py`) - Not Yet Implemented +For automatic embedding generation, you'll need: +- Worker to listen for canvas create/update events +- Canvas-to-image rendering logic +- Batch processing for existing canvases + +**Manual/Periodic approach (simplest):** +- Create a script to batch-process canvases +- Run it manually or via cron job +- See `NEXT_STEPS.md` for implementation details + +--- + +## 🔍 Debugging + +### Check if Qdrant is running: +```bash +curl http://localhost:6333/healthz +``` + +### Check collection stats: +```bash +curl http://localhost:6333/collections/rescanvas_embeddings +``` + +### View logs: +```bash +# Backend logs will show: +# "Connected to Qdrant at localhost:6333" +# "Collection rescanvas_embeddings created successfully" +# "Vector search returned X results (top_k=50)" +``` + +### Common issues: +1. **"Connection refused"** - Qdrant not running + - Solution: `docker-compose up qdrant` + +2. **"Import could not be resolved"** - Dependencies not installed + - Solution: `pip install -r requirements.txt` + +3. **"No module named 'torch'"** - PyTorch not installed + - Solution: `pip install torch` (may take time, ~2GB download) + +4. **Empty search results** - No embeddings stored yet + - Solution: Manually store test embeddings (see above) + +--- + +## 📈 Next Steps (Future Enhancements) + +See `NEXT_STEPS.md` for: +- Implementing `embedding_worker.py` +- Canvas-to-image rendering +- Batch processing existing canvases +- Performance optimization +- Monitoring and observability + +--- + +## 🎉 What's Working Now + +✅ Qdrant integration complete +✅ Vector search service implemented +✅ Search algorithms use real embeddings +✅ API endpoint ready +✅ Frontend UI connected +✅ Docker setup included + +**Ready to test!** Just need to: +1. Start Qdrant +2. Install dependencies +3. Populate some test embeddings +4. 
Try searching from the UI diff --git a/backend/config.py b/backend/config.py index 0655cd6e..e2da25cc 100644 --- a/backend/config.py +++ b/backend/config.py @@ -44,6 +44,13 @@ REDIS_HOST = os.getenv("REDIS_HOST", "localhost") REDIS_PORT = int(os.getenv("REDIS_PORT", "6379")) +# Qdrant Vector Database Configuration +QDRANT_HOST = os.getenv("QDRANT_HOST", "localhost") +QDRANT_PORT = int(os.getenv("QDRANT_PORT", "6333")) +QDRANT_COLLECTION_NAME = os.getenv("QDRANT_COLLECTION_NAME", "rescanvas_embeddings") +EMBEDDING_DIMENSION = 512 # OpenCLIP ViT-B-32 output dimension +QDRANT_GRPC_PORT = int(os.getenv("QDRANT_GRPC_PORT", "6334")) + # Rate Limiting Configuration RATE_LIMIT_STORAGE_URI = f"redis://{REDIS_HOST}:{REDIS_PORT}" RATE_LIMIT_ENABLED = os.getenv("RATE_LIMIT_ENABLED", "True") == "True" diff --git a/backend/docker-compose.yml b/backend/docker-compose.yml index 4c52833f..26374261 100644 --- a/backend/docker-compose.yml +++ b/backend/docker-compose.yml @@ -18,6 +18,25 @@ services: networks: - rescanvas-network + qdrant: + image: qdrant/qdrant:latest + container_name: rescanvas-qdrant + ports: + - "6333:6333" # HTTP API + - "6334:6334" # gRPC API + volumes: + - qdrant_data:/qdrant/storage + environment: + - QDRANT__SERVICE__GRPC_PORT=6334 + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:6333/healthz"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - rescanvas-network + backend: build: context: . @@ -34,6 +53,9 @@ services: - RATE_LIMIT_STORAGE_URI=redis://redis:6379 - REDIS_HOST=redis - REDIS_PORT=6379 + - QDRANT_HOST=qdrant + - QDRANT_PORT=6333 + - QDRANT_GRPC_PORT=6334 - JWT_SECRET=${JWT_SECRET:-dev-insecure-change-me} - OPENAI_API_KEY=${OPENAI_API_KEY} - ANALYTICS_ENABLED=${ANALYTICS_ENABLED:-True} @@ -42,6 +64,8 @@ services: depends_on: redis: condition: service_healthy + qdrant: + condition: service_healthy restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:10010/api/analytics/health"] @@ -55,6 +79,8 @@ services: volumes: redis_data: driver: local + qdrant_data: + driver: local networks: rescanvas-network: diff --git a/backend/requirements.txt b/backend/requirements.txt index 11e7f4e7..ff1069ac 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -64,3 +64,10 @@ websockets==10.4 Werkzeug==3.1.3 wrapt==2.0.0 wsproto==1.2.0 + +# AI/ML dependencies for semantic search +torch>=2.0.0 +open_clip_torch>=2.20.0 +pillow>=10.0.0 +qdrant-client>=1.7.0 +numpy>=1.24.0 diff --git a/backend/services/search_algorithms.py b/backend/services/search_algorithms.py index acce7d43..4d87664f 100644 --- a/backend/services/search_algorithms.py +++ b/backend/services/search_algorithms.py @@ -1,58 +1,194 @@ -"""Simple search algorithm stubs for ResCanvas. +"""Search algorithm implementations for ResCanvas. -These are intentionally naive implementations used by the website prototype. -They accept a list of room dicts and return them with a uniform score so the -frontend can exercise the UI without any model or vector DB dependencies. +Provides text and image-based semantic search using CLIP embeddings and Qdrant vector search. 
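+Both entry points fall back to random ranking when the embedding model or the Qdrant vector store is unavailable.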
""" from typing import List, Dict, Any -import random +import logging +import base64 +import io +from PIL import Image +import tempfile +import os + +logger = logging.getLogger(__name__) DEFAULT_TOP_N = 50 +# Lazy imports to avoid startup failures if dependencies aren't installed +_embedding_service = None +_vector_search_service = None + + +def _get_services(): + """Lazy load embedding and vector search services.""" + global _embedding_service, _vector_search_service + + if _embedding_service is None or _vector_search_service is None: + try: + from services import embedding_service, vector_search_service + _embedding_service = embedding_service + _vector_search_service = vector_search_service + logger.info("Loaded embedding and vector search services") + except Exception as e: + logger.error(f"Failed to load AI services: {e}") + raise + + return _embedding_service, _vector_search_service def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: - """Prototype text search: random scores + ranking.""" - rng = random.Random(seed) if seed is not None else random - scored = [{**r, "score": rng.random()} for r in rooms] - scored.sort(key=lambda x: x["score"], reverse=True) - return scored[:top_n] + """ + Semantic text search using CLIP embeddings. + + Args: + query: Natural language search query (e.g., "rooms with trees") + rooms: List of candidate room dicts (pre-filtered by visibility) + top_n: Maximum number of results to return + seed: Unused (kept for backward compatibility) + + Returns: + List of rooms ranked by semantic similarity with score field added + """ + try: + embed_svc, vector_svc = _get_services() + + # Generate embedding for the query text + query_embedding = embed_svc.embed_text([query]) # Returns (1, 512) + + if query_embedding is None or query_embedding.size == 0: + logger.warning("Failed to generate query embedding, falling back to random") + return _fallback_random_search(rooms, top_n, seed) + + # Search vector database for similar canvases + vector_results = vector_svc.search_by_embedding( + query_embedding=query_embedding, + top_k=top_n * 2, # Get extra results to filter by visibility + score_threshold=0.0 + ) + + # Create a map of room_id -> score from vector results + score_map = {str(r['room_id']): r['score'] for r in vector_results} + + # Match vector results with provided rooms (visibility-filtered) + # and add scores + scored_rooms = [] + for room in rooms: + room_id = room.get('id') or str(room.get('_id', '')) + if room_id in score_map: + room_copy = {**room, 'score': score_map[room_id]} + scored_rooms.append(room_copy) + else: + # Room not in vector DB yet, give low score + room_copy = {**room, 'score': 0.1} + scored_rooms.append(room_copy) + + # Sort by score descending + scored_rooms.sort(key=lambda x: x['score'], reverse=True) + + logger.info(f"Text search for '{query}' returned {len(scored_rooms[:top_n])} results") + return scored_rooms[:top_n] + + except Exception as e: + logger.exception(f"Text search failed: {e}") + return _fallback_random_search(rooms, top_n, seed) + def image_search(image_b64: str, rooms: List[Dict[str, Any]], q: str | None = None, top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: - """Prototype image search: random scores + ranking.""" + """ + Semantic image search using CLIP embeddings. 
+ + Args: + image_b64: Base64-encoded image (without data URI prefix) + rooms: List of candidate room dicts (pre-filtered by visibility) + q: Optional text query to combine with image (future enhancement) + top_n: Maximum number of results to return + seed: Unused (kept for backward compatibility) + + Returns: + List of rooms ranked by visual similarity with score field added + """ + try: + embed_svc, vector_svc = _get_services() + + # Decode base64 image and save to temporary file + try: + image_data = base64.b64decode(image_b64) + image = Image.open(io.BytesIO(image_data)) + + # Save to temp file (embedding service expects file path) + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: + tmp_path = tmp.name + image.save(tmp_path, 'PNG') + + # Generate embedding for the image + query_embedding = embed_svc.embed_image(tmp_path) # Returns (1, 512) + + # Clean up temp file + os.unlink(tmp_path) + + except Exception as e: + logger.error(f"Failed to process image: {e}") + return _fallback_random_search(rooms, top_n, seed) + + if query_embedding is None or query_embedding.size == 0: + logger.warning("Failed to generate image embedding, falling back to random") + return _fallback_random_search(rooms, top_n, seed) + + # Search vector database for similar canvases + vector_results = vector_svc.search_by_embedding( + query_embedding=query_embedding, + top_k=top_n * 2, # Get extra results to filter by visibility + score_threshold=0.0 + ) + + # Create a map of room_id -> score from vector results + score_map = {str(r['room_id']): r['score'] for r in vector_results} + + # Match vector results with provided rooms (visibility-filtered) + scored_rooms = [] + for room in rooms: + room_id = room.get('id') or str(room.get('_id', '')) + if room_id in score_map: + room_copy = {**room, 'score': score_map[room_id]} + scored_rooms.append(room_copy) + else: + # Room not in vector DB yet, give low score + room_copy = {**room, 'score': 0.1} + scored_rooms.append(room_copy) + + # Sort by score descending + scored_rooms.sort(key=lambda x: x['score'], reverse=True) + + logger.info(f"Image search returned {len(scored_rooms[:top_n])} results") + return scored_rooms[:top_n] + + except Exception as e: + logger.exception(f"Image search failed: {e}") + return _fallback_random_search(rooms, top_n, seed) + + +def _fallback_random_search(rooms: List[Dict[str, Any]], top_n: int, seed: int | None = None) -> List[Dict[str, Any]]: + """Fallback to random ranking if embedding search fails.""" + import random + logger.warning("Using fallback random search") rng = random.Random(seed) if seed is not None else random scored = [{**r, "score": rng.random()} for r in rooms] scored.sort(key=lambda x: x["score"], reverse=True) return scored[:top_n] +# Keep old stub implementations commented for reference +# def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: +# """Prototype text search: random scores + ranking.""" +# rng = random.Random(seed) if seed is not None else random +# scored = [{**r, "score": rng.random()} for r in rooms] +# scored.sort(key=lambda x: x["score"], reverse=True) +# return scored[:top_n] + +# def image_search(image_b64: str, rooms: List[Dict[str, Any]], q: str | None = None, top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: +# """Prototype image search: random scores + ranking.""" +# rng = random.Random(seed) if seed is not None else random +# scored = [{**r, "score": rng.random()} for r in 
rooms] +# scored.sort(key=lambda x: x["score"], reverse=True) +# return scored[:top_n] -# def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N) -> List[Dict[str, Any]]: -# """Naive text search stub. - -# Returns the input rooms unchanged except for attaching a default score and -# a snippet field (from description) so the frontend can render results. -# """ -# out = [] -# for r in rooms[:top_n]: -# out.append({ -# **r, -# 'score': 1.0, -# 'snippet': (r.get('description') or '')[:300] -# }) -# return out - - -# def image_search(image_b64: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N) -> List[Dict[str, Any]]: -# """Naive image search stub. - -# Currently ignores the image and returns input rooms with default score. -# """ -# out = [] -# for r in rooms[:top_n]: -# out.append({ -# **r, -# 'score': 1.0, -# 'snippet': (r.get('description') or '')[:300] -# }) -# return out diff --git a/backend/services/vector_search_service.py b/backend/services/vector_search_service.py index e69de29b..58f71256 100644 --- a/backend/services/vector_search_service.py +++ b/backend/services/vector_search_service.py @@ -0,0 +1,295 @@ +""" +Vector search service using Qdrant for semantic canvas search. + +This service manages storage and retrieval of canvas embeddings in Qdrant, +enabling semantic similarity search across canvases. +""" +import logging +import numpy as np +from typing import List, Dict, Any, Optional +from qdrant_client import QdrantClient +from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue +from qdrant_client.http.exceptions import UnexpectedResponse +from config import QDRANT_HOST, QDRANT_PORT, QDRANT_COLLECTION_NAME, EMBEDDING_DIMENSION + +logger = logging.getLogger(__name__) + +# Global Qdrant client (lazy initialization) +_qdrant_client: Optional[QdrantClient] = None + + +def get_qdrant_client() -> QdrantClient: + """Get or create Qdrant client instance.""" + global _qdrant_client + if _qdrant_client is None: + try: + _qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT) + logger.info(f"Connected to Qdrant at {QDRANT_HOST}:{QDRANT_PORT}") + _ensure_collection_exists() + except Exception as e: + logger.error(f"Failed to connect to Qdrant: {e}") + raise + return _qdrant_client + + +def _ensure_collection_exists(): + """Create collection if it doesn't exist.""" + client = _qdrant_client + try: + # Check if collection exists + collections = client.get_collections().collections + collection_names = [c.name for c in collections] + + if QDRANT_COLLECTION_NAME not in collection_names: + logger.info(f"Creating Qdrant collection: {QDRANT_COLLECTION_NAME}") + client.create_collection( + collection_name=QDRANT_COLLECTION_NAME, + vectors_config=VectorParams( + size=EMBEDDING_DIMENSION, + distance=Distance.COSINE # Cosine similarity for normalized embeddings + ) + ) + logger.info(f"Collection {QDRANT_COLLECTION_NAME} created successfully") + else: + logger.debug(f"Collection {QDRANT_COLLECTION_NAME} already exists") + except Exception as e: + logger.error(f"Error ensuring collection exists: {e}") + raise + + +def store_canvas_embedding( + room_id: str, + embedding: np.ndarray, + metadata: Optional[Dict[str, Any]] = None +) -> bool: + """ + Store or update a canvas embedding in Qdrant. + + Args: + room_id: Unique identifier for the canvas/room + embedding: 512-dimensional vector from CLIP model + metadata: Additional metadata (name, description, type, owner, etc.) 
+ + Returns: + True if successful, False otherwise + """ + try: + client = get_qdrant_client() + + # Ensure embedding is the right shape and type + if embedding.ndim == 2: + embedding = embedding.flatten() + + if len(embedding) != EMBEDDING_DIMENSION: + logger.error(f"Embedding dimension mismatch: expected {EMBEDDING_DIMENSION}, got {len(embedding)}") + return False + + # Prepare payload with metadata + payload = metadata or {} + payload['room_id'] = room_id + + # Use room_id as the point ID (convert to hash for Qdrant) + point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF # Ensure positive int + + # Upsert the point (will update if exists, insert if new) + client.upsert( + collection_name=QDRANT_COLLECTION_NAME, + points=[ + PointStruct( + id=point_id, + vector=embedding.tolist(), + payload=payload + ) + ] + ) + + logger.info(f"Stored embedding for room_id={room_id}") + return True + + except Exception as e: + logger.exception(f"Failed to store embedding for room_id={room_id}: {e}") + return False + + +def search_by_embedding( + query_embedding: np.ndarray, + top_k: int = 50, + filters: Optional[Dict[str, Any]] = None, + score_threshold: float = 0.0 +) -> List[Dict[str, Any]]: + """ + Search for similar canvases using vector similarity. + + Args: + query_embedding: 512-dimensional query vector from CLIP + top_k: Number of results to return + filters: Optional filters (e.g., {"type": "public", "owner": "user123"}) + score_threshold: Minimum similarity score (0.0 to 1.0) + + Returns: + List of dicts with keys: room_id, score, and metadata fields + """ + try: + client = get_qdrant_client() + + # Ensure embedding is the right shape + if query_embedding.ndim == 2: + query_embedding = query_embedding.flatten() + + if len(query_embedding) != EMBEDDING_DIMENSION: + logger.error(f"Query embedding dimension mismatch: expected {EMBEDDING_DIMENSION}, got {len(query_embedding)}") + return [] + + # Build Qdrant filter if provided + qdrant_filter = None + if filters: + conditions = [] + for key, value in filters.items(): + conditions.append( + FieldCondition(key=key, match=MatchValue(value=value)) + ) + if conditions: + qdrant_filter = Filter(must=conditions) + + # Perform vector search + search_result = client.search( + collection_name=QDRANT_COLLECTION_NAME, + query_vector=query_embedding.tolist(), + limit=top_k, + query_filter=qdrant_filter, + score_threshold=score_threshold + ) + + # Format results + results = [] + for hit in search_result: + result = { + 'room_id': hit.payload.get('room_id'), + 'score': float(hit.score), + **hit.payload # Include all metadata + } + results.append(result) + + logger.info(f"Vector search returned {len(results)} results (top_k={top_k})") + return results + + except Exception as e: + logger.exception(f"Vector search failed: {e}") + return [] + + +def update_canvas_embedding( + room_id: str, + new_embedding: np.ndarray, + metadata: Optional[Dict[str, Any]] = None +) -> bool: + """ + Update an existing canvas embedding. + + This is just an alias for store_canvas_embedding since upsert handles both. + """ + return store_canvas_embedding(room_id, new_embedding, metadata) + + +def delete_canvas_embedding(room_id: str) -> bool: + """ + Delete a canvas embedding from Qdrant. 
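+    The Qdrant point ID is recomputed from room_id with the same hash used by store_canvas_embedding, so no lookup is needed.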
+ + Args: + room_id: Unique identifier for the canvas/room to delete + + Returns: + True if successful, False otherwise + """ + try: + client = get_qdrant_client() + point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF + + client.delete( + collection_name=QDRANT_COLLECTION_NAME, + points_selector=[point_id] + ) + + logger.info(f"Deleted embedding for room_id={room_id}") + return True + + except Exception as e: + logger.exception(f"Failed to delete embedding for room_id={room_id}: {e}") + return False + + +def batch_store_embeddings(embeddings: List[Dict[str, Any]]) -> int: + """ + Store multiple embeddings in batch (more efficient). + + Args: + embeddings: List of dicts with keys: room_id, embedding, metadata + + Returns: + Number of successfully stored embeddings + """ + try: + client = get_qdrant_client() + points = [] + + for item in embeddings: + room_id = item['room_id'] + embedding = item['embedding'] + metadata = item.get('metadata', {}) + + # Prepare embedding + if embedding.ndim == 2: + embedding = embedding.flatten() + + if len(embedding) != EMBEDDING_DIMENSION: + logger.warning(f"Skipping room_id={room_id} due to dimension mismatch") + continue + + # Prepare payload + payload = metadata.copy() + payload['room_id'] = room_id + + point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF + + points.append( + PointStruct( + id=point_id, + vector=embedding.tolist(), + payload=payload + ) + ) + + if points: + client.upsert( + collection_name=QDRANT_COLLECTION_NAME, + points=points + ) + logger.info(f"Batch stored {len(points)} embeddings") + return len(points) + + return 0 + + except Exception as e: + logger.exception(f"Batch store failed: {e}") + return 0 + + +def get_collection_stats() -> Dict[str, Any]: + """Get statistics about the vector collection.""" + try: + client = get_qdrant_client() + collection_info = client.get_collection(collection_name=QDRANT_COLLECTION_NAME) + + return { + 'collection_name': QDRANT_COLLECTION_NAME, + 'vectors_count': collection_info.vectors_count, + 'points_count': collection_info.points_count, + 'status': collection_info.status, + 'config': { + 'dimension': EMBEDDING_DIMENSION, + 'distance': 'COSINE' + } + } + except Exception as e: + logger.exception(f"Failed to get collection stats: {e}") + return {'error': str(e)} From e3eec5bb4017ab0a7f2fe13e61f36e127167df0e Mon Sep 17 00:00:00 2001 From: Steven Trujillo Date: Wed, 26 Nov 2025 14:01:09 -0800 Subject: [PATCH 4/5] Check SETUP_VECTOR_SEARCH.md Outlines how to get the embedding worker up / qdrant db for working ai search example --- .gitignore | 1 + backend/SETUP_VECTOR_SEARCH.md | 226 +--------- backend/app.py | 3 + backend/requirements.txt | 1 - backend/routes/rooms.py | 79 ++++ backend/services/embedding_service.py | 10 +- backend/services/search_algorithms.py | 71 ++-- backend/services/vector_search_service.py | 98 ++--- backend/workers/embedding_worker.py | 482 ++++++++++++++++++++++ frontend/src/components/Canvas.js | 103 +++++ 10 files changed, 769 insertions(+), 305 deletions(-) create mode 100644 backend/workers/embedding_worker.py diff --git a/.gitignore b/.gitignore index d0ab0575..c29eead4 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,7 @@ coverage/ *.coverage backend/.coverage node_modules +backend/qdrant_storage/ # Exclude all .env files .env diff --git a/backend/SETUP_VECTOR_SEARCH.md b/backend/SETUP_VECTOR_SEARCH.md index e65aeb7c..6d19d789 100644 --- a/backend/SETUP_VECTOR_SEARCH.md +++ b/backend/SETUP_VECTOR_SEARCH.md @@ -1,28 +1,6 @@ # Vector Search Setup Guide -## ✅ Completed Integration - 
-The vector search functionality has been integrated with Jay's search UI. Here's what was implemented: - -### Files Modified/Created: -1. ✅ `backend/requirements.txt` - Added AI/ML dependencies -2. ✅ `backend/config.py` - Added Qdrant configuration -3. ✅ `backend/services/vector_search_service.py` - **Implemented complete Qdrant integration** -4. ✅ `backend/services/search_algorithms.py` - **Replaced stubs with real semantic search** -5. ✅ `backend/docker-compose.yml` - Added Qdrant service - -### Your Files (Already Complete): -- ✅ `backend/services/embedding_service.py` - CLIP embeddings (text + image) - -### Jay's Files (Already Complete): -- ✅ `backend/routes/search_ai.py` - API endpoint -- ✅ `frontend/src/components/Search/AISearchPanel.jsx` - Search UI -- ✅ `frontend/src/components/Search/VisualSearchUpload.jsx` - Image upload UI -- ✅ `frontend/src/pages/Dashboard.jsx` - Integrated search panel - ---- - -## 🚀 Quick Start +## Quick Start ### Option 1: Using Docker Compose (Recommended) @@ -41,6 +19,9 @@ pip install -r requirements.txt # Run backend python app.py + +# Seperate terminal +python worker/embedding_service.py ``` ### Option 2: Local Qdrant Installation @@ -60,13 +41,16 @@ pip install -r requirements.txt # Run backend python app.py + +# Seperate terminal +python worker/embedding_service.py ``` --- ## 🔧 Configuration -The following environment variables can be set in `.env`: +The following environment variables can be set in `config.py`: ```bash # Qdrant Vector Database @@ -80,197 +64,3 @@ QDRANT_COLLECTION_NAME=rescanvas_embeddings # REDIS_PORT=6379 # MONGO_ATLAS_URI=... ``` - ---- - -## 📊 How It Works - -### 1. **Search Flow (Already Wired Up):** -``` -User enters query → AISearchPanel.jsx - ↓ -POST /api/v1/search/ai → search_ai.py - ↓ -Filters rooms by visibility → search_algorithms.py - ↓ -text_search() or image_search() - ↓ -embedding_service.embed_text() or .embed_image() - ↓ -vector_search_service.search_by_embedding() - ↓ -Qdrant returns similar canvases - ↓ -Results ranked by similarity score - ↓ -UI displays ranked results -``` - -### 2. **Vector Search Service Functions:** - -```python -# Store canvas embedding -vector_search_service.store_canvas_embedding( - room_id="123abc", - embedding=np.array([...512 dims...]), - metadata={"name": "My Canvas", "type": "public", "owner": "user123"} -) - -# Search by text -query_emb = embedding_service.embed_text(["rooms with trees"]) -results = vector_search_service.search_by_embedding(query_emb, top_k=50) - -# Search by image -img_emb = embedding_service.embed_image("path/to/image.png") -results = vector_search_service.search_by_embedding(img_emb, top_k=50) - -# Delete embedding -vector_search_service.delete_canvas_embedding(room_id="123abc") -``` - ---- - -## 🧪 Testing the Integration - -### 1. Start Services: -```bash -# Terminal 1: Start Qdrant -cd backend -docker-compose up qdrant - -# Terminal 2: Start Backend -python app.py -``` - -### 2. Test Qdrant Connection: -```bash -# Check collection stats -curl http://localhost:6333/collections - -# Should show empty collection or newly created one -``` - -### 3. Test Search API (currently will work but return empty results): -```bash -# Text search -curl -X POST http://localhost:10010/api/v1/search/ai \ - -H "Content-Type: application/json" \ - -d '{"q": "rooms with trees"}' - -# Should return: {"status": "ok", "results": []} -# (Empty because no embeddings stored yet) -``` - -### 4. Test from Frontend: -1. Open Dashboard (http://localhost:3000) -2. 
Find the "AI Search" panel -3. Type a query: "rooms with trees" -4. Click "Search" -5. Should see results (or empty if no embeddings yet) - ---- - -## ⚠️ What's Still Missing - -### 1. **Canvas Snapshot Generation** (Not Yet Implemented) -To search by canvas content, you need to: -- Convert canvas strokes to images -- Generate embeddings for those images -- Store them in Qdrant - -**Simple approach for testing:** -```python -# Create a manual embedding for a test canvas -from services.embedding_service import embed_text -from services.vector_search_service import store_canvas_embedding - -# Text-based embedding (easiest for testing) -room_id = "YOUR_ROOM_ID_HERE" -description = "A beautiful landscape with trees and mountains" -embedding = embed_text([description]) - -store_canvas_embedding( - room_id=room_id, - embedding=embedding, - metadata={ - "name": "Test Canvas", - "description": description, - "type": "public", - "ownerName": "testuser" - } -) -``` - -### 2. **Background Worker** (`embedding_worker.py`) - Not Yet Implemented -For automatic embedding generation, you'll need: -- Worker to listen for canvas create/update events -- Canvas-to-image rendering logic -- Batch processing for existing canvases - -**Manual/Periodic approach (simplest):** -- Create a script to batch-process canvases -- Run it manually or via cron job -- See `NEXT_STEPS.md` for implementation details - ---- - -## 🔍 Debugging - -### Check if Qdrant is running: -```bash -curl http://localhost:6333/healthz -``` - -### Check collection stats: -```bash -curl http://localhost:6333/collections/rescanvas_embeddings -``` - -### View logs: -```bash -# Backend logs will show: -# "Connected to Qdrant at localhost:6333" -# "Collection rescanvas_embeddings created successfully" -# "Vector search returned X results (top_k=50)" -``` - -### Common issues: -1. **"Connection refused"** - Qdrant not running - - Solution: `docker-compose up qdrant` - -2. **"Import could not be resolved"** - Dependencies not installed - - Solution: `pip install -r requirements.txt` - -3. **"No module named 'torch'"** - PyTorch not installed - - Solution: `pip install torch` (may take time, ~2GB download) - -4. **Empty search results** - No embeddings stored yet - - Solution: Manually store test embeddings (see above) - ---- - -## 📈 Next Steps (Future Enhancements) - -See `NEXT_STEPS.md` for: -- Implementing `embedding_worker.py` -- Canvas-to-image rendering -- Batch processing existing canvases -- Performance optimization -- Monitoring and observability - ---- - -## 🎉 What's Working Now - -✅ Qdrant integration complete -✅ Vector search service implemented -✅ Search algorithms use real embeddings -✅ API endpoint ready -✅ Frontend UI connected -✅ Docker setup included - -**Ready to test!** Just need to: -1. Start Qdrant -2. Install dependencies -3. Populate some test embeddings -4. 
Try searching from the UI diff --git a/backend/app.py b/backend/app.py index a716470b..b07639f1 100644 --- a/backend/app.py +++ b/backend/app.py @@ -12,6 +12,9 @@ app = Flask(__name__) +# Allow large request bodies for thumbnail uploads (up to 20MB) +app.config['MAX_CONTENT_LENGTH'] = 20 * 1024 * 1024 + # Initialize rate limiting BEFORE importing routes (routes use limiter decorators) from middleware.rate_limit import init_limiter, rate_limit_error_handler limiter = init_limiter(app) diff --git a/backend/requirements.txt b/backend/requirements.txt index ff1069ac..824a2346 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -64,7 +64,6 @@ websockets==10.4 Werkzeug==3.1.3 wrapt==2.0.0 wsproto==1.2.0 - # AI/ML dependencies for semantic search torch>=2.0.0 open_clip_torch>=2.20.0 diff --git a/backend/routes/rooms.py b/backend/routes/rooms.py index 510580c0..2ce63a38 100644 --- a/backend/routes/rooms.py +++ b/backend/routes/rooms.py @@ -2928,3 +2928,82 @@ def notification_preferences(): except Exception: return jsonify({"status":"error","message":"Failed to persist preferences"}), 500 return jsonify({"status":"ok","preferences": clean}) + + +@rooms_bp.route("/rooms//thumbnail", methods=["POST"]) +@require_auth +@require_room_access(room_id_param='roomId') +def upload_room_thumbnail(roomId): + try: + data = request.get_json() + if not data: + return jsonify({"error": "Request body required"}), 400 + + thumbnail_data = data.get('thumbnail') + if not thumbnail_data: + return jsonify({"error": "thumbnail field required"}), 400 + + # Strip data URL prefix if present + # Format: data:image/png;base64,iVBORw0KG... + if thumbnail_data.startswith('data:'): + if ',' in thumbnail_data: + thumbnail_data = thumbnail_data.split(',', 1)[1] + else: + return jsonify({"error": "Invalid data URL format"}), 400 + + # Decode base64 to binary + import base64 + try: + thumbnail_bytes = base64.b64decode(thumbnail_data) + except Exception as e: + logger.error(f"Failed to decode thumbnail base64 for room {roomId}: {e}") + return jsonify({"error": "Invalid base64 encoding"}), 400 + + # Validate minimum size (at least 100 bytes for a valid image) + if len(thumbnail_bytes) < 100: + return jsonify({"error": "Thumbnail too small, likely invalid"}), 400 + + # Validate maximum size (10MB limit) + if len(thumbnail_bytes) > 10 * 1024 * 1024: + return jsonify({"error": "Thumbnail too large (max 10MB)"}), 400 + + # Optional: Validate it's actually a PNG/JPEG using magic bytes + # PNG magic bytes: 89 50 4E 47 + # JPEG magic bytes: FF D8 FF + is_png = thumbnail_bytes[:4] == b'\x89PNG' + is_jpeg = thumbnail_bytes[:3] == b'\xff\xd8\xff' + + if not (is_png or is_jpeg): + logger.warning(f"Thumbnail for room {roomId} doesn't appear to be PNG or JPEG") + # Don't reject, just log warning + + # Store thumbnail in room document + updated_at = datetime.utcnow() + result = rooms_coll.update_one( + {'_id': ObjectId(roomId)}, + { + '$set': { + 'thumbnail': thumbnail_bytes, # Binary data + 'thumbnailUpdatedAt': updated_at, + 'updatedAt': updated_at # Also update room's main timestamp + } + } + ) + + if result.matched_count == 0: + return jsonify({"error": "Room not found"}), 404 + + logger.info(f"Stored thumbnail for room {roomId}: {len(thumbnail_bytes)} bytes " + f"(format: {'PNG' if is_png else 'JPEG' if is_jpeg else 'unknown'})") + + return jsonify({ + "status": "success", + "roomId": roomId, + "thumbnailSize": len(thumbnail_bytes), + "format": "PNG" if is_png else "JPEG" if is_jpeg else "unknown", + "updatedAt": 
updated_at.isoformat() + }), 200 + + except Exception as e: + logger.exception(f"Failed to upload thumbnail for room {roomId}: {e}") + return jsonify({"error": "Internal server error", "details": str(e)}), 500 diff --git a/backend/services/embedding_service.py b/backend/services/embedding_service.py index dd2c5f4b..aa43855b 100644 --- a/backend/services/embedding_service.py +++ b/backend/services/embedding_service.py @@ -1,13 +1,18 @@ -# Text import torch, numpy as np, open_clip from PIL import Image +# Check Device device = "cuda" if torch.cuda.is_available() else "cpu" -model, preprocess, tokenizer = open_clip.create_model_and_transforms( +# Load model and tokenizer +model, preprocess, _ = open_clip.create_model_and_transforms( 'ViT-B-32', pretrained='laion2b_s34b_b79k', device=device ) +tokenizer = open_clip.get_tokenizer('ViT-B-32') + +# Set model to evaluation mode model.eval() +# Function to generate text embeddings def embed_text(texts: list[str]) -> np.ndarray: with torch.no_grad(): tok = tokenizer(texts) @@ -15,6 +20,7 @@ def embed_text(texts: list[str]) -> np.ndarray: feats = feats / feats.norm(dim=-1, keepdim=True) return feats.cpu().numpy().astype(np.float32) # e.g., (N, 512) +# Function to generate image embeddings def embed_image(png_path: str) -> np.ndarray: img = preprocess(Image.open(png_path)).unsqueeze(0).to(device) with torch.no_grad(): diff --git a/backend/services/search_algorithms.py b/backend/services/search_algorithms.py index 4d87664f..73be15dd 100644 --- a/backend/services/search_algorithms.py +++ b/backend/services/search_algorithms.py @@ -9,6 +9,7 @@ from PIL import Image import tempfile import os +import numpy as np logger = logging.getLogger(__name__) @@ -20,7 +21,7 @@ def _get_services(): - """Lazy load embedding and vector search services.""" + # Load embedding and vector search services. 
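+    # Imported on first use (not at module load) so the API can start even when torch or
+    # qdrant-client are missing; callers catch the raised exception and fall back to random ranking.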
global _embedding_service, _vector_search_service if _embedding_service is None or _vector_search_service is None: @@ -62,8 +63,8 @@ def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TO # Search vector database for similar canvases vector_results = vector_svc.search_by_embedding( query_embedding=query_embedding, - top_k=top_n * 2, # Get extra results to filter by visibility - score_threshold=0.0 + top_k=top_n, # Get extra results to filter by visibility + score_threshold=0.0 # (Optional) Minimum score for results ) # Create a map of room_id -> score from vector results @@ -121,10 +122,49 @@ def image_search(image_b64: str, rooms: List[Dict[str, Any]], q: str | None = No image.save(tmp_path, 'PNG') # Generate embedding for the image - query_embedding = embed_svc.embed_image(tmp_path) # Returns (1, 512) - + image_embedding = embed_svc.embed_image(tmp_path) # Returns (1, 512) + # Clean up temp file os.unlink(tmp_path) + + # If a text query is provided, generate text embedding and combine + if q: + try: + text_embedding = embed_svc.embed_text([q]) # (1, 512) + except Exception as e: + logger.warning(f"Failed to generate text embedding for hybrid search: {e}") + text_embedding = None + + if text_embedding is not None and getattr(text_embedding, 'size', 0) > 0: + # Flatten to 1D arrays + img_vec = np.asarray(image_embedding).reshape(-1).astype(np.float32) + txt_vec = np.asarray(text_embedding).reshape(-1).astype(np.float32) + + # L2-normalize + # img_norm = np.linalg.norm(img_vec) + # txt_norm = np.linalg.norm(txt_vec) + # if img_norm > 0: + # img_vec = img_vec / img_norm + # if txt_norm > 0: + # txt_vec = txt_vec / txt_norm + + # Weighted combination (image-heavy by default) + weight_image = 0.6 + weight_text = 0.4 + combined = weight_image * img_vec + weight_text * txt_vec + + # Re-normalize combined vector + comb_norm = np.linalg.norm(combined) + if comb_norm > 0: + combined = (combined / comb_norm).astype(np.float32) + + query_embedding = combined.reshape(1, -1) + logger.info("Performed hybrid (image+text) embedding combination (image_weight=%s,text_weight=%s)", weight_image, weight_text) + else: + # fall back to image-only embedding + query_embedding = image_embedding + else: + query_embedding = image_embedding except Exception as e: logger.error(f"Failed to process image: {e}") @@ -137,8 +177,8 @@ def image_search(image_b64: str, rooms: List[Dict[str, Any]], q: str | None = No # Search vector database for similar canvases vector_results = vector_svc.search_by_embedding( query_embedding=query_embedding, - top_k=top_n * 2, # Get extra results to filter by visibility - score_threshold=0.0 + top_k=top_n, # Get extra results to filter by visibility + score_threshold=0.0 # (Optional) Minimum score for results ) # Create a map of room_id -> score from vector results @@ -175,20 +215,3 @@ def _fallback_random_search(rooms: List[Dict[str, Any]], top_n: int, seed: int | scored = [{**r, "score": rng.random()} for r in rooms] scored.sort(key=lambda x: x["score"], reverse=True) return scored[:top_n] - - -# Keep old stub implementations commented for reference -# def text_search(query: str, rooms: List[Dict[str, Any]], top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: -# """Prototype text search: random scores + ranking.""" -# rng = random.Random(seed) if seed is not None else random -# scored = [{**r, "score": rng.random()} for r in rooms] -# scored.sort(key=lambda x: x["score"], reverse=True) -# return scored[:top_n] - -# def 
image_search(image_b64: str, rooms: List[Dict[str, Any]], q: str | None = None, top_n: int = DEFAULT_TOP_N, seed: int | None = None) -> List[Dict[str, Any]]: -# """Prototype image search: random scores + ranking.""" -# rng = random.Random(seed) if seed is not None else random -# scored = [{**r, "score": rng.random()} for r in rooms] -# scored.sort(key=lambda x: x["score"], reverse=True) -# return scored[:top_n] - diff --git a/backend/services/vector_search_service.py b/backend/services/vector_search_service.py index 58f71256..ca82d3db 100644 --- a/backend/services/vector_search_service.py +++ b/backend/services/vector_search_service.py @@ -14,13 +14,15 @@ logger = logging.getLogger(__name__) -# Global Qdrant client (lazy initialization) +# Global Qdrant client _qdrant_client: Optional[QdrantClient] = None def get_qdrant_client() -> QdrantClient: - """Get or create Qdrant client instance.""" + # Get Qdrant client global _qdrant_client + + # If client not initialized, create it if _qdrant_client is None: try: _qdrant_client = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT) @@ -33,19 +35,20 @@ def get_qdrant_client() -> QdrantClient: def _ensure_collection_exists(): - """Create collection if it doesn't exist.""" + # Create collection if it doesn't exist. client = _qdrant_client try: - # Check if collection exists + # Check if collection exists (should be a single collection name) collections = client.get_collections().collections collection_names = [c.name for c in collections] + # If it doesnt exist create Qdrant collection if QDRANT_COLLECTION_NAME not in collection_names: logger.info(f"Creating Qdrant collection: {QDRANT_COLLECTION_NAME}") client.create_collection( collection_name=QDRANT_COLLECTION_NAME, vectors_config=VectorParams( - size=EMBEDDING_DIMENSION, + size=EMBEDDING_DIMENSION, # e.g., 512 distance=Distance.COSINE # Cosine similarity for normalized embeddings ) ) @@ -62,20 +65,15 @@ def store_canvas_embedding( embedding: np.ndarray, metadata: Optional[Dict[str, Any]] = None ) -> bool: - """ - Store or update a canvas embedding in Qdrant. - - Args: - room_id: Unique identifier for the canvas/room - embedding: 512-dimensional vector from CLIP model - metadata: Additional metadata (name, description, type, owner, etc.) 
- - Returns: - True if successful, False otherwise - """ + # Store or update a canvas embedding in Qdrant.x try: + # Get Qdrant client client = get_qdrant_client() + # Ensure embedding is a numpy array + if not isinstance(embedding, np.ndarray): + embedding = np.array(embedding, dtype=np.float32) + # Ensure embedding is the right shape and type if embedding.ndim == 2: embedding = embedding.flatten() @@ -88,16 +86,17 @@ def store_canvas_embedding( payload = metadata or {} payload['room_id'] = room_id - # Use room_id as the point ID (convert to hash for Qdrant) - point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF # Ensure positive int + # Use hashlib for consistent point IDs across worker restarts + import hashlib + point_id = int(hashlib.md5(room_id.encode()).hexdigest()[:15], 16) - # Upsert the point (will update if exists, insert if new) + # Update the room (will update if exists, insert if new) client.upsert( collection_name=QDRANT_COLLECTION_NAME, points=[ PointStruct( id=point_id, - vector=embedding.tolist(), + vector=embedding.tolist(), # Convert to list for Qdrant payload=payload ) ] @@ -117,19 +116,9 @@ def search_by_embedding( filters: Optional[Dict[str, Any]] = None, score_threshold: float = 0.0 ) -> List[Dict[str, Any]]: - """ - Search for similar canvases using vector similarity. - - Args: - query_embedding: 512-dimensional query vector from CLIP - top_k: Number of results to return - filters: Optional filters (e.g., {"type": "public", "owner": "user123"}) - score_threshold: Minimum similarity score (0.0 to 1.0) - - Returns: - List of dicts with keys: room_id, score, and metadata fields - """ + # Search for similar canvases using vector similarity. try: + # Get Qdrant client client = get_qdrant_client() # Ensure embedding is the right shape @@ -154,7 +143,7 @@ def search_by_embedding( # Perform vector search search_result = client.search( collection_name=QDRANT_COLLECTION_NAME, - query_vector=query_embedding.tolist(), + query_vector=query_embedding.tolist(), # Convert to list for Qdrant limit=top_k, query_filter=qdrant_filter, score_threshold=score_threshold @@ -183,28 +172,20 @@ def update_canvas_embedding( new_embedding: np.ndarray, metadata: Optional[Dict[str, Any]] = None ) -> bool: - """ - Update an existing canvas embedding. - - This is just an alias for store_canvas_embedding since upsert handles both. - """ + # Update an existing canvas embedding. return store_canvas_embedding(room_id, new_embedding, metadata) def delete_canvas_embedding(room_id: str) -> bool: - """ - Delete a canvas embedding from Qdrant. - - Args: - room_id: Unique identifier for the canvas/room to delete - - Returns: - True if successful, False otherwise - """ + # Delete a canvas embedding from Qdrant. try: + # Get Qdrant client client = get_qdrant_client() - point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF - + + import hashlib + point_id = int(hashlib.md5(room_id.encode()).hexdigest()[:15], 16) + + # Delete the room client.delete( collection_name=QDRANT_COLLECTION_NAME, points_selector=[point_id] @@ -219,16 +200,9 @@ def delete_canvas_embedding(room_id: str) -> bool: def batch_store_embeddings(embeddings: List[Dict[str, Any]]) -> int: - """ - Store multiple embeddings in batch (more efficient). - - Args: - embeddings: List of dicts with keys: room_id, embedding, metadata - - Returns: - Number of successfully stored embeddings - """ + # Store multiple embeddings in batch (more efficient). 
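+    # Points are collected into a single list and written with one upsert call below,
+    # avoiding a separate round-trip to Qdrant for every canvas.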
try: + # Get Qdrant client client = get_qdrant_client() points = [] @@ -249,7 +223,10 @@ def batch_store_embeddings(embeddings: List[Dict[str, Any]]) -> int: payload = metadata.copy() payload['room_id'] = room_id - point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF + #point_id = hash(room_id) & 0x7FFFFFFFFFFFFFFF + # Use hashlib for consistent point IDs across worker restarts + import hashlib + point_id = int(hashlib.md5(room_id.encode()).hexdigest()[:15], 16) points.append( PointStruct( @@ -275,8 +252,9 @@ def batch_store_embeddings(embeddings: List[Dict[str, Any]]) -> int: def get_collection_stats() -> Dict[str, Any]: - """Get statistics about the vector collection.""" + # Get statistics about the vector collection. try: + # Get Qdrant client client = get_qdrant_client() collection_info = client.get_collection(collection_name=QDRANT_COLLECTION_NAME) diff --git a/backend/workers/embedding_worker.py b/backend/workers/embedding_worker.py new file mode 100644 index 00000000..661fb36b --- /dev/null +++ b/backend/workers/embedding_worker.py @@ -0,0 +1,482 @@ +""" +Embedding Worker - Incremental Canvas Embedding Generation + +Automatically generates and updates vector embeddings for canvases in the background. +This worker monitors for new/modified rooms and updates their embeddings without +requiring manual re-population. + +Usage: + python -m workers.embedding_worker [--interval SECONDS] [--batch-size N] +""" + +import sys +import os +import time +import logging +import argparse +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Set +import threading +import signal + +# Add parent directory to path +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from services.db import rooms_coll, strokes_coll, mongo_client +from services.embedding_service import embed_text, embed_image +from services.vector_search_service import store_canvas_embedding, get_collection_stats +from bson import ObjectId +import numpy as np + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s:%(lineno)d – %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" +) +logger = logging.getLogger(__name__) + +# Configuration +DEFAULT_UPDATE_INTERVAL = 300 # 5 minutes - how often to check for updates +DEFAULT_DEBOUNCE_PERIOD = 180 # 3 minutes - minimum time since last room update before embedding +DEFAULT_BATCH_SIZE = 10 # Process this many rooms per batch +DEFAULT_THUMBNAIL_SIZE = (512, 512) # Thumbnail dimensions for visual embeddings + +# Global state +_shutdown_requested = False +_last_processed_times: Dict[str, datetime] = {} # room_id -> last embedding time + + +class CanvasRenderer: + """ + Renders canvas strokes into a PIL Image for visual embedding generation. + """ + + @staticmethod + def render_room_thumbnail(room_id: str, size: tuple = DEFAULT_THUMBNAIL_SIZE) -> Optional[bytes]: + """ + Retrieve stored canvas thumbnail from MongoDB. + + Frontend uploads thumbnails via POST /api/rooms//thumbnail using canvas.toDataURL(). + This method retrieves the stored thumbnail bytes for embedding generation. 
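+        The caller (generate_embedding_for_room) writes the returned bytes to a temporary PNG before passing the path to embed_image().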
+ + Args: + room_id: Room ID to get thumbnail for + size: Unused (kept for API compatibility) + + Returns: + PNG/JPEG image bytes, or None if no thumbnail available + """ + try: + room = rooms_coll.find_one( + {'_id': ObjectId(room_id)}, + {'thumbnail': 1, 'thumbnailUpdatedAt': 1} + ) + + if not room: + logger.debug(f"Room {room_id} not found") + return None + + if 'thumbnail' not in room: + logger.debug(f"No thumbnail stored for room {room_id}") + return None + + thumbnail_bytes = room['thumbnail'] + + # Validate it's actually binary image data + if not isinstance(thumbnail_bytes, bytes): + logger.warning(f"Invalid thumbnail type for room {room_id}: {type(thumbnail_bytes)}") + return None + + if len(thumbnail_bytes) < 100: + logger.warning(f"Thumbnail too small for room {room_id}: {len(thumbnail_bytes)} bytes") + return None + + # Log when thumbnail was last updated (for debugging staleness) + updated_at = room.get('thumbnailUpdatedAt') + if updated_at: + logger.debug(f"Retrieved thumbnail for room {room_id}: {len(thumbnail_bytes)} bytes " + f"(updated {updated_at})") + else: + logger.debug(f"Retrieved thumbnail for room {room_id}: {len(thumbnail_bytes)} bytes") + + return thumbnail_bytes + + except Exception as e: + logger.exception(f"Failed to retrieve thumbnail for room {room_id}: {e}") + return None + + +class EmbeddingWorker: + """Background worker for incremental embedding updates.""" + + def __init__(self, + update_interval: int = DEFAULT_UPDATE_INTERVAL, + debounce_period: int = DEFAULT_DEBOUNCE_PERIOD, + batch_size: int = DEFAULT_BATCH_SIZE): + """ + Initialize the embedding worker. + + Args: + update_interval: How often (seconds) to check for room updates + debounce_period: Minimum time (seconds) since room update before embedding + batch_size: Maximum rooms to process per iteration + """ + self.update_interval = update_interval + self.debounce_period = debounce_period + self.batch_size = batch_size + self.renderer = CanvasRenderer() + + # Track which rooms we've already processed + self.processed_rooms: Set[str] = set() + + logger.info(f"Initialized EmbeddingWorker: update_interval={update_interval}s, " + f"debounce_period={debounce_period}s, batch_size={batch_size}") + + def should_process_room(self, room: Dict) -> bool: + """ + Determine if a room should have its embedding updated. + + Conditions for processing: + 1. Room was created/updated recently + 2. Sufficient time has passed since last update (debouncing) + 3. Room doesn't already have current embedding + 4. Room is not archived + """ + room_id = str(room['_id']) + + # Skip archived rooms + if room.get('archived'): + return False + + # Check when room was last updated + updated_at = room.get('updatedAt') or room.get('createdAt') + if not updated_at: + return False + + # Debounce: Don't embed if room was updated very recently + # (might still be actively being edited) + time_since_update = datetime.utcnow() - updated_at + if time_since_update.total_seconds() < self.debounce_period: + logger.debug(f"Room {room_id} updated {time_since_update.total_seconds()}s ago, " + f"waiting for debounce period ({self.debounce_period}s)") + return False + + # Check if we've already processed this room recently + last_processed = _last_processed_times.get(room_id) + if last_processed: + # Only re-process if room was updated after our last embedding + if updated_at <= last_processed: + return False + + return True + + def find_rooms_to_update(self) -> List[Dict]: + """ + Find rooms that need embedding updates. + + Strategy: + 1. 
Query rooms updated in the last (update_interval + debounce_period) + 2. Filter to those that meet processing criteria + 3. Limit to batch_size + """ + try: + # Look for rooms updated since our last check (with some overlap) + lookback_window = self.update_interval + self.debounce_period + cutoff_time = datetime.utcnow() - timedelta(seconds=lookback_window) + + # Query rooms that have been updated but not too recently + query = { + 'archived': {'$ne': True}, + 'updatedAt': {'$gte': cutoff_time} + } + + rooms = list(rooms_coll.find(query).sort('updatedAt', 1).limit(self.batch_size * 2)) + + # Filter using our processing logic + to_process = [room for room in rooms if self.should_process_room(room)] + + # Limit batch size + to_process = to_process[:self.batch_size] + + logger.info(f"Found {len(to_process)} rooms to process (from {len(rooms)} candidates)") + return to_process + + except Exception as e: + logger.exception(f"Failed to find rooms to update: {e}") + return [] + + def generate_embedding_for_room(self, room: Dict) -> bool: + """ + Generate and store embedding for a single room. + + Process: + 1. Extract text metadata (name, description) + 2. Attempt to render canvas thumbnail + 3. Generate text embedding (always) + 4. Generate image embedding (if thumbnail available) + 5. Combine embeddings if both available + 6. Store in Qdrant + + Args: + room: Room document from MongoDB + + Returns: + True if embedding was successfully generated and stored + """ + room_id = str(room['_id']) + room_name = room.get('name', '') + room_desc = room.get('description', '') + room_type = room.get('type', 'public') + room_owner = room.get('ownerName', '') + + try: + logger.info(f"Processing room '{room_name}' (id={room_id})") + + # 1. Generate text embedding (from name + description) + text = f"{room_name}. {room_desc}" if room_desc else room_name + text_embedding = None + + if text.strip(): + text_embedding = embed_text([text]) + logger.debug(f" Generated text embedding: shape={text_embedding.shape}") + + # 2. Attempt to render canvas thumbnail + thumbnail_bytes = self.renderer.render_room_thumbnail(room_id) + image_embedding = None + + if thumbnail_bytes: + # Save thumbnail temporarily and generate embedding + import tempfile + with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp: + tmp.write(thumbnail_bytes) + tmp_path = tmp.name + + try: + image_embedding = embed_image(tmp_path) + logger.debug(f" Generated image embedding: shape={image_embedding.shape}") + finally: + os.unlink(tmp_path) + else: + logger.debug(f" No thumbnail available, using text-only embedding") + + # 3. 
Combine embeddings if we have both + if text_embedding is not None and image_embedding is not None: + # Hybrid embedding: weighted combination + # Flatten to 1D + text_vec = np.asarray(text_embedding).reshape(-1).astype(np.float32) + img_vec = np.asarray(image_embedding).reshape(-1).astype(np.float32) + + # L2-normalize + # text_norm = np.linalg.norm(text_vec) + # img_norm = np.linalg.norm(img_vec) + # if text_norm > 0: + # text_vec = text_vec / text_norm + # if img_norm > 0: + # img_vec = img_vec / img_norm + + # Weighted combination (image-heavy since visual search is primary) + weight_text = 0.4 + weight_image = 0.6 + combined = weight_text * text_vec + weight_image * img_vec + + # Re-normalize + comb_norm = np.linalg.norm(combined) + if comb_norm > 0: + combined = combined / comb_norm + + final_embedding = combined.reshape(1, -1) + logger.info(f" Combined text+image embedding (weights: {weight_text}/{weight_image})") + + elif text_embedding is not None: + final_embedding = text_embedding + logger.info(f" Using text-only embedding") + + elif image_embedding is not None: + final_embedding = image_embedding + logger.info(f" Using image-only embedding") + + else: + logger.warning(f" No embedding could be generated for room {room_id}") + return False + + # 4. Store in Qdrant + success = store_canvas_embedding( + room_id=room_id, + embedding=final_embedding, + metadata={ + 'name': room_name, + 'description': room_desc, + 'type': room_type, + 'ownerName': room_owner, + 'updated_at': room.get('updatedAt', datetime.utcnow()).isoformat(), + 'has_visual': thumbnail_bytes is not None + } + ) + + if success: + logger.info(f"Successfully stored embedding for '{room_name}'") + _last_processed_times[room_id] = datetime.utcnow() + return True + else: + logger.error(f"Failed to store embedding for '{room_name}'") + return False + + except Exception as e: + logger.exception(f"Failed to generate embedding for room {room_id}: {e}") + return False + + def run_iteration(self) -> Dict: + """ + Run one iteration of the worker loop. + """ + start_time = time.time() + + # Find rooms that need updates + rooms_to_process = self.find_rooms_to_update() + + if not rooms_to_process: + logger.debug("No rooms to process in this iteration") + return { + 'processed': 0, + 'success': 0, + 'failed': 0, + 'duration_seconds': time.time() - start_time + } + + # Process each room + success_count = 0 + failed_count = 0 + + for room in rooms_to_process: + if _shutdown_requested: + logger.info("Shutdown requested, stopping iteration") + break + + try: + if self.generate_embedding_for_room(room): + success_count += 1 + else: + failed_count += 1 + except Exception as e: + logger.exception(f"Error processing room {room.get('_id')}: {e}") + failed_count += 1 + + duration = time.time() - start_time + + stats = { + 'processed': len(rooms_to_process), + 'success': success_count, + 'failed': failed_count, + 'duration_seconds': duration + } + + logger.info(f"Iteration complete: {stats}") + return stats + + def run(self): + """ + Main worker loop. Runs indefinitely until shutdown. 
+ """ + logger.info("Embedding worker started") + logger.info(f"Configuration: update_interval={self.update_interval}s, " + f"debounce={self.debounce_period}s, batch_size={self.batch_size}") + + # Print initial Qdrant stats + try: + stats = get_collection_stats() + logger.info(f"Qdrant collection: {stats.get('collection_name')}, " + f"points: {stats.get('points_count')}") + except Exception as e: + logger.warning(f"Could not fetch Qdrant stats: {e}") + + iteration = 0 + + while not _shutdown_requested: + iteration += 1 + logger.info(f"--- Iteration {iteration} ---") + + try: + stats = self.run_iteration() + + # Log summary + if stats['processed'] > 0: + logger.info(f"Processed {stats['processed']} rooms " + f"({stats['success']} success, {stats['failed']} failed) " + f"in {stats['duration_seconds']:.1f}s") + + except Exception as e: + logger.exception(f"Error in worker iteration: {e}") + + # Sleep until next iteration + if not _shutdown_requested: + logger.debug(f"Sleeping for {self.update_interval}s") + for _ in range(self.update_interval): + if _shutdown_requested: + break + time.sleep(1) + + logger.info("Embedding worker shutting down gracefully") + + +def signal_handler(signum, frame): + """Handle shutdown signals gracefully.""" + global _shutdown_requested + logger.info(f"Received signal {signum}, initiating shutdown...") + _shutdown_requested = True + + +def main(): + """Main entry point for the embedding worker.""" + parser = argparse.ArgumentParser( + description='Background worker for incremental canvas embedding generation' + ) + parser.add_argument( + '--interval', + type=int, + default=DEFAULT_UPDATE_INTERVAL, + help=f'Update check interval in seconds (default: {DEFAULT_UPDATE_INTERVAL})' + ) + parser.add_argument( + '--debounce', + type=int, + default=DEFAULT_DEBOUNCE_PERIOD, + help=f'Debounce period in seconds (default: {DEFAULT_DEBOUNCE_PERIOD})' + ) + parser.add_argument( + '--batch-size', + type=int, + default=DEFAULT_BATCH_SIZE, + help=f'Maximum rooms per batch (default: {DEFAULT_BATCH_SIZE})' + ) + parser.add_argument( + '--once', + action='store_true', + help='Run once and exit (for testing)' + ) + + args = parser.parse_args() + + # Set up signal handlers for graceful shutdown + signal.signal(signal.SIGINT, signal_handler) + signal.signal(signal.SIGTERM, signal_handler) + + # Create and run worker + worker = EmbeddingWorker( + update_interval=args.interval, + debounce_period=args.debounce, + batch_size=args.batch_size + ) + + if args.once: + logger.info("Running in single-iteration mode") + stats = worker.run_iteration() + logger.info(f"Single iteration complete: {stats}") + else: + worker.run() + + logger.info("Embedding worker exited") + + +if __name__ == '__main__': + main() diff --git a/frontend/src/components/Canvas.js b/frontend/src/components/Canvas.js index c08dbbeb..aa46bb54 100644 --- a/frontend/src/components/Canvas.js +++ b/frontend/src/components/Canvas.js @@ -42,6 +42,7 @@ import { getUsername } from '../utils/getUsername'; import { getAuthUser } from '../utils/getAuthUser'; import { resetMyStacks } from '../api/rooms'; import { TEMPLATE_LIBRARY } from '../data/templates'; +import { API_BASE } from '../config/apiConfig'; class UserData { constructor(userId, username) { @@ -208,6 +209,7 @@ function Canvas({ const roomClipboardRef = useRef({}); const roomClearedAtRef = useRef({}); const drawAllDrawingsRef = useRef(null); // Store reference to drawAllDrawings function + const thumbnailUploadTimerRef = useRef(null); // Debounce timer for thumbnail uploads 
useEffect(() => { if (!currentRoomId) return; @@ -799,6 +801,90 @@ function Canvas({ `drawing_${Date.now()}_${Math.random().toString(36).substr(2, 5)}`; const serverCountRef = useRef(0); + // Upload canvas thumbnail for visual search embeddings + const uploadThumbnail = (roomId) => { + if (!roomId || !auth?.token) { + console.debug('Skipping thumbnail upload: no roomId or token'); + return; + } + + const canvas = canvasRef.current; + if (!canvas) { + console.debug('Skipping thumbnail upload: no canvas ref'); + return; + } + + console.log(`🎨 Uploading thumbnail for room ${roomId}...`); + + // Generate thumbnail synchronously - create small version for faster upload + let dataURL; + try { + // Create a smaller thumbnail canvas (max 800x600) to reduce file size + const maxWidth = 800; + const maxHeight = 600; + const scale = Math.min(1, maxWidth / canvas.width, maxHeight / canvas.height); + + const thumbCanvas = document.createElement('canvas'); + thumbCanvas.width = canvas.width * scale; + thumbCanvas.height = canvas.height * scale; + const thumbCtx = thumbCanvas.getContext('2d'); + thumbCtx.drawImage(canvas, 0, 0, thumbCanvas.width, thumbCanvas.height); + + // Use JPEG with lower quality for much smaller file size + dataURL = thumbCanvas.toDataURL('image/jpeg', 0.3); + console.log(`Generated thumbnail: ${thumbCanvas.width}x${thumbCanvas.height}, ${dataURL.length} chars (~${Math.round(dataURL.length * 0.75 / 1024)}KB)`); + } catch (error) { + console.error('Failed to generate canvas thumbnail:', error); + return; + } + + // DEBUG: Log the full URL and payload size + const url = `${API_BASE}/rooms/${roomId}/thumbnail`; + const payload = JSON.stringify({ thumbnail: dataURL }); + console.log(`🔍 DEBUG - About to POST to: ${url}`); + console.log(`🔍 DEBUG - Payload size: ${payload.length} bytes (${Math.round(payload.length / 1024)}KB)`); + console.log(`🔍 DEBUG - Auth token present: ${!!auth?.token}`); + + // Upload asynchronously with keepalive flag (survives page navigation) + fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${auth.token}` + }, + body: JSON.stringify({ thumbnail: dataURL }), + keepalive: true // Critical: allows request to complete even after page unload + }) + .then(response => { + if (response.ok) { + return response.json(); + } else { + console.warn(`Thumbnail upload failed: ${response.status} ${response.statusText}`); + return response.text().then(text => { + console.warn(`Error details: ${text.substring(0, 200)}`); + return null; + }); + } + }) + .then(result => { + if (result) { + console.log('✅ Thumbnail uploaded for visual search:', { + roomId: result.roomId, + size: result.thumbnailSize, + format: result.format + }); + } + }) + .catch(error => { + console.error('Failed to upload thumbnail:', error); + console.error('Error details:', { + message: error.message, + name: error.name, + stack: error.stack?.substring(0, 200) + }); + }); + }; + // Helper function to update filter state const updateFilterState = () => { // Use setUserData callback to read current state accurately @@ -3876,6 +3962,19 @@ function Canvas({ // eslint-disable-next-line react-hooks/exhaustive-deps }, [undoStack, redoStack]); + // Upload thumbnail when leaving the canvas (component unmount or room change) + useEffect(() => { + const roomIdSnapshot = currentRoomId; + + return () => { + // Upload thumbnail on cleanup (when navigating away) + if (roomIdSnapshot) { + uploadThumbnail(roomIdSnapshot); + } + }; + // eslint-disable-next-line 
react-hooks/exhaustive-deps + }, [currentRoomId]); + const [showToolbar, setShowToolbar] = useState(true); const [hoverToolbar, setHoverToolbar] = useState(false); @@ -3925,6 +4024,10 @@ function Canvas({ } catch (e) { /* swallow if state setters changed */ } + // Upload thumbnail before exiting + if (currentRoomId) { + uploadThumbnail(currentRoomId); + } onExitRoom(); }} sx={{ ml: 1 }} From 053d23f97dc7773099956992a4e5b34e55b0fb44 Mon Sep 17 00:00:00 2001 From: sttruji Date: Wed, 17 Dec 2025 07:35:38 -0800 Subject: [PATCH 5/5] Remove NEXT_STEPS.md content Removed detailed next steps for AI search implementation, including completed tasks, missing features, and implementation order. --- backend/NEXT_STEPS.md | 360 ------------------------------------------ 1 file changed, 360 deletions(-) diff --git a/backend/NEXT_STEPS.md b/backend/NEXT_STEPS.md index d66d3040..8b137891 100644 --- a/backend/NEXT_STEPS.md +++ b/backend/NEXT_STEPS.md @@ -1,361 +1 @@ -# Next Steps: Complete AI Search Implementation -## What's Done ✅ - -1. **Core Infrastructure** ✅ - - Qdrant vector database setup - - `vector_search_service.py` - Complete CRUD operations - - `search_algorithms.py` - Real semantic search (no more stubs) - - Docker Compose configuration - - All dependencies added - -2. **Integration** ✅ - - Jay's API endpoint (`search_ai.py`) → Your search logic - - Jay's frontend UI → Backend API - - Your embedding service → Vector search service - ---- - -## What's Missing ❌ - -### Priority 1: Canvas Snapshot Generation (Required for Image Embeddings) - -**Problem:** To generate embeddings for canvas content, you need to convert stroke data to images. - -**Options:** - -#### Option A: Text-Based Embeddings (Simplest - Start Here) -Use canvas name + description for embeddings instead of visual content. - -```python -# backend/scripts/populate_embeddings.py -from services.db import rooms_coll -from services.embedding_service import embed_text -from services.vector_search_service import store_canvas_embedding - -def populate_text_embeddings(): - """Generate embeddings from room metadata (name + description).""" - rooms = rooms_coll.find({"archived": {"$ne": True}}) - - for room in rooms: - room_id = str(room['_id']) - name = room.get('name', '') - desc = room.get('description', '') - - # Combine name and description for richer embedding - text = f"{name}. 
{desc}" if desc else name - - if text.strip(): - embedding = embed_text([text]) - - store_canvas_embedding( - room_id=room_id, - embedding=embedding, - metadata={ - 'name': name, - 'description': desc, - 'type': room.get('type'), - 'ownerName': room.get('ownerName') - } - ) - print(f"✓ Stored embedding for {room_id}: {name}") - -if __name__ == "__main__": - populate_text_embeddings() -``` - -**Pros:** Works immediately, no canvas rendering needed -**Cons:** Doesn't capture visual content -**Use case:** "Find rooms about trees" works, "Find rooms similar to this sketch" won't - -#### Option B: Server-Side Canvas Rendering (Better, More Complex) - -Render strokes to PNG using Pillow: - -```python -# backend/services/canvas_renderer.py -from PIL import Image, ImageDraw -from services.db import strokes_coll - -def render_canvas_to_image(room_id: str, width=800, height=600) -> str: - """Render canvas strokes to PNG file, return path.""" - # Fetch strokes - strokes = list(strokes_coll.find({"roomId": room_id}).sort("ts", 1)) - - # Create image - img = Image.new('RGB', (width, height), 'white') - draw = ImageDraw.Draw(img) - - for stroke in strokes: - points = stroke.get('points', []) - color = stroke.get('color', '#000000') - width = stroke.get('width', 2) - - # Draw lines between points - for i in range(len(points) - 1): - x1, y1 = points[i]['x'], points[i]['y'] - x2, y2 = points[i+1]['x'], points[i+1]['y'] - draw.line([(x1, y1), (x2, y2)], fill=color, width=int(width)) - - # Save to temp file - path = f"/tmp/canvas_{room_id}.png" - img.save(path) - return path -``` - -**Pros:** Captures visual content -**Cons:** Need to understand stroke data format, coordinate systems -**Recommendation:** Start with Option A, add this later - -#### Option C: Frontend Thumbnail Export (Hybrid Approach) - -Let the frontend generate thumbnails and upload them: - -1. Add endpoint: `POST /api/v1/rooms/{room_id}/snapshot` -2. Frontend captures canvas as base64 PNG -3. Backend generates embedding and stores it - -**Pros:** Frontend already knows how to render -**Cons:** Requires frontend changes, manual trigger - ---- - -### Priority 2: Background Embedding Worker - -**Current State:** Embeddings are NOT auto-generated on canvas create/update - -**Solution:** Create a periodic batch processor (simplest approach) - -```python -# backend/workers/embedding_worker.py -import time -import logging -from services.db import rooms_coll -from services.embedding_service import embed_text -from services.vector_search_service import store_canvas_embedding, get_collection_stats - -logger = logging.getLogger(__name__) - -def sync_embeddings_batch(): - """ - Sync embeddings for all canvases that don't have them yet. - Run this periodically (e.g., every 5 minutes). - """ - # Get all room IDs in Qdrant - stats = get_collection_stats() - existing_count = stats.get('points_count', 0) - - # Get all rooms from MongoDB - rooms = list(rooms_coll.find({"archived": {"$ne": True}})) - total_rooms = len(rooms) - - logger.info(f"Found {total_rooms} rooms, {existing_count} embeddings exist") - - new_embeddings = 0 - for room in rooms: - room_id = str(room['_id']) - - # Simple approach: Always regenerate (or add logic to check if exists) - name = room.get('name', '') - desc = room.get('description', '') - text = f"{name}. 
{desc}" if desc else name - - if text.strip(): - embedding = embed_text([text]) - success = store_canvas_embedding( - room_id=room_id, - embedding=embedding, - metadata={ - 'name': name, - 'description': desc, - 'type': room.get('type'), - 'ownerName': room.get('ownerName') - } - ) - if success: - new_embeddings += 1 - - logger.info(f"Synced {new_embeddings} new embeddings") - return new_embeddings - -def run_worker(interval_seconds=300): - """Run worker in loop.""" - logger.info(f"Starting embedding worker (interval={interval_seconds}s)") - while True: - try: - sync_embeddings_batch() - except Exception as e: - logger.exception(f"Worker error: {e}") - - time.sleep(interval_seconds) - -if __name__ == "__main__": - run_worker() -``` - -**How to run:** -```bash -# In separate terminal -python backend/workers/embedding_worker.py - -# Or add to supervisor/systemd/docker-compose -``` - -**Alternative (Production):** Use Celery for more robust job scheduling - ---- - -### Priority 3: Hook into Canvas Updates - -Trigger embedding regeneration when canvases change: - -```python -# In backend/routes/rooms.py (after canvas update) - -from services.embedding_service import embed_text -from services.vector_search_service import store_canvas_embedding - -@rooms_bp.route('/api/v1/rooms/', methods=['PATCH']) -@require_auth -def update_room(room_id): - # ... existing update logic ... - - # After successful update, regenerate embedding - try: - name = updated_room.get('name', '') - desc = updated_room.get('description', '') - text = f"{name}. {desc}" if desc else name - - if text.strip(): - embedding = embed_text([text]) - store_canvas_embedding( - room_id=room_id, - embedding=embedding, - metadata={ - 'name': name, - 'description': desc, - 'type': updated_room.get('type'), - 'ownerName': updated_room.get('ownerName') - } - ) - except Exception as e: - logger.warning(f"Failed to update embedding for {room_id}: {e}") - - return jsonify(updated_room) -``` - ---- - -### Priority 4: Database Indexes (Performance) - -Add indexes for faster queries: - -```python -# In backend/services/db.py (add to existing indexes) - -# For search filtering -rooms_coll.create_index([("type", 1), ("archived", 1)]) -rooms_coll.create_index([("ownerId", 1), ("archived", 1)]) -``` - ---- - -## Recommended Implementation Order - -### Week 1: Get It Working -1. ✅ Setup Qdrant (Done!) -2. ✅ Implement vector_search_service.py (Done!) -3. ✅ Update search_algorithms.py (Done!) -4. ⏳ **Create `populate_embeddings.py` script** (Option A - Text-based) -5. ⏳ **Test search from UI** - -### Week 2: Automate -6. ⏳ Create `embedding_worker.py` (periodic batch sync) -7. ⏳ Add hooks to `rooms.py` for real-time updates -8. ⏳ Add canvas deletion → embedding cleanup - -### Week 3: Visual Search -9. ⏳ Implement canvas rendering (Option B or C) -10. ⏳ Update embeddings to use visual content -11. ⏳ Test image-based search - -### Week 4: Polish -12. ⏳ Add monitoring/logging -13. ⏳ Performance tuning -14. ⏳ Error handling improvements - ---- - -## Quick Test Script - -Save as `backend/scripts/test_vector_search.py`: - -```python -#!/usr/bin/env python3 -"""Quick test script for vector search functionality.""" - -from services.embedding_service import embed_text, embed_image -from services.vector_search_service import ( - store_canvas_embedding, - search_by_embedding, - get_collection_stats -) -import numpy as np - -def test_basic_flow(): - print("🧪 Testing Vector Search...") - - # 1. 
Store test embeddings
-    test_data = [
-        ("room1", "A beautiful landscape with mountains and trees"),
-        ("room2", "Abstract geometric shapes in bright colors"),
-        ("room3", "Portrait of a person with blue eyes"),
-        ("room4", "Forest scene with tall pine trees"),
-    ]
-
-    print("\n📝 Storing test embeddings...")
-    for room_id, description in test_data:
-        emb = embed_text([description])
-        store_canvas_embedding(room_id, emb, {"description": description})
-        print(f"  ✓ {room_id}: {description[:50]}...")
-
-    # 2. Check stats
-    print("\n📊 Collection stats:")
-    stats = get_collection_stats()
-    print(f"  Points: {stats.get('points_count')}")
-    print(f"  Dimension: {stats.get('config', {}).get('dimension')}")
-
-    # 3. Search
-    print("\n🔍 Searching for 'trees'...")
-    query_emb = embed_text(["trees"])
-    results = search_by_embedding(query_emb, top_k=5)
-
-    print(f"\n  Found {len(results)} results:")
-    for i, r in enumerate(results, 1):
-        print(f"  {i}. {r['room_id']} (score: {r['score']:.3f})")
-        print(f"     {r.get('description', '')[:60]}...")
-
-    print("\n✅ Test complete!")
-
-if __name__ == "__main__":
-    test_basic_flow()
-```
-
-Run with:
-```bash
-cd backend
-python scripts/test_vector_search.py
-```
-
----
-
-## Summary
-
-**You have:** Complete Qdrant integration, working vector search, connected UI
-**You need:** Populate embeddings (start with text-based), then add automation
-
-**Fastest path to demo:**
-1. Run the test script above
-2. Create `populate_embeddings.py` for real rooms
-3. Test search in the UI
-4. Show Jay it works! 🎉
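
The `batch_store_embeddings` hunk earlier in this series replaces Python's built-in `hash()` with an MD5-derived integer so that Qdrant point IDs stay stable across worker restarts (`hash()` is salted per process). The sketch below isolates that derivation for reference; the helper name `stable_point_id` and the sample room ID are illustrative, not part of the patch.

```python
import hashlib

def stable_point_id(room_id: str) -> int:
    """Derive a deterministic 60-bit integer ID from a room's hex ObjectId string.

    Unlike the built-in hash(), which is randomized per process, an MD5 digest
    of the room ID maps to the same Qdrant point ID on every worker restart.
    """
    return int(hashlib.md5(room_id.encode()).hexdigest()[:15], 16)

if __name__ == "__main__":
    # Sample 24-character hex room ID; the same input always yields the same point ID.
    print(stable_point_id("6540f2c8e4b0a1b2c3d4e5f6"))
```

A deterministic ID also means that re-embedding a room overwrites the existing point (assuming the service upserts by ID) rather than accumulating duplicates.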