Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions frontend/src/assets/default_examples/default_values.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[
{
"image": "example1.png",
"values": {
"manufacturer": "Example Manufacturer",
"productName": "Example Product",
"ingredients": "Example Ingredients",
"manufacturingDate": "2023-01-01",
"expiryDate": "2024-01-01",
"netWeight": "100g",
"barcode": "123456789",
"otherDetails": "Example Details"
}
},
{
"image": "example2.png",
"values": {
"manufacturer": "Another Manufacturer",
"productName": "Another Product",
"ingredients": "Another Ingredients",
"manufacturingDate": "2023-02-01",
"expiryDate": "2024-02-01",
"netWeight": "200g",
"barcode": "987654321",
"otherDetails": "Another Details"
}
}
]
18 changes: 18 additions & 0 deletions frontend/src/components/Sidebar.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { Link } from 'react-router-dom';

function Sidebar() {
return (
<nav className="bg-secondary flex max-w-20 flex-shrink-0">
<ul className="space-y-2 p-4">
<li>
<Link to="/" className="text-white block py-2 px-4 rounded hover:bg-highlight">Dashboard</Link>
</li>
<li>
<Link to="/test" className="text-white block py-2 px-4 rounded hover:bg-highlight">Test</Link>
</li>
</ul>
</nav>
);
}

export default Sidebar;
30 changes: 30 additions & 0 deletions utils 19-33-32-223/image_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import cv2
import numpy as np

def preprocess_image(img_path):
    """Preprocess an image for downstream analysis.

    Pipeline: histogram-equalize brightness, bilateral-filter noise, then
    GrabCut foreground segmentation seeded with a fixed inset rectangle.

    Args:
        img_path: Path to the image file on disk.

    Returns:
        The segmented BGR image as a numpy array, with background pixels
        zeroed out.

    Raises:
        FileNotFoundError: If the image cannot be read from img_path.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread silently returns None on a bad path or unsupported
        # format; fail loudly here instead of crashing later in cvtColor.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Normalize brightness/contrast by equalizing only the luma (Y) channel,
    # which preserves the original colors.
    img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
    img_normalized = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)

    # Bilateral filtering reduces noise while keeping edges sharp.
    img_filtered = cv2.bilateralFilter(img_normalized, 9, 75, 75)

    # GrabCut segmentation, seeded with a rectangle inset 50px from the
    # top-left corner; assumes the object of interest lies inside it.
    # NOTE(review): images smaller than ~50px per side produce a degenerate
    # rectangle — confirm expected input sizes with the dataset.
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    rect = (50, 50, img.shape[1] - 50, img.shape[0] - 50)
    cv2.grabCut(img_filtered, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

    # GrabCut marks pixels 0/2 as (probable) background and 1/3 as
    # (probable) foreground; keep only the foreground.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img_segmented = img_filtered * mask2[:, :, np.newaxis]

    return img_segmented


# Example usage — guarded so that importing this module does not trigger
# file I/O against a hard-coded path.
if __name__ == "__main__":
    img_filtered = preprocess_image('dataset/object_1/image.png')
    cv2.imwrite('preprocessed_image.jpg', img_filtered)
6 changes: 3 additions & 3 deletions vision/config/api_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
roboflow_api_key = os.getenv("ROBOFLOW_API_KEY")

# Fail fast at startup if any required key is missing, rather than
# surfacing a confusing auth error on the first API call.
if google_api_key is None or langchain_api_key is None or roboflow_api_key is None:
    raise ValueError(
        "Environment variables GOOGLE_API_KEY, LANGCHAIN_API_KEY and ROBOFLOW_API_KEY must be set"
    )

os.environ["GOOGLE_API_KEY"] = google_api_key
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
os.environ["ROBOFLOW_API_KEY"] = roboflow_api_key
16 changes: 13 additions & 3 deletions vision/routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import time
import os
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Path
from fastapi.responses import JSONResponse
from .config.logging_config import configure_logging
from .config.mongo import db
Expand Down Expand Up @@ -32,9 +32,17 @@ async def process_ocr(image: UploadFile = File(...), expected_values: str = Form

expected_values = parse_json_content(expected_values)

process_ocr_task.delay(temp_image_path, image.content_type, expected_values)
# Start the Celery task
task = process_ocr_task.delay(temp_image_path, image.content_type, expected_values)

return JSONResponse(content={"status": "success"})
return JSONResponse(
content={
"status": "success",
"message": "OCR processing started successfully",
"task_id": task.id,
"processing_note": "The OCR data will be processed and compared with expected values if provided."
}
)
except Exception as e:
logger.error("Error during OCR processing at process_ocr: %s", str(e))
raise HTTPException(status_code=500, detail="Internal Server Error")
Expand All @@ -45,8 +53,10 @@ async def get_orders(page: int = 1, limit: int = 10):
skip = (page - 1) * limit
total_orders = db["logs"].count_documents({})
orders = list(db["logs"].find().skip(skip).limit(limit))

for order in orders:
order["_id"] = str(order["_id"])

return JSONResponse(
content={
"orders": orders,
Expand Down
27 changes: 22 additions & 5 deletions vision/tasks/process_ocr_task.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from ..config.celery_worker import celery_app
import os
import json
from ..utils.image_processing import segment_image, encode_image_to_base64, draw_bounding_boxes
from ..utils.llm_invoke import LLMInvoker
from ..utils.sanitize import strip_json_markers, parse_json_content
from ..utils.prompt.load_prompt import load_input_prompt
from ..utils.sanitize import parse_json_content
from ..utils.prompt.load_prompt import load_input_prompt, load_comparison_prompt
from ..constants import MODEL_NAMES
from ..utils.db_operations import store_order_log_in_db, get_next_order_id
from ..config.roboflow import get_roboflow_client
Expand All @@ -30,17 +31,33 @@ def process_ocr_task(temp_image_path, image_content_type, expected_values):
encoded_image_base64 = encode_image_to_base64(output_image_path)
logger.info("Image encoding completed")

# Call LLM
# Call LLM for OCR extraction
input_prompt = load_input_prompt()
llm_invoker = LLMInvoker(MODEL_NAMES["GEMINI_FLASH_LITE"])
ai_msg = llm_invoker.invoke(input_prompt, image_content_type, encoded_image_base64)
actual_values = parse_json_content(ai_msg.content)
logger.info("AI message received from LLM")
logger.info("AI message received from LLM for OCR extraction")

# Perform comparison with expected values if provided
review_result = None
if expected_values:
logger.info("Expected values provided, performing comparison")
comparison_prompt = load_comparison_prompt()

# Create a text-only comparison prompt with expected and actual values
comparison_text = f"{comparison_prompt}\n\nEXPECTED VALUES:\n{json.dumps(expected_values, indent=2)}\n\nACTUAL VALUES:\n{json.dumps(actual_values, indent=2)}"

comparison_invoker = LLMInvoker(MODEL_NAMES["GEMINI_FLASH_LITE"])
comparison_msg = comparison_invoker.invoke_text_only(comparison_text)
logger.info(f"Comparison message: {comparison_msg}")
review_result = parse_json_content(comparison_msg.content)
logger.info(f"Comparison result: {review_result}")
logger.info(f"Comparison completed with review status: {review_result.get('overall_review', {}).get('status', 'unknown')}")

# Clean up
os.remove(output_image_path)
os.remove(segmented_image_path)
os.remove(temp_image_path)

# Store results in DB
store_order_log_in_db(order_id, expected_values, actual_values)
store_order_log_in_db(order_id, expected_values, actual_values, review_result)
20 changes: 16 additions & 4 deletions vision/utils/db_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,22 @@ def get_next_order_id() -> int:
"""Get the next order ID by counting existing documents"""
return db["logs"].count_documents({}) + 1

def store_order_log_in_db(order_id: int, expected_values: list, actual_values: dict, review_result: dict = None) -> None:
    """Insert a new log record into the database.

    Args:
        order_id: Unique identifier for the order.
        expected_values: Expected values provided by the user.
        actual_values: Actual values extracted by OCR.
        review_result: Optional comparison results and review status from
            the AI comparison step; stored under the "review" key when
            present.
    """
    log_entry = {
        "order_id": order_id,
        "expected_values": expected_values,
        "actual_values": actual_values
    }

    # Attach the review only when a non-empty result was produced, so older
    # records without a comparison keep their original shape.
    if review_result:
        log_entry["review"] = review_result

    db["logs"].insert_one(log_entry)
18 changes: 18 additions & 0 deletions vision/utils/llm_invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,21 @@ def invoke(self, prompt: str, image_content_type: str, segmented_image_base64: s
)
ai_msg = self.llm.invoke([message])
return ai_msg

def invoke_text_only(self, prompt: str):
    """Send a text-only prompt (no image) to the underlying LLM.

    Args:
        prompt (str): The text prompt to send to the LLM.

    Returns:
        The AI message response.
    """
    text_part = {"type": "text", "text": prompt}
    msg = HumanMessage(content=[text_part])
    return self.llm.invoke([msg])
63 changes: 63 additions & 0 deletions vision/utils/prompt/comparison_prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
You are a sophisticated AI system designed to compare expected data with actual extracted data from product packaging and fruit analysis. Your task is to determine if the actual values match the expected values semantically, even if they are not exact text matches.

TASK:
1. Compare each field in the expected values with the corresponding field in the actual values.
2. Consider semantic equivalence rather than exact string matching.
3. Analyze each comparison pair and determine if they are:
- MATCH: The values are semantically equivalent even if written differently
- PARTIAL MATCH: The values have some overlapping information but don't fully match
- MISMATCH: The values have significantly different meanings or critical information is missing

REVIEW PROCESS:
1. Field-by-Field Comparison:
- For each field in the expected values, find its corresponding field in the actual values
- Compare the values, accounting for:
* Different formatting (dates, weights, measurements)
* Synonyms or alternative phrasing
* Abbreviated vs. full forms
* Case differences
* Minor spelling variations

2. Intelligent Analysis:
- For ingredients lists, check if all important ingredients are represented (order may differ)
- For dates, normalize formats before comparison (YYYY-MM-DD)
- For weights/measures, normalize units before comparison (convert g to grams, etc.)
- For brand/manufacturer names, consider parent companies and subsidiaries as matches

3. Overall Review Status:
- Based on the field-by-field comparison, determine an overall status:
* "approved" - All critical fields match or have acceptable minor variations
* "needs_review" - Some fields have partial matches or minor discrepancies requiring human review
* "unmatched" - Critical fields have significant discrepancies

RESPONSE FORMAT:
Provide your analysis as a JSON object with the following structure:
```
{
"field_comparisons": [
{
"field": "Manufacturer",
"expected": "Original expected value",
"actual": "Original actual value",
"status": "MATCH/PARTIAL_MATCH/MISMATCH",
"reasoning": "Brief explanation of why this status was assigned"
},
...
],
"overall_review": {
"status": "approved/needs_review/unmatched",
"confidence_score": 0.XX,
"explanation": "Brief explanation of the overall status determination",
"critical_issues": ["List any critical issues that led to needs_review or unmatched status"]
}
}
```

IMPORTANT CONSIDERATIONS:
- Prioritize critical fields (product name, expiry date, ingredients) over less important ones
- For fruit freshness, consider classification within 1 stage as a partial match (e.g., "Ripe" vs "Overripe")
- Be lenient with formatting differences but strict with numerical values in critical fields
- Consider cultural and regional variations in product naming and description
- Factor in common OCR errors and misspellings in your comparison

Your goal is to provide a reliable assessment that would align with a human expert's judgment on whether the actual extracted data sufficiently matches what was expected.
13 changes: 11 additions & 2 deletions vision/utils/prompt/load_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


import os

def load_input_prompt():
Expand All @@ -12,3 +10,14 @@ def load_input_prompt():
with open(prompt_path, "r") as file:
prompt = file.read().strip()
return prompt

def load_comparison_prompt():
    """
    Loads the comparison prompt from the comparison_prompt.txt file located
    in the same directory as this script.
    Returns:
        str: The contents of the comparison prompt file.
    """
    directory = os.path.dirname(__file__)
    with open(os.path.join(directory, "comparison_prompt.txt"), "r") as handle:
        return handle.read().strip()