Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions frontend/src/assets/default_examples/default_values.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[
{
"image": "example1.png",
"values": {
"manufacturer": "Example Manufacturer",
"productName": "Example Product",
"ingredients": "Example Ingredients",
"manufacturingDate": "2023-01-01",
"expiryDate": "2024-01-01",
"netWeight": "100g",
"barcode": "123456789",
"otherDetails": "Example Details"
}
},
{
"image": "example2.png",
"values": {
"manufacturer": "Another Manufacturer",
"productName": "Another Product",
"ingredients": "Another Ingredients",
"manufacturingDate": "2023-02-01",
"expiryDate": "2024-02-01",
"netWeight": "200g",
"barcode": "987654321",
"otherDetails": "Another Details"
}
}
]
18 changes: 18 additions & 0 deletions frontend/src/components/Sidebar.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { Link } from 'react-router-dom';

function Sidebar() {
return (
<nav className="bg-secondary flex max-w-20 flex-shrink-0">
<ul className="space-y-2 p-4">
<li>
<Link to="/" className="text-white block py-2 px-4 rounded hover:bg-highlight">Dashboard</Link>
</li>
<li>
<Link to="/test" className="text-white block py-2 px-4 rounded hover:bg-highlight">Test</Link>
</li>
</ul>
</nav>
);
}

export default Sidebar;
30 changes: 30 additions & 0 deletions utils 19-33-32-223/image_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import cv2
import numpy as np

def preprocess_image(img_path):
    """Preprocess an image for downstream analysis.

    Pipeline: histogram-equalize brightness, bilateral-filter noise, then
    GrabCut foreground segmentation seeded with a fixed inset rectangle.

    Args:
        img_path: Path to the image file on disk.

    Returns:
        The segmented BGR image as a numpy array, with background pixels
        zeroed out.

    Raises:
        FileNotFoundError: If the image cannot be read from img_path.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread silently returns None on a bad path or unsupported
        # format; fail loudly here instead of crashing later in cvtColor.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # Normalize brightness/contrast by equalizing only the luma (Y) channel,
    # which preserves the original colors.
    img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
    img_yuv[:, :, 0] = cv2.equalizeHist(img_yuv[:, :, 0])
    img_normalized = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)

    # Bilateral filtering reduces noise while keeping edges sharp.
    img_filtered = cv2.bilateralFilter(img_normalized, 9, 75, 75)

    # GrabCut segmentation, seeded with a rectangle inset 50px from the
    # top-left corner; assumes the object of interest lies inside it.
    # NOTE(review): images smaller than ~50px per side produce a degenerate
    # rectangle — confirm expected input sizes with the dataset.
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    rect = (50, 50, img.shape[1] - 50, img.shape[0] - 50)
    cv2.grabCut(img_filtered, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

    # GrabCut marks pixels 0/2 as (probable) background and 1/3 as
    # (probable) foreground; keep only the foreground.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img_segmented = img_filtered * mask2[:, :, np.newaxis]

    return img_segmented


# Example usage — guarded so that importing this module does not trigger
# file I/O against a hard-coded path.
if __name__ == "__main__":
    img_filtered = preprocess_image('dataset/object_1/image.png')
    cv2.imwrite('preprocessed_image.jpg', img_filtered)
6 changes: 3 additions & 3 deletions vision/config/api_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
roboflow_api_key = os.getenv("ROBOFLOW_API_KEY")

# Fail fast at startup if any required key is missing, rather than
# surfacing a confusing auth error on the first API call.
if google_api_key is None or langchain_api_key is None or roboflow_api_key is None:
    raise ValueError(
        "Environment variables GOOGLE_API_KEY, LANGCHAIN_API_KEY and ROBOFLOW_API_KEY must be set"
    )

os.environ["GOOGLE_API_KEY"] = google_api_key
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = langchain_api_key
os.environ["ROBOFLOW_API_KEY"] = roboflow_api_key
16 changes: 13 additions & 3 deletions vision/routes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import time
import os
from fastapi import APIRouter, UploadFile, File, Form, HTTPException
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, Path
from fastapi.responses import JSONResponse
from .config.logging_config import configure_logging
from .config.mongo import db
Expand Down Expand Up @@ -32,9 +32,17 @@ async def process_ocr(image: UploadFile = File(...), expected_values: str = Form

expected_values = parse_json_content(expected_values)

process_ocr_task.delay(temp_image_path, image.content_type, expected_values)
# Start the Celery task
task = process_ocr_task.delay(temp_image_path, image.content_type, expected_values)

return JSONResponse(content={"status": "success"})
return JSONResponse(
content={
"status": "success",
"message": "OCR processing started successfully",
"task_id": task.id,
"processing_note": "The OCR data will be processed and compared with expected values if provided."
}
)
except Exception as e:
logger.error("Error during OCR processing at process_ocr: %s", str(e))
raise HTTPException(status_code=500, detail="Internal Server Error")
Expand All @@ -45,8 +53,10 @@ async def get_orders(page: int = 1, limit: int = 10):
skip = (page - 1) * limit
total_orders = db["logs"].count_documents({})
orders = list(db["logs"].find().skip(skip).limit(limit))

for order in orders:
order["_id"] = str(order["_id"])

return JSONResponse(
content={
"orders": orders,
Expand Down
27 changes: 22 additions & 5 deletions vision/tasks/process_ocr_task.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from ..config.celery_worker import celery_app
import os
import json
from ..utils.image_processing import segment_image, encode_image_to_base64, draw_bounding_boxes
from ..utils.llm_invoke import LLMInvoker
from ..utils.sanitize import strip_json_markers, parse_json_content
from ..utils.prompt.load_prompt import load_input_prompt
from ..utils.sanitize import parse_json_content
from ..utils.prompt.load_prompt import load_input_prompt, load_comparison_prompt
from ..constants import MODEL_NAMES
from ..utils.db_operations import store_order_log_in_db, get_next_order_id
from ..config.roboflow import get_roboflow_client
Expand All @@ -30,17 +31,33 @@ def process_ocr_task(temp_image_path, image_content_type, expected_values):
encoded_image_base64 = encode_image_to_base64(output_image_path)
logger.info("Image encoding completed")

# Call LLM
# Call LLM for OCR extraction
input_prompt = load_input_prompt()
llm_invoker = LLMInvoker(MODEL_NAMES["GEMINI_FLASH_LITE"])
ai_msg = llm_invoker.invoke(input_prompt, image_content_type, encoded_image_base64)
actual_values = parse_json_content(ai_msg.content)
logger.info("AI message received from LLM")
logger.info("AI message received from LLM for OCR extraction")

# Perform comparison with expected values if provided
review_result = None
if expected_values:
logger.info("Expected values provided, performing comparison")
comparison_prompt = load_comparison_prompt()

# Create a text-only comparison prompt with expected and actual values
comparison_text = f"{comparison_prompt}\n\nEXPECTED VALUES:\n{json.dumps(expected_values, indent=2)}\n\nACTUAL VALUES:\n{json.dumps(actual_values, indent=2)}"

comparison_invoker = LLMInvoker(MODEL_NAMES["GEMINI_FLASH_LITE"])
comparison_msg = comparison_invoker.invoke_text_only(comparison_text)
logger.info(f"Comparison message: {comparison_msg}")
review_result = parse_json_content(comparison_msg.content)
logger.info(f"Comparison result: {review_result}")
logger.info(f"Comparison completed with review status: {review_result.get('overall_review', {}).get('status', 'unknown')}")

# Clean up
os.remove(output_image_path)
os.remove(segmented_image_path)
os.remove(temp_image_path)

# Store results in DB
store_order_log_in_db(order_id, expected_values, actual_values)
store_order_log_in_db(order_id, expected_values, actual_values, review_result)
20 changes: 16 additions & 4 deletions vision/utils/db_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,22 @@ def get_next_order_id() -> int:
"""Get the next order ID by counting existing documents"""
return db["logs"].count_documents({}) + 1

def store_order_log_in_db(order_id: int, expected_values: list, actual_values: dict, review_result: dict = None) -> None:
    """Insert a new log record into the database.

    Args:
        order_id: Unique identifier for the order.
        expected_values: Expected values provided by the user.
        actual_values: Actual values extracted by OCR.
        review_result: Optional comparison results and review status from
            the AI comparison step; stored under the "review" key when
            present.
    """
    log_entry = {
        "order_id": order_id,
        "expected_values": expected_values,
        "actual_values": actual_values
    }

    # Attach the review only when a non-empty result was produced, so older
    # records without a comparison keep their original shape.
    if review_result:
        log_entry["review"] = review_result

    db["logs"].insert_one(log_entry)
18 changes: 18 additions & 0 deletions vision/utils/llm_invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,21 @@ def invoke(self, prompt: str, image_content_type: str, segmented_image_base64: s
)
ai_msg = self.llm.invoke([message])
return ai_msg

def invoke_text_only(self, prompt: str):
    """Send a text-only prompt (no image) to the underlying LLM.

    Args:
        prompt (str): The text prompt to send to the LLM.

    Returns:
        The AI message response.
    """
    text_part = {"type": "text", "text": prompt}
    msg = HumanMessage(content=[text_part])
    return self.llm.invoke([msg])
63 changes: 63 additions & 0 deletions vision/utils/prompt/comparison_prompt.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
You are a sophisticated AI system designed to compare expected data with actual extracted data from product packaging and fruit analysis. Your task is to determine if the actual values match the expected values semantically, even if they are not exact text matches.

TASK:
1. Compare each field in the expected values with the corresponding field in the actual values.
2. Consider semantic equivalence rather than exact string matching.
3. Analyze each comparison pair and determine if they are:
- MATCH: The values are semantically equivalent even if written differently
- PARTIAL MATCH: The values have some overlapping information but don't fully match
- MISMATCH: The values have significantly different meanings or critical information is missing

REVIEW PROCESS:
1. Field-by-Field Comparison:
- For each field in the expected values, find its corresponding field in the actual values
- Compare the values, accounting for:
* Different formatting (dates, weights, measurements)
* Synonyms or alternative phrasing
* Abbreviated vs. full forms
* Case differences
* Minor spelling variations

2. Intelligent Analysis:
- For ingredients lists, check if all important ingredients are represented (order may differ)
- For dates, normalize formats before comparison (YYYY-MM-DD)
- For weights/measures, normalize units before comparison (convert g to grams, etc.)
- For brand/manufacturer names, consider parent companies and subsidiaries as matches

3. Overall Review Status:
- Based on the field-by-field comparison, determine an overall status:
* "approved" - All critical fields match or have acceptable minor variations
* "needs_review" - Some fields have partial matches or minor discrepancies requiring human review
* "unmatched" - Critical fields have significant discrepancies

RESPONSE FORMAT:
Provide your analysis as a JSON object with the following structure:
```
{
"field_comparisons": [
{
"field": "Manufacturer",
"expected": "Original expected value",
"actual": "Original actual value",
"status": "MATCH/PARTIAL_MATCH/MISMATCH",
"reasoning": "Brief explanation of why this status was assigned"
},
...
],
"overall_review": {
"status": "approved/needs_review/unmatched",
"confidence_score": 0.XX,
"explanation": "Brief explanation of the overall status determination",
"critical_issues": ["List any critical issues that led to needs_review or unmatched status"]
}
}
```

IMPORTANT CONSIDERATIONS:
- Prioritize critical fields (product name, expiry date, ingredients) over less important ones
- For fruit freshness, consider classification within 1 stage as a partial match (e.g., "Ripe" vs "Overripe")
- Be lenient with formatting differences but strict with numerical values in critical fields
- Consider cultural and regional variations in product naming and description
- Factor in common OCR errors and misspellings in your comparison

Your goal is to provide a reliable assessment that would align with a human expert's judgment on whether the actual extracted data sufficiently matches what was expected.
13 changes: 11 additions & 2 deletions vision/utils/prompt/load_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


import os

def load_input_prompt():
Expand All @@ -12,3 +10,14 @@ def load_input_prompt():
with open(prompt_path, "r") as file:
prompt = file.read().strip()
return prompt

def load_comparison_prompt():
    """
    Loads the comparison prompt from the comparison_prompt.txt file located
    in the same directory as this script.
    Returns:
        str: The contents of the comparison prompt file.
    """
    directory = os.path.dirname(__file__)
    with open(os.path.join(directory, "comparison_prompt.txt"), "r") as handle:
        return handle.read().strip()