buildfastwithai · Akshat-kush007 · Sep 28, 2024 · Sep 29, 2024
diff --git a/gesture-mate/README.md b/gesture-mate/README.md
@@ -0,0 +1,101 @@
+
+# Setup Scripts
+
+## React setup
+
+### `npm i`
+This command will install all required packages for Frontend project
+
+## ADD Gemini API
+
+Create `.env` file in root. In this file ,Add API-KEY as:  
+
+`REACT_APP_API_KEY = Your_API_Key_Here`
+
+
+**Get your api key from Google Ai Studio**  
+
+[Visit : Google AI STUDIO](https://aistudio.google.com/)
+
+---
+### `npm start`
+
+Runs the app in the development mode automatically.\
+Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
+
+---
+## Gesture detection model setup
+This project using an virtual environment to run python scripts to run gesture-detection model
+
+## Steps to create virtual environment:
+
+ 1. Open the **new terminal** in the root of this project and navigate to the API folder:  
+ `cd api`
+
+1. Install virtual environment of python in this folder
+
+    `api> python -m venv venv`
+
+2. Activate the virtual environment:
+
+    `.\venv\Scripts\Activate`
+
+
+    ### If this not works, Change execution policy
+    ---
+
+    **Step 1: Check the Current Execution Policy**
+
+    First, check your current execution policy by running the following command in PowerShell:
+
+    `Get-ExecutionPolicy`
+
+
+    You will likely see `Restricted`, which means that scripts cannot be run.
+
+    **Step 2: Change the Execution Policy**
+
+    Run the following command **as an Administrator** (open PowerShell as Administrator):
+
+    `Set-ExecutionPolicy RemoteSigned -Scope CurrentUse`
+
+    **Step 3: Activate the Virtual Environment Again**
+
+    Now, try activating your virtual environment again:
+
+    `.\venv\Scripts\Activate`
+
+
+    **Step 4: Revert the Execution Policy (Optional)**
+
+    If you want to revert to the original execution policy after you finish your work, you can run:
+
+    `Set-ExecutionPolicy Restricted -Scope CurrentUser`
+
+    ---
+4. Install required packages:
+
+    `pip install flask`
+
+    `pip install python-dotenv` 
+
+    There are .env or .flaskenv files present. Do "pip install python-dotenv" to use them.
+
+    `pip install opencv-python`
+
+    `pip install mediapipe`
+
+    `pip install sklearn`
+
+---
+5. Run flask using 
+
+    `flask run`
+
+6. Deactivate virtual environment, use Command:
+`deactivate`
+
+---
+
+**We are using the Gesture Recognition Model that we trained using sklearn python library.**
+
diff --git a/gesture-mate/api/__pycache__/api.cpython-310.pyc b/gesture-mate/api/__pycache__/api.cpython-310.pyc
diff --git a/gesture-mate/api/__pycache__/api.cpython-312.pyc b/gesture-mate/api/__pycache__/api.cpython-312.pyc
diff --git a/gesture-mate/api/api.py b/gesture-mate/api/api.py
@@ -0,0 +1,147 @@
+from flask import Flask, jsonify
+import pickle
+import cv2
+import mediapipe as mp
+import numpy as np
+import time
+
+app = Flask(__name__)
+
+
+@app.route('/get-query', methods=['GET'])
+def get_query():
+    try:
+        with open('./model.p', 'rb') as f:
+            model_dict = pickle.load(f)
+        model = model_dict['model']
+        print("Model loaded successfully.")
+    except Exception as e:
+        print(f"Error loading model: {e}")
+        exit()
+
+
+
+    # Initialize Video Capture (try different indices if 0 doesn't work)
+    cap = cv2.VideoCapture(0)
+
+    if not cap.isOpened():
+        print("Error: Could not open the camera.")
+        exit()
+
+    print("Camera opened successfully. Starting prediction. Press 'q' to quit.")
+
+    # Initialize MediaPipe Hands
+    mp_hands = mp.solutions.hands
+    mp_drawing = mp.solutions.drawing_utils
+    mp_drawing_styles = mp.solutions.drawing_styles
+
+    hands = mp_hands.Hands(
+        static_image_mode=False,  # Optimized for video
+        max_num_hands=1,          # Process one hand at a time
+        min_detection_confidence=0.3
+    )
+
+    # Define label dictionary for predicted gestures
+    labels_dict = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E',
+              5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J',
+              10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O',
+              15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T',
+              20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y',
+              25: 'Z', 26: 'q', 27: ' '}
+
+    statement = ""  # This will hold the string of detected gestures
+    last_predicted_character = None  # To track the last gesture to detect changes
+
+    while True:
+        ret, frame = cap.read()
+
+        if not ret:
+            print("Error: Failed to capture image.")
+            break
+
+        # Get frame dimensions
+        H, W, _ = frame.shape
+
+        # Convert the BGR image to RGB (MediaPipe processes RGB)
+        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+        # Process the frame and detect hands
+        results = hands.process(frame_rgb)
+
+        if results.multi_hand_landmarks:
+            for hand_landmarks in results.multi_hand_landmarks:
+                # Draw hand landmarks on the frame
+                mp_drawing.draw_landmarks(
+                    frame,
+                    hand_landmarks,
+                    mp_hands.HAND_CONNECTIONS,
+                    mp_drawing_styles.get_default_hand_landmarks_style(),
+                    mp_drawing_styles.get_default_hand_connections_style()
+                )
+
+                # Initialize lists to hold normalized coordinates
+                data_aux = []
+                x_coords = []
+                y_coords = []
+
+                # Extract x and y coordinates for all landmarks
+                for landmark in hand_landmarks.landmark:
+                    x_coords.append(landmark.x)
+                    y_coords.append(landmark.y)
+
+                # Calculate minimum x and y for normalization
+                min_x = min(x_coords) if x_coords else 0
+                min_y = min(y_coords) if y_coords else 0
+
+                # Normalize and append coordinates to data_aux
+                for x, y in zip(x_coords, y_coords):
+                    normalized_x = x - min_x
+                    normalized_y = y - min_y
+                    data_aux.append(normalized_x)
+                    data_aux.append(normalized_y)
+
+                # Ensure exactly 42 features (21 landmarks * 2)
+                if len(data_aux) == 42:
+                    try:
+                        # Predict the gesture using the model
+                        prediction = model.predict([np.asarray(data_aux)])
+                        predicted_character = labels_dict.get(int(prediction[0]), 'Unknown')
+
+                        if predicted_character == 'q' :
+                            return jsonify({"query": statement}) 
+
+                        # Only append if the gesture has changed
+                        if predicted_character != last_predicted_character:
+                            statement += predicted_character  # Add to statement
+                            last_predicted_character = predicted_character  # Update last gesture
+                            print(f"Predicted character: {predicted_character}")
+                            print(f"***** ALERT: Given String: {statement} *****")
+
+                    except Exception as e:
+                        print(f"Prediction error: {e}")
+                        predicted_character = 'Error'
+
+                    # Calculate bounding box for annotation
+                    x1 = int(min(x_coords) * W) - 10
+                    y1 = int(min(y_coords) * H) - 10
+                    x2 = int(max(x_coords) * W) + 10
+                    y2 = int(max(y_coords) * H) + 10
+
+                    # Draw rectangle and predicted character on the frame
+                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
+                    cv2.putText(frame, predicted_character, (x1, y1 - 10),
+                                cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)
+                else:
+                    print(f"Skipping prediction: Expected 42 features, got {len(data_aux)}.")
+
+        # Display the frame with hand landmarks and gesture annotations
+        cv2.imshow('frame', frame)
+
+        # Exit the loop when 'q' is pressed
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            print("Exiting prediction loop.")
+            break
+    return jsonify({"query": statement})
+    # Release the video capture and close windows
+    cap.release()
+    cv2.destroyAllWindows()
diff --git a/gesture-mate/api/data.pickle b/gesture-mate/api/data.pickle
diff --git a/gesture-mate/api/inference_classifier.py b/gesture-mate/api/inference_classifier.py
@@ -0,0 +1,133 @@
+import pickle
+import cv2
+import mediapipe as mp
+import numpy as np
+import warnings
+
+
+# Suppress specific user warnings regarding deprecated methods in protobuf
+warnings.filterwarnings("ignore", category=UserWarning, message="SymbolDatabase.GetPrototype() is deprecated.")
+
+# Load the pre-trained model
+try:
+    with open('./model.p', 'rb') as f:
+        model_dict = pickle.load(f)
+    model = model_dict['model']
+    print("Model loaded successfully.")
+except Exception as e:
+    print(f"Error loading model: {e}")
+    exit()
+
+# Initialize Video Capture (try different indices if 0 doesn't work)
+cap = cv2.VideoCapture(0)
+
+if not cap.isOpened():
+    print("Error: Could not open the camera.")
+    exit()
+
+print("Camera opened successfully. Starting prediction. Press 'q' to quit.")
+
+# Initialize MediaPipe Hands
+mp_hands = mp.solutions.hands
+mp_drawing = mp.solutions.drawing_utils
+mp_drawing_styles = mp.solutions.drawing_styles
+
+hands = mp_hands.Hands(
+    static_image_mode=False,  # Optimized for video
+    max_num_hands=1,          # Process one hand at a time
+    min_detection_confidence=0.3
+)
+
+# Define label dictionary
+labels_dict = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E',
+              5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J',
+              10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O',
+              15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T',
+              20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y',
+              25: 'Z', 26: 'q', 27: 'space'}
+
+while True:
+    ret, frame = cap.read()
+
+    if not ret:
+        print("Error: Failed to capture image.")
+        break
+
+    # Get frame dimensions
+    H, W, _ = frame.shape
+
+    # Convert the BGR image to RGB
+    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+    # Process the frame and detect hands
+    results = hands.process(frame_rgb)
+
+    if results.multi_hand_landmarks:
+        for hand_landmarks in results.multi_hand_landmarks:
+            # Draw hand landmarks on the frame
+            mp_drawing.draw_landmarks(
+                frame, 
+                hand_landmarks, 
+                mp_hands.HAND_CONNECTIONS,
+                mp_drawing_styles.get_default_hand_landmarks_style(),
+                mp_drawing_styles.get_default_hand_connections_style()
+            )
+
+            # Initialize lists to hold normalized coordinates
+            data_aux = []
+            x_coords = []
+            y_coords = []
+
+            # Extract x and y coordinates for all landmarks
+            for landmark in hand_landmarks.landmark:
+                x_coords.append(landmark.x)
+                y_coords.append(landmark.y)
+
+            # Calculate minimum x and y for normalization
+            min_x = min(x_coords) if x_coords else 0
+            min_y = min(y_coords) if y_coords else 0
+
+            # Normalize and append coordinates
+            for x, y in zip(x_coords, y_coords):
+                normalized_x = x - min_x
+                normalized_y = y - min_y
+                data_aux.append(normalized_x)
+                data_aux.append(normalized_y)
+
+            # Ensure exactly 42 features (21 landmarks * 2)
+            if len(data_aux) == 42:
+                # Calculate bounding box for annotation
+                x1 = int(min(x_coords) * W) - 10
+                y1 = int(min(y_coords) * H) - 10
+                x2 = int(max(x_coords) * W) + 10
+                y2 = int(max(y_coords) * H) + 10
+
+                # Predict using the model
+                try:
+                    prediction = model.predict([np.asarray(data_aux)])
+                    predicted_character = labels_dict.get(int(prediction[0]), 'Unknown')
+                    if predicted_character == 'q' :
+                        print("Exiting prediction loop.") 
+                        exit(1)
+                except Exception as e:
+                    print(f"Prediction error: {e}")
+                    predicted_character = 'Error'
+
+                # Draw rectangle and predicted character
+                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 0), 4)
+                cv2.putText(frame, predicted_character, (x1, y1 - 10),
+                            cv2.FONT_HERSHEY_SIMPLEX, 1.3, (0, 0, 0), 3, cv2.LINE_AA)
+            else:
+                print(f"Skipping prediction: Expected 42 features, got {len(data_aux)}.")
+
+    # Display the frame
+    cv2.imshow('frame', frame)
+
+    # Exit when 'q' is pressed
+    if cv2.waitKey(1) & 0xFF == ord('q'):
+        print("Exiting prediction loop.")
+        break
+
+# Release resources
+cap.release()
+cv2.destroyAllWindows()
diff --git a/gesture-mate/api/model.p b/gesture-mate/api/model.p
diff --git a/gesture-mate/api/readme.md b/gesture-mate/api/readme.md
@@ -0,0 +1,6 @@
+# Packages used by Gesture detection Model
+
+pip install opencv-python  
+pip install mediapipe  
+pip install pickle  
+pip install sklearn