diff --git a/Documents/Chatbot Database.drawio b/Documents/Chatbot Database.drawio new file mode 100644 index 00000000..6f155eaf --- /dev/null +++ b/Documents/Chatbot Database.drawio @@ -0,0 +1,64 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Documents/Mes_Copilot_ Chatbot Database Modelling.pptx b/Documents/Mes_Copilot_ Chatbot Database Modelling.pptx new file mode 100644 index 00000000..d38e40fa Binary files /dev/null and b/Documents/Mes_Copilot_ Chatbot Database Modelling.pptx differ diff --git a/Documents/chatbot history-Workflow_02 2.pptx b/Documents/chatbot history-Workflow_02 2.pptx new file mode 100644 index 00000000..44eb9662 Binary files /dev/null and b/Documents/chatbot history-Workflow_02 2.pptx differ diff --git a/Documents/diagrams/Chatbot Database.drawio b/Documents/diagrams/Chatbot Database.drawio new file mode 100644 index 00000000..d46cde7c --- /dev/null +++ b/Documents/diagrams/Chatbot Database.drawio @@ -0,0 +1,67 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Documents/diagrams/Mes_Copilot_ Chatbot Database Modelling.pptx b/Documents/diagrams/Mes_Copilot_ Chatbot Database Modelling.pptx new file mode 100644 index 00000000..d38e40fa Binary files /dev/null and b/Documents/diagrams/Mes_Copilot_ Chatbot Database Modelling.pptx differ diff --git a/code/nb_files/krishna_doc.txt b/code/nb_files/krishna_doc.txt new file mode 100644 index 00000000..5b2f1b2f --- /dev/null +++ b/code/nb_files/krishna_doc.txt @@ -0,0 +1,3 @@ +Krishna Dipayan Bhunia is a Senior Software Engineer. +He works in Capgemini as a Senior Data Engineer. +In 2012 he was working for Godrej and Boyce as Full stack Developer. diff --git a/code/nb_files/krishna_resume.pdf b/code/nb_files/krishna_resume.pdf new file mode 100644 index 00000000..8e525b3c Binary files /dev/null and b/code/nb_files/krishna_resume.pdf differ diff --git a/code/nb_files/rag.ipynb b/code/nb_files/rag.ipynb new file mode 100644 index 00000000..53d6bc09 --- /dev/null +++ b/code/nb_files/rag.ipynb @@ -0,0 +1,823 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: faiss-cpu in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (1.9.0)\n", + "Requirement already satisfied: transformers in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (4.45.2)\n", + "Requirement already satisfied: sentence-transformers in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (3.2.0)\n", + "Requirement already satisfied: numpy<3.0,>=1.25.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from faiss-cpu) (1.26.4)\n", + "Requirement already satisfied: packaging in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from faiss-cpu) (24.1)\n", + "Requirement already satisfied: filelock in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (3.16.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.23.2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (0.25.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (6.0.2)\n", + "Requirement already satisfied: regex!=2019.12.17 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (2024.9.11)\n", + "Requirement already satisfied: requests in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (2.32.3)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (0.4.5)\n", + "Requirement already satisfied: tokenizers<0.21,>=0.20 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (0.20.0)\n", + "Requirement already satisfied: tqdm>=4.27 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers) (4.66.5)\n", + "Requirement already satisfied: torch>=1.11.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence-transformers) (2.4.1)\n", + "Requirement already satisfied: scikit-learn in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence-transformers) (1.5.2)\n", + "Requirement already satisfied: scipy in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence-transformers) (1.14.1)\n", + "Requirement already satisfied: Pillow in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence-transformers) (10.4.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (2024.9.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub<1.0,>=0.23.2->transformers) (4.12.2)\n", + "Requirement already satisfied: sympy in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (1.13.3)\n", + "Requirement already satisfied: networkx in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (3.4.1)\n", + "Requirement already satisfied: jinja2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (3.1.4)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (2.20.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (12.1.105)\n", + "Requirement already satisfied: triton==3.0.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence-transformers) (3.0.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers) (12.6.77)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->transformers) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->transformers) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->transformers) (2024.8.30)\n", + "Requirement already satisfied: joblib>=1.2.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from scikit-learn->sentence-transformers) (3.5.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.1)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sympy->torch>=1.11.0->sentence-transformers) (1.3.0)\n", + "Requirement already satisfied: sentence_transformers in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (3.2.0)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.41.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (4.45.2)\n", + "Requirement already satisfied: tqdm in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (4.66.5)\n", + "Requirement already satisfied: torch>=1.11.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (2.4.1)\n", + "Requirement already satisfied: scikit-learn in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (1.5.2)\n", + "Requirement already satisfied: scipy in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (1.14.1)\n", + "Requirement already satisfied: huggingface-hub>=0.20.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (0.25.1)\n", + "Requirement already satisfied: Pillow in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sentence_transformers) (10.4.0)\n", + "Requirement already satisfied: filelock in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub>=0.20.0->sentence_transformers) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub>=0.20.0->sentence_transformers) (2024.9.0)\n", + "Requirement already satisfied: packaging>=20.9 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub>=0.20.0->sentence_transformers) (24.1)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub>=0.20.0->sentence_transformers) (6.0.2)\n", + "Requirement already satisfied: requests in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub>=0.20.0->sentence_transformers) (2.32.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from huggingface-hub>=0.20.0->sentence_transformers) (4.12.2)\n", + "Requirement already satisfied: sympy in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (1.13.3)\n", + "Requirement already satisfied: networkx in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.4.1)\n", + "Requirement already satisfied: jinja2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.1.4)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.20.5 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (2.20.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (12.1.105)\n", + "Requirement already satisfied: triton==3.0.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from torch>=1.11.0->sentence_transformers) (3.0.0)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence_transformers) (12.6.77)\n", + "Requirement already satisfied: numpy>=1.17 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers<5.0.0,>=4.41.0->sentence_transformers) (1.26.4)\n", + "Requirement already satisfied: regex!=2019.12.17 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers<5.0.0,>=4.41.0->sentence_transformers) (2024.9.11)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers<5.0.0,>=4.41.0->sentence_transformers) (0.4.5)\n", + "Requirement already satisfied: tokenizers<0.21,>=0.20 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from transformers<5.0.0,>=4.41.0->sentence_transformers) (0.20.0)\n", + "Requirement already satisfied: joblib>=1.2.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from scikit-learn->sentence_transformers) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from scikit-learn->sentence_transformers) (3.5.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from jinja2->torch>=1.11.0->sentence_transformers) (3.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->huggingface-hub>=0.20.0->sentence_transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->huggingface-hub>=0.20.0->sentence_transformers) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->huggingface-hub>=0.20.0->sentence_transformers) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests->huggingface-hub>=0.20.0->sentence_transformers) (2024.8.30)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from sympy->torch>=1.11.0->sentence_transformers) (1.3.0)\n" + ] + } + ], + "source": [ + "! pip install faiss-cpu transformers sentence-transformers\n", + "\n", + "! pip install sentence_transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`SentenceTransformer._target_device` has been deprecated, please use `SentenceTransformer.device` instead.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 6.76568523e-02 6.34959117e-02 4.87131737e-02 7.93049857e-02\n", + " 3.74480374e-02 2.65278690e-03 3.93749885e-02 -7.09843170e-03\n", + " 5.93614578e-02 3.15370336e-02 6.00980371e-02 -5.29051535e-02\n", + " 4.06068042e-02 -2.59308834e-02 2.98428070e-02 1.12692104e-03\n", + " 7.35148638e-02 -5.03819548e-02 -1.22386672e-01 2.37029027e-02\n", + " 2.97264531e-02 4.24768589e-02 2.56337505e-02 1.99519075e-03\n", + " -5.69191128e-02 -2.71598138e-02 -3.29036042e-02 6.60248548e-02\n", + " 1.19007125e-01 -4.58790474e-02 -7.26214498e-02 -3.25839408e-02\n", + " 5.23414090e-02 4.50553074e-02 8.25307518e-03 3.67023982e-02\n", + " -1.39415488e-02 6.53919429e-02 -2.64272522e-02 2.06378274e-04\n", + " -1.36643331e-02 -3.62810642e-02 -1.95043199e-02 -2.89738514e-02\n", + " 3.94270048e-02 -8.84090886e-02 2.62423395e-03 1.36713609e-02\n", + " 4.83062416e-02 -3.11565381e-02 -1.17329188e-01 -5.11690006e-02\n", + " -8.85287821e-02 -2.18963344e-02 1.42985992e-02 4.44168001e-02\n", + " -1.34815527e-02 7.43392482e-02 2.66382527e-02 -1.98762529e-02\n", + " 1.79191548e-02 -1.06052700e-02 -9.04262960e-02 2.13269386e-02\n", + " 1.41204819e-01 -6.47169538e-03 -1.40383583e-03 -1.53609812e-02\n", + " -8.73571709e-02 7.22174346e-02 2.01403201e-02 4.25587930e-02\n", + " -3.49013992e-02 3.19583225e-04 -8.02970901e-02 -3.27472650e-02\n", + " 2.85268333e-02 -5.13658002e-02 1.09389149e-01 8.19328055e-02\n", + " -9.84040275e-02 -9.34095234e-02 -1.51292169e-02 4.51248661e-02\n", + " 4.94171530e-02 -2.51868218e-02 1.57077685e-02 -1.29290849e-01\n", + " 5.31888893e-03 4.02342947e-03 -2.34572385e-02 -6.72983006e-02\n", + " 2.92281136e-02 -2.60844957e-02 1.30625134e-02 -3.11663132e-02\n", + " -4.82713766e-02 -5.58859594e-02 -3.87505405e-02 1.20010786e-01\n", + " -1.03923846e-02 4.89705354e-02 5.53536788e-02 4.49358746e-02\n", + " -4.00972459e-03 -1.02959722e-01 -2.92968489e-02 -5.83402403e-02\n", + " 2.70472486e-02 -2.20169798e-02 -7.22241625e-02 -4.13869508e-02\n", + " -1.93298627e-02 2.73333024e-03 2.76970823e-04 -9.67588052e-02\n", + " -1.00574777e-01 -1.41922375e-02 -8.07891935e-02 4.53925766e-02\n", + " 2.45041642e-02 5.97614162e-02 -7.38185942e-02 1.19843995e-02\n", + " -6.63403869e-02 -7.69044608e-02 3.85157540e-02 -5.59361962e-33\n", + " 2.80013494e-02 -5.60784712e-02 -4.86601889e-02 2.15569288e-02\n", + " 6.01980388e-02 -4.81402874e-02 -3.50246802e-02 1.93313621e-02\n", + " -1.75151881e-02 -3.89210396e-02 -3.81067069e-03 -1.70287490e-02\n", + " 2.82100495e-02 1.28290346e-02 4.71601337e-02 6.21030182e-02\n", + " -6.43588603e-02 1.29285663e-01 -1.31230969e-02 5.23068644e-02\n", + " -3.73680368e-02 2.89094206e-02 -1.68981794e-02 -2.37330589e-02\n", + " -3.33491936e-02 -5.16762733e-02 1.55356722e-02 2.08803043e-02\n", + " -1.25371413e-02 4.59579043e-02 3.72720510e-02 2.80567184e-02\n", + " -5.90005554e-02 -1.16987983e-02 4.92182523e-02 4.70328368e-02\n", + " 7.35487938e-02 -3.70529667e-02 3.98458168e-03 1.06412461e-02\n", + " -1.61518197e-04 -5.27166203e-02 2.75927819e-02 -3.92921604e-02\n", + " 8.44718069e-02 4.86860536e-02 -4.85872431e-03 1.79948155e-02\n", + " -4.28570211e-02 1.23375189e-02 6.39956072e-03 4.04822268e-02\n", + " 1.48887532e-02 -1.53941112e-02 7.62947872e-02 2.37043556e-02\n", + " 4.45237122e-02 5.08196130e-02 -2.31251237e-03 -1.88737400e-02\n", + " -1.23335691e-02 4.66002040e-02 -5.63438274e-02 6.29927516e-02\n", + " -3.15535292e-02 3.24912705e-02 2.34673917e-02 -6.55437931e-02\n", + " 2.01710071e-02 2.57082209e-02 -1.23868510e-02 -8.36490560e-03\n", + " -6.64377436e-02 9.43074226e-02 -3.57092991e-02 -3.42483111e-02\n", + " -6.66356552e-03 -8.01526196e-03 -3.09711043e-02 4.33012322e-02\n", + " -8.21400341e-03 -1.50794983e-01 3.07692345e-02 4.00719047e-02\n", + " -3.79293561e-02 1.93219632e-03 4.00530398e-02 -8.77074450e-02\n", + " -3.68491784e-02 8.57956614e-03 -3.19251828e-02 -1.25258118e-02\n", + " 7.35538602e-02 1.34734251e-03 2.05918197e-02 2.71097760e-33\n", + " -5.18577099e-02 5.78360707e-02 -9.18985382e-02 3.94421555e-02\n", + " 1.05576530e-01 -1.96912363e-02 6.18402474e-02 -7.63465017e-02\n", + " 2.40880344e-02 9.40049514e-02 -1.16535492e-01 3.71198282e-02\n", + " 5.22425212e-02 -3.95854376e-03 5.72215021e-02 5.32860495e-03\n", + " 1.24016851e-01 1.39022358e-02 -1.10249920e-02 3.56053188e-02\n", + " -3.30754668e-02 8.16574320e-02 -1.52003858e-02 6.05585389e-02\n", + " -6.01397417e-02 3.26102600e-02 -3.48296501e-02 -1.69882085e-02\n", + " -9.74907279e-02 -2.71483976e-02 1.74711330e-03 -7.68982694e-02\n", + " -4.31858189e-02 -1.89985577e-02 -2.91660726e-02 5.77488095e-02\n", + " 2.41822079e-02 -1.16901658e-02 -6.21435530e-02 2.84351911e-02\n", + " -2.37499102e-04 -2.51783542e-02 4.39631986e-03 8.12840015e-02\n", + " 3.64184640e-02 -6.04006015e-02 -3.65517512e-02 -7.93748498e-02\n", + " -5.08535048e-03 6.69699535e-02 -1.17784351e-01 3.23743261e-02\n", + " -4.71251607e-02 -1.34459957e-02 -9.48445350e-02 8.24948866e-03\n", + " -1.06748808e-02 -6.81881532e-02 1.11816369e-03 2.48019788e-02\n", + " -6.35889247e-02 2.84492653e-02 -2.61303429e-02 8.58111084e-02\n", + " 1.14682280e-01 -5.35345152e-02 -5.63588329e-02 4.26009111e-02\n", + " 1.09454487e-02 2.09578704e-02 1.00131206e-01 3.26050818e-02\n", + " -1.84208795e-01 -3.93208489e-02 -6.91454709e-02 -6.38105348e-02\n", + " -6.56386167e-02 -6.41252473e-03 -4.79612872e-02 -7.68133253e-02\n", + " 2.95383800e-02 -2.29948424e-02 4.17037308e-02 -2.50048414e-02\n", + " -4.54513635e-03 -4.17136997e-02 -1.32289371e-02 -6.38357699e-02\n", + " -2.46475753e-03 -1.37337763e-02 1.68977026e-02 -6.30398020e-02\n", + " 8.98880735e-02 4.18171100e-02 -1.85687505e-02 -1.80442150e-08\n", + " -1.67998057e-02 -3.21578160e-02 6.30384088e-02 -4.13091965e-02\n", + " 4.44819257e-02 2.02469388e-03 6.29592761e-02 -5.17375208e-03\n", + " -1.00444201e-02 -3.05640679e-02 3.52672674e-02 5.58581576e-02\n", + " -4.67125401e-02 3.45103554e-02 3.29577848e-02 4.30114232e-02\n", + " 2.94361431e-02 -3.03164031e-02 -1.71107929e-02 7.37485513e-02\n", + " -5.47909215e-02 2.77515315e-02 6.20162580e-03 1.58800222e-02\n", + " 3.42978388e-02 -5.15752286e-03 2.35079695e-02 7.53135383e-02\n", + " 1.92843173e-02 3.36197019e-02 5.09103611e-02 1.52497053e-01\n", + " 1.64207406e-02 2.70528439e-02 3.75162140e-02 2.18553394e-02\n", + " 5.66334017e-02 -3.95747647e-02 7.12312981e-02 -5.41377217e-02\n", + " 1.03768252e-03 2.11853310e-02 -3.56309079e-02 1.09017029e-01\n", + " 2.76525831e-03 3.13996561e-02 1.38419587e-03 -3.45738269e-02\n", + " -4.59278077e-02 2.88083404e-02 7.16903480e-03 4.84684594e-02\n", + " 2.61018649e-02 -9.44074150e-03 2.82169096e-02 3.48723419e-02\n", + " 3.69098410e-02 -8.58953316e-03 -3.53206135e-02 -2.47857086e-02\n", + " -1.91921573e-02 3.80707793e-02 5.99654242e-02 -4.22286279e-02]\n", + " [ 8.64385888e-02 1.02762669e-01 5.39454352e-03 2.04439717e-03\n", + " -9.96333454e-03 2.53854915e-02 4.92875911e-02 -3.06265950e-02\n", + " 6.87254667e-02 1.01365801e-02 7.75397792e-02 -9.00807530e-02\n", + " 6.10616244e-03 -5.69898821e-02 1.41715091e-02 2.80491188e-02\n", + " -8.68465081e-02 7.64399245e-02 -1.03491284e-01 -6.77437633e-02\n", + " 6.99947476e-02 8.44251141e-02 -7.24922586e-03 1.04770530e-02\n", + " 1.34020345e-02 6.77577332e-02 -9.42086279e-02 -3.71690057e-02\n", + " 5.22617288e-02 -3.10853794e-02 -9.63406563e-02 1.57717131e-02\n", + " 2.57866811e-02 7.85244629e-02 7.89949521e-02 1.91516373e-02\n", + " 1.64356586e-02 3.10083316e-03 3.81311215e-02 2.37090923e-02\n", + " 1.05389431e-02 -4.40644920e-02 4.41738516e-02 -2.58728024e-02\n", + " 6.15378767e-02 -4.05427851e-02 -8.64140391e-02 3.19722965e-02\n", + " -8.90688854e-04 -2.44437270e-02 -9.19721127e-02 2.33939439e-02\n", + " -8.30293670e-02 4.41510528e-02 -2.49692425e-02 6.23020194e-02\n", + " -1.30353542e-03 7.51395598e-02 2.46384963e-02 -6.47244453e-02\n", + " -1.17727824e-01 3.83392125e-02 -9.11767334e-02 6.35446087e-02\n", + " 7.62739927e-02 -8.80241469e-02 9.54556745e-03 -4.69717644e-02\n", + " -8.41740668e-02 3.88823487e-02 -1.14393622e-01 6.28858525e-03\n", + " -3.49361412e-02 2.39750277e-02 -3.31317298e-02 -1.57244261e-02\n", + " -3.78955677e-02 -8.81245825e-03 7.06118718e-02 3.28066461e-02\n", + " 2.03675241e-03 -1.12278953e-01 6.79714512e-03 1.22765331e-02\n", + " 3.35303508e-02 -1.36200525e-02 -2.25490145e-02 -2.25229003e-02\n", + " -2.03194283e-02 5.04297540e-02 -7.48652816e-02 -8.22821930e-02\n", + " 7.65962303e-02 4.93392237e-02 -3.75553444e-02 1.44635085e-02\n", + " -5.72457612e-02 -1.79954618e-02 1.09697953e-01 1.19462796e-01\n", + " 8.09210294e-04 6.17057718e-02 3.26322354e-02 -1.30780086e-01\n", + " -1.48636639e-01 -6.16232865e-02 4.33885790e-02 2.67129429e-02\n", + " 1.39786145e-02 -3.94002497e-02 -2.52711698e-02 3.87744559e-03\n", + " 3.58664878e-02 -6.15420379e-02 3.76660749e-02 2.67564934e-02\n", + " -3.82658839e-02 -3.54793631e-02 -2.39227246e-02 8.67977589e-02\n", + " -1.84063427e-02 7.71039277e-02 1.39867724e-03 7.00383112e-02\n", + " -4.77877818e-02 -7.89820105e-02 5.10814302e-02 -2.99868315e-33\n", + " -3.91646437e-02 -2.56212265e-03 1.65210143e-02 9.48938914e-03\n", + " -5.66219911e-02 6.57783300e-02 -4.77002971e-02 1.11661898e-02\n", + " -5.73558584e-02 -9.16258153e-03 -2.17521247e-02 -5.59531599e-02\n", + " -1.11422865e-02 9.32793617e-02 1.66765228e-02 -1.36723584e-02\n", + " 4.34389114e-02 1.87244674e-03 7.29951682e-03 5.16332127e-02\n", + " 4.80608195e-02 1.35341436e-01 -1.71739627e-02 -1.29697947e-02\n", + " -7.50109777e-02 2.61107851e-02 2.69802269e-02 7.83087627e-04\n", + " -4.87269908e-02 1.17842462e-02 -4.59579900e-02 -4.83213626e-02\n", + " -1.95671022e-02 1.93889253e-02 1.98807400e-02 1.67432614e-02\n", + " 9.87801179e-02 -2.74087936e-02 2.34809201e-02 3.70231015e-03\n", + " -6.14514686e-02 -1.21230714e-03 -9.50471032e-03 9.25154891e-03\n", + " 2.38444228e-02 8.61232057e-02 2.26789694e-02 5.45149611e-04\n", + " 3.47129591e-02 6.25463249e-03 -6.92776917e-03 3.92400734e-02\n", + " 1.15674837e-02 3.26280035e-02 6.22155368e-02 2.76114475e-02\n", + " 1.86883584e-02 3.55805792e-02 4.11795825e-02 1.54782468e-02\n", + " 4.22690809e-02 3.82248424e-02 1.00313211e-02 -2.83246431e-02\n", + " 4.47052307e-02 -4.10459004e-02 -4.50545037e-03 -5.44734299e-02\n", + " 2.62321532e-02 1.79862473e-02 -1.23118803e-01 -4.66952287e-02\n", + " -1.35912690e-02 6.46710396e-02 3.57351894e-03 -1.22233517e-02\n", + " -1.79382414e-02 -2.55501885e-02 2.37224083e-02 4.08665510e-03\n", + " -6.51476011e-02 4.43652198e-02 4.68596071e-02 -3.25174108e-02\n", + " 4.02274309e-03 -3.97600094e-03 1.11939590e-02 -9.95597914e-02\n", + " 3.33168320e-02 8.01060945e-02 9.42692161e-02 -6.38293922e-02\n", + " 3.23151797e-02 -5.13553321e-02 -7.49880122e-03 5.30049091e-34\n", + " -4.13194261e-02 9.49646682e-02 -1.06401391e-01 4.96590249e-02\n", + " -3.41913551e-02 -3.16745862e-02 -1.71556287e-02 1.70094497e-03\n", + " 5.79758063e-02 -1.21777563e-03 -1.68536212e-02 -5.16912788e-02\n", + " 5.52998781e-02 -3.42647471e-02 3.08179744e-02 -3.10480539e-02\n", + " 9.27532539e-02 3.72663513e-02 -2.37398036e-02 4.45893966e-02\n", + " 1.46153420e-02 1.16239339e-01 -5.00112474e-02 3.88716199e-02\n", + " 4.24742932e-03 2.56976634e-02 3.27243805e-02 4.29907553e-02\n", + " -1.36144590e-02 2.56122500e-02 1.06262406e-02 -8.46864581e-02\n", + " -9.52982232e-02 1.08399898e-01 -7.51599967e-02 -1.37773855e-02\n", + " 6.37338161e-02 -4.49671596e-03 -3.25321406e-02 6.23613857e-02\n", + " 3.48052830e-02 -3.54922377e-02 -2.00222060e-02 3.66608016e-02\n", + " -2.48836726e-02 1.01818843e-02 -7.01232702e-02 -4.31950651e-02\n", + " 2.95332391e-02 -2.94990052e-04 -3.45386267e-02 1.46675557e-02\n", + " -9.83970016e-02 -4.70488369e-02 -8.85495543e-03 -8.89914110e-02\n", + " 3.50995995e-02 -1.29602015e-01 -4.98866141e-02 -6.12047426e-02\n", + " -5.97797111e-02 9.46318731e-03 4.91218604e-02 -7.75026456e-02\n", + " 8.09726790e-02 -4.79257181e-02 2.34379782e-03 7.57031217e-02\n", + " -2.40175463e-02 -1.52545972e-02 4.86738533e-02 -3.85968722e-02\n", + " -7.04831555e-02 -1.20348306e-02 -3.88791077e-02 -7.76016787e-02\n", + " -1.07243843e-02 1.04187569e-02 -2.13753488e-02 -9.17386264e-02\n", + " -1.11345164e-02 -2.96065696e-02 2.46458240e-02 4.65709856e-03\n", + " -1.63450222e-02 -3.95220071e-02 7.73373917e-02 -2.84733102e-02\n", + " -3.69938603e-03 8.27665105e-02 -1.10408636e-02 3.13984044e-02\n", + " 5.35094254e-02 5.75145818e-02 -3.17621902e-02 -1.52911284e-08\n", + " -7.99661428e-02 -4.76797298e-02 -8.59788731e-02 5.69616258e-02\n", + " -4.08866294e-02 2.23832484e-02 -4.64440649e-03 -3.80130857e-02\n", + " -3.10670827e-02 -1.07277837e-02 1.97699182e-02 7.76994461e-03\n", + " -6.09474257e-03 -3.86376418e-02 2.80272178e-02 6.78138286e-02\n", + " -2.35351361e-02 3.21747549e-02 8.02537985e-03 -2.39107087e-02\n", + " -1.21995481e-03 3.14599276e-02 -5.24924360e-02 -8.06814339e-03\n", + " 3.14773852e-03 5.11496216e-02 -4.44104448e-02 6.36013076e-02\n", + " 3.85084227e-02 3.30433287e-02 -4.18726495e-03 4.95592579e-02\n", + " -5.69604672e-02 -6.49712561e-03 -2.49793250e-02 -1.60867665e-02\n", + " 6.62289411e-02 -2.06310600e-02 1.08045787e-01 1.68547314e-02\n", + " 1.43813081e-02 -1.32127125e-02 -1.29387423e-01 6.95216581e-02\n", + " -5.55773005e-02 -6.75414056e-02 -5.45821106e-03 -6.13592425e-03\n", + " 3.90840955e-02 -6.28779829e-02 3.74063067e-02 -1.16570676e-02\n", + " 1.29150422e-02 -5.52495494e-02 5.16075790e-02 -4.30837413e-03\n", + " 5.80247082e-02 1.86944716e-02 2.27810573e-02 3.21665481e-02\n", + " 5.37978783e-02 7.02849030e-02 7.49312267e-02 -8.41775239e-02]]\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'generator' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 52\u001b[0m\n\u001b[1;32m 49\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWhat is the document about?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;66;03m# Generate response using RAG\u001b[39;00m\n\u001b[0;32m---> 52\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mrag_generate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28mprint\u001b[39m(response)\n", + "Cell \u001b[0;32mIn[18], line 44\u001b[0m, in \u001b[0;36mrag_generate\u001b[0;34m(query)\u001b[0m\n\u001b[1;32m 41\u001b[0m augmented_query \u001b[38;5;241m=\u001b[39m query \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(retrieved_chunks)\n\u001b[1;32m 43\u001b[0m \u001b[38;5;66;03m# Generate the response using the generative model\u001b[39;00m\n\u001b[0;32m---> 44\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator\u001b[49m(augmented_query, max_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, num_return_sequences\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 46\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mgenerated_text\u001b[39m\u001b[38;5;124m'\u001b[39m]\n", + "\u001b[0;31mNameError\u001b[0m: name 'generator' is not defined" + ] + } + ], + "source": [ + "import faiss\n", + "import numpy as np\n", + "from sentence_transformers import SentenceTransformer\n", + "from transformers import pipeline\n", + "\n", + "# Load pre-trained models\n", + "# embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') # Model for creating embeddings\n", + "# generator = pipeline('text-generation', model='gpt-3.5-turbo') # GPT for text generation\n", + "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n", + "sentences = [\"This is an example sentence\", \"Each sentence is converted\"]\n", + "embeddings = HuggingFaceEmbeddings(model_name = model)\n", + "print(embeddings)\n", + "\n", + "# Document to process\n", + "document = [\n", + " \"Paragraph 1: This is the first part of the document.\",\n", + " \"Paragraph 2: This is the second part, which contains more information.\",\n", + " \"Paragraph 3: The third paragraph has additional details.\",\n", + "]\n", + "\n", + "# Step 1: Create embeddings for document chunks\n", + "document_embeddings = embedding_model.encode(document)\n", + "\n", + "# Step 2: Store embeddings in FAISS index\n", + "dimension = document_embeddings.shape[1] # Get embedding dimension\n", + "index = faiss.IndexFlatL2(dimension) # Create FAISS index\n", + "index.add(np.array(document_embeddings)) # Add embeddings to index\n", + "\n", + "# Step 3: Define a function for retrieval based on a query\n", + "def retrieve_relevant_chunks(query, top_k=2):\n", + " query_embedding = embedding_model.encode([query]) # Encode the query\n", + " distances, indices = index.search(query_embedding, top_k) # Search for top-k closest chunks\n", + " return [document[i] for i in indices[0]] # Retrieve the corresponding document chunks\n", + "\n", + "# Step 4: Define the RAG function to generate response\n", + "def rag_generate(query):\n", + " # Retrieve relevant chunks\n", + " retrieved_chunks = retrieve_relevant_chunks(query)\n", + " \n", + " # Augment the query with retrieved document chunks\n", + " augmented_query = query + \"\\n\" + \"\\n\".join(retrieved_chunks)\n", + " \n", + " # Generate the response using the generative model\n", + " response = generator(augmented_query, max_length=100, num_return_sequences=1)\n", + " \n", + " return response[0]['generated_text']\n", + "\n", + "# Example query\n", + "query = \"What is the document about?\"\n", + "\n", + "# Generate response using RAG\n", + "response = rag_generate(query)\n", + "print(response)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 6.76568523e-02 6.34959117e-02 4.87131737e-02 7.93049857e-02\n", + " 3.74480374e-02 2.65278690e-03 3.93749885e-02 -7.09843170e-03\n", + " 5.93614578e-02 3.15370336e-02 6.00980371e-02 -5.29051535e-02\n", + " 4.06068042e-02 -2.59308834e-02 2.98428070e-02 1.12692104e-03\n", + " 7.35148638e-02 -5.03819548e-02 -1.22386672e-01 2.37029027e-02\n", + " 2.97264531e-02 4.24768589e-02 2.56337505e-02 1.99519075e-03\n", + " -5.69191128e-02 -2.71598138e-02 -3.29036042e-02 6.60248548e-02\n", + " 1.19007125e-01 -4.58790474e-02 -7.26214498e-02 -3.25839408e-02\n", + " 5.23414090e-02 4.50553074e-02 8.25307518e-03 3.67023982e-02\n", + " -1.39415488e-02 6.53919429e-02 -2.64272522e-02 2.06378274e-04\n", + " -1.36643331e-02 -3.62810642e-02 -1.95043199e-02 -2.89738514e-02\n", + " 3.94270048e-02 -8.84090886e-02 2.62423395e-03 1.36713609e-02\n", + " 4.83062416e-02 -3.11565381e-02 -1.17329188e-01 -5.11690006e-02\n", + " -8.85287821e-02 -2.18963344e-02 1.42985992e-02 4.44168001e-02\n", + " -1.34815527e-02 7.43392482e-02 2.66382527e-02 -1.98762529e-02\n", + " 1.79191548e-02 -1.06052700e-02 -9.04262960e-02 2.13269386e-02\n", + " 1.41204819e-01 -6.47169538e-03 -1.40383583e-03 -1.53609812e-02\n", + " -8.73571709e-02 7.22174346e-02 2.01403201e-02 4.25587930e-02\n", + " -3.49013992e-02 3.19583225e-04 -8.02970901e-02 -3.27472650e-02\n", + " 2.85268333e-02 -5.13658002e-02 1.09389149e-01 8.19328055e-02\n", + " -9.84040275e-02 -9.34095234e-02 -1.51292169e-02 4.51248661e-02\n", + " 4.94171530e-02 -2.51868218e-02 1.57077685e-02 -1.29290849e-01\n", + " 5.31888893e-03 4.02342947e-03 -2.34572385e-02 -6.72983006e-02\n", + " 2.92281136e-02 -2.60844957e-02 1.30625134e-02 -3.11663132e-02\n", + " -4.82713766e-02 -5.58859594e-02 -3.87505405e-02 1.20010786e-01\n", + " -1.03923846e-02 4.89705354e-02 5.53536788e-02 4.49358746e-02\n", + " -4.00972459e-03 -1.02959722e-01 -2.92968489e-02 -5.83402403e-02\n", + " 2.70472486e-02 -2.20169798e-02 -7.22241625e-02 -4.13869508e-02\n", + " -1.93298627e-02 2.73333024e-03 2.76970823e-04 -9.67588052e-02\n", + " -1.00574777e-01 -1.41922375e-02 -8.07891935e-02 4.53925766e-02\n", + " 2.45041642e-02 5.97614162e-02 -7.38185942e-02 1.19843995e-02\n", + " -6.63403869e-02 -7.69044608e-02 3.85157540e-02 -5.59361962e-33\n", + " 2.80013494e-02 -5.60784712e-02 -4.86601889e-02 2.15569288e-02\n", + " 6.01980388e-02 -4.81402874e-02 -3.50246802e-02 1.93313621e-02\n", + " -1.75151881e-02 -3.89210396e-02 -3.81067069e-03 -1.70287490e-02\n", + " 2.82100495e-02 1.28290346e-02 4.71601337e-02 6.21030182e-02\n", + " -6.43588603e-02 1.29285663e-01 -1.31230969e-02 5.23068644e-02\n", + " -3.73680368e-02 2.89094206e-02 -1.68981794e-02 -2.37330589e-02\n", + " -3.33491936e-02 -5.16762733e-02 1.55356722e-02 2.08803043e-02\n", + " -1.25371413e-02 4.59579043e-02 3.72720510e-02 2.80567184e-02\n", + " -5.90005554e-02 -1.16987983e-02 4.92182523e-02 4.70328368e-02\n", + " 7.35487938e-02 -3.70529667e-02 3.98458168e-03 1.06412461e-02\n", + " -1.61518197e-04 -5.27166203e-02 2.75927819e-02 -3.92921604e-02\n", + " 8.44718069e-02 4.86860536e-02 -4.85872431e-03 1.79948155e-02\n", + " -4.28570211e-02 1.23375189e-02 6.39956072e-03 4.04822268e-02\n", + " 1.48887532e-02 -1.53941112e-02 7.62947872e-02 2.37043556e-02\n", + " 4.45237122e-02 5.08196130e-02 -2.31251237e-03 -1.88737400e-02\n", + " -1.23335691e-02 4.66002040e-02 -5.63438274e-02 6.29927516e-02\n", + " -3.15535292e-02 3.24912705e-02 2.34673917e-02 -6.55437931e-02\n", + " 2.01710071e-02 2.57082209e-02 -1.23868510e-02 -8.36490560e-03\n", + " -6.64377436e-02 9.43074226e-02 -3.57092991e-02 -3.42483111e-02\n", + " -6.66356552e-03 -8.01526196e-03 -3.09711043e-02 4.33012322e-02\n", + " -8.21400341e-03 -1.50794983e-01 3.07692345e-02 4.00719047e-02\n", + " -3.79293561e-02 1.93219632e-03 4.00530398e-02 -8.77074450e-02\n", + " -3.68491784e-02 8.57956614e-03 -3.19251828e-02 -1.25258118e-02\n", + " 7.35538602e-02 1.34734251e-03 2.05918197e-02 2.71097760e-33\n", + " -5.18577099e-02 5.78360707e-02 -9.18985382e-02 3.94421555e-02\n", + " 1.05576530e-01 -1.96912363e-02 6.18402474e-02 -7.63465017e-02\n", + " 2.40880344e-02 9.40049514e-02 -1.16535492e-01 3.71198282e-02\n", + " 5.22425212e-02 -3.95854376e-03 5.72215021e-02 5.32860495e-03\n", + " 1.24016851e-01 1.39022358e-02 -1.10249920e-02 3.56053188e-02\n", + " -3.30754668e-02 8.16574320e-02 -1.52003858e-02 6.05585389e-02\n", + " -6.01397417e-02 3.26102600e-02 -3.48296501e-02 -1.69882085e-02\n", + " -9.74907279e-02 -2.71483976e-02 1.74711330e-03 -7.68982694e-02\n", + " -4.31858189e-02 -1.89985577e-02 -2.91660726e-02 5.77488095e-02\n", + " 2.41822079e-02 -1.16901658e-02 -6.21435530e-02 2.84351911e-02\n", + " -2.37499102e-04 -2.51783542e-02 4.39631986e-03 8.12840015e-02\n", + " 3.64184640e-02 -6.04006015e-02 -3.65517512e-02 -7.93748498e-02\n", + " -5.08535048e-03 6.69699535e-02 -1.17784351e-01 3.23743261e-02\n", + " -4.71251607e-02 -1.34459957e-02 -9.48445350e-02 8.24948866e-03\n", + " -1.06748808e-02 -6.81881532e-02 1.11816369e-03 2.48019788e-02\n", + " -6.35889247e-02 2.84492653e-02 -2.61303429e-02 8.58111084e-02\n", + " 1.14682280e-01 -5.35345152e-02 -5.63588329e-02 4.26009111e-02\n", + " 1.09454487e-02 2.09578704e-02 1.00131206e-01 3.26050818e-02\n", + " -1.84208795e-01 -3.93208489e-02 -6.91454709e-02 -6.38105348e-02\n", + " -6.56386167e-02 -6.41252473e-03 -4.79612872e-02 -7.68133253e-02\n", + " 2.95383800e-02 -2.29948424e-02 4.17037308e-02 -2.50048414e-02\n", + " -4.54513635e-03 -4.17136997e-02 -1.32289371e-02 -6.38357699e-02\n", + " -2.46475753e-03 -1.37337763e-02 1.68977026e-02 -6.30398020e-02\n", + " 8.98880735e-02 4.18171100e-02 -1.85687505e-02 -1.80442150e-08\n", + " -1.67998057e-02 -3.21578160e-02 6.30384088e-02 -4.13091965e-02\n", + " 4.44819257e-02 2.02469388e-03 6.29592761e-02 -5.17375208e-03\n", + " -1.00444201e-02 -3.05640679e-02 3.52672674e-02 5.58581576e-02\n", + " -4.67125401e-02 3.45103554e-02 3.29577848e-02 4.30114232e-02\n", + " 2.94361431e-02 -3.03164031e-02 -1.71107929e-02 7.37485513e-02\n", + " -5.47909215e-02 2.77515315e-02 6.20162580e-03 1.58800222e-02\n", + " 3.42978388e-02 -5.15752286e-03 2.35079695e-02 7.53135383e-02\n", + " 1.92843173e-02 3.36197019e-02 5.09103611e-02 1.52497053e-01\n", + " 1.64207406e-02 2.70528439e-02 3.75162140e-02 2.18553394e-02\n", + " 5.66334017e-02 -3.95747647e-02 7.12312981e-02 -5.41377217e-02\n", + " 1.03768252e-03 2.11853310e-02 -3.56309079e-02 1.09017029e-01\n", + " 2.76525831e-03 3.13996561e-02 1.38419587e-03 -3.45738269e-02\n", + " -4.59278077e-02 2.88083404e-02 7.16903480e-03 4.84684594e-02\n", + " 2.61018649e-02 -9.44074150e-03 2.82169096e-02 3.48723419e-02\n", + " 3.69098410e-02 -8.58953316e-03 -3.53206135e-02 -2.47857086e-02\n", + " -1.91921573e-02 3.80707793e-02 5.99654242e-02 -4.22286279e-02]\n", + " [ 8.64385888e-02 1.02762669e-01 5.39454352e-03 2.04439717e-03\n", + " -9.96333454e-03 2.53854915e-02 4.92875911e-02 -3.06265950e-02\n", + " 6.87254667e-02 1.01365801e-02 7.75397792e-02 -9.00807530e-02\n", + " 6.10616244e-03 -5.69898821e-02 1.41715091e-02 2.80491188e-02\n", + " -8.68465081e-02 7.64399245e-02 -1.03491284e-01 -6.77437633e-02\n", + " 6.99947476e-02 8.44251141e-02 -7.24922586e-03 1.04770530e-02\n", + " 1.34020345e-02 6.77577332e-02 -9.42086279e-02 -3.71690057e-02\n", + " 5.22617288e-02 -3.10853794e-02 -9.63406563e-02 1.57717131e-02\n", + " 2.57866811e-02 7.85244629e-02 7.89949521e-02 1.91516373e-02\n", + " 1.64356586e-02 3.10083316e-03 3.81311215e-02 2.37090923e-02\n", + " 1.05389431e-02 -4.40644920e-02 4.41738516e-02 -2.58728024e-02\n", + " 6.15378767e-02 -4.05427851e-02 -8.64140391e-02 3.19722965e-02\n", + " -8.90688854e-04 -2.44437270e-02 -9.19721127e-02 2.33939439e-02\n", + " -8.30293670e-02 4.41510528e-02 -2.49692425e-02 6.23020194e-02\n", + " -1.30353542e-03 7.51395598e-02 2.46384963e-02 -6.47244453e-02\n", + " -1.17727824e-01 3.83392125e-02 -9.11767334e-02 6.35446087e-02\n", + " 7.62739927e-02 -8.80241469e-02 9.54556745e-03 -4.69717644e-02\n", + " -8.41740668e-02 3.88823487e-02 -1.14393622e-01 6.28858525e-03\n", + " -3.49361412e-02 2.39750277e-02 -3.31317298e-02 -1.57244261e-02\n", + " -3.78955677e-02 -8.81245825e-03 7.06118718e-02 3.28066461e-02\n", + " 2.03675241e-03 -1.12278953e-01 6.79714512e-03 1.22765331e-02\n", + " 3.35303508e-02 -1.36200525e-02 -2.25490145e-02 -2.25229003e-02\n", + " -2.03194283e-02 5.04297540e-02 -7.48652816e-02 -8.22821930e-02\n", + " 7.65962303e-02 4.93392237e-02 -3.75553444e-02 1.44635085e-02\n", + " -5.72457612e-02 -1.79954618e-02 1.09697953e-01 1.19462796e-01\n", + " 8.09210294e-04 6.17057718e-02 3.26322354e-02 -1.30780086e-01\n", + " -1.48636639e-01 -6.16232865e-02 4.33885790e-02 2.67129429e-02\n", + " 1.39786145e-02 -3.94002497e-02 -2.52711698e-02 3.87744559e-03\n", + " 3.58664878e-02 -6.15420379e-02 3.76660749e-02 2.67564934e-02\n", + " -3.82658839e-02 -3.54793631e-02 -2.39227246e-02 8.67977589e-02\n", + " -1.84063427e-02 7.71039277e-02 1.39867724e-03 7.00383112e-02\n", + " -4.77877818e-02 -7.89820105e-02 5.10814302e-02 -2.99868315e-33\n", + " -3.91646437e-02 -2.56212265e-03 1.65210143e-02 9.48938914e-03\n", + " -5.66219911e-02 6.57783300e-02 -4.77002971e-02 1.11661898e-02\n", + " -5.73558584e-02 -9.16258153e-03 -2.17521247e-02 -5.59531599e-02\n", + " -1.11422865e-02 9.32793617e-02 1.66765228e-02 -1.36723584e-02\n", + " 4.34389114e-02 1.87244674e-03 7.29951682e-03 5.16332127e-02\n", + " 4.80608195e-02 1.35341436e-01 -1.71739627e-02 -1.29697947e-02\n", + " -7.50109777e-02 2.61107851e-02 2.69802269e-02 7.83087627e-04\n", + " -4.87269908e-02 1.17842462e-02 -4.59579900e-02 -4.83213626e-02\n", + " -1.95671022e-02 1.93889253e-02 1.98807400e-02 1.67432614e-02\n", + " 9.87801179e-02 -2.74087936e-02 2.34809201e-02 3.70231015e-03\n", + " -6.14514686e-02 -1.21230714e-03 -9.50471032e-03 9.25154891e-03\n", + " 2.38444228e-02 8.61232057e-02 2.26789694e-02 5.45149611e-04\n", + " 3.47129591e-02 6.25463249e-03 -6.92776917e-03 3.92400734e-02\n", + " 1.15674837e-02 3.26280035e-02 6.22155368e-02 2.76114475e-02\n", + " 1.86883584e-02 3.55805792e-02 4.11795825e-02 1.54782468e-02\n", + " 4.22690809e-02 3.82248424e-02 1.00313211e-02 -2.83246431e-02\n", + " 4.47052307e-02 -4.10459004e-02 -4.50545037e-03 -5.44734299e-02\n", + " 2.62321532e-02 1.79862473e-02 -1.23118803e-01 -4.66952287e-02\n", + " -1.35912690e-02 6.46710396e-02 3.57351894e-03 -1.22233517e-02\n", + " -1.79382414e-02 -2.55501885e-02 2.37224083e-02 4.08665510e-03\n", + " -6.51476011e-02 4.43652198e-02 4.68596071e-02 -3.25174108e-02\n", + " 4.02274309e-03 -3.97600094e-03 1.11939590e-02 -9.95597914e-02\n", + " 3.33168320e-02 8.01060945e-02 9.42692161e-02 -6.38293922e-02\n", + " 3.23151797e-02 -5.13553321e-02 -7.49880122e-03 5.30049091e-34\n", + " -4.13194261e-02 9.49646682e-02 -1.06401391e-01 4.96590249e-02\n", + " -3.41913551e-02 -3.16745862e-02 -1.71556287e-02 1.70094497e-03\n", + " 5.79758063e-02 -1.21777563e-03 -1.68536212e-02 -5.16912788e-02\n", + " 5.52998781e-02 -3.42647471e-02 3.08179744e-02 -3.10480539e-02\n", + " 9.27532539e-02 3.72663513e-02 -2.37398036e-02 4.45893966e-02\n", + " 1.46153420e-02 1.16239339e-01 -5.00112474e-02 3.88716199e-02\n", + " 4.24742932e-03 2.56976634e-02 3.27243805e-02 4.29907553e-02\n", + " -1.36144590e-02 2.56122500e-02 1.06262406e-02 -8.46864581e-02\n", + " -9.52982232e-02 1.08399898e-01 -7.51599967e-02 -1.37773855e-02\n", + " 6.37338161e-02 -4.49671596e-03 -3.25321406e-02 6.23613857e-02\n", + " 3.48052830e-02 -3.54922377e-02 -2.00222060e-02 3.66608016e-02\n", + " -2.48836726e-02 1.01818843e-02 -7.01232702e-02 -4.31950651e-02\n", + " 2.95332391e-02 -2.94990052e-04 -3.45386267e-02 1.46675557e-02\n", + " -9.83970016e-02 -4.70488369e-02 -8.85495543e-03 -8.89914110e-02\n", + " 3.50995995e-02 -1.29602015e-01 -4.98866141e-02 -6.12047426e-02\n", + " -5.97797111e-02 9.46318731e-03 4.91218604e-02 -7.75026456e-02\n", + " 8.09726790e-02 -4.79257181e-02 2.34379782e-03 7.57031217e-02\n", + " -2.40175463e-02 -1.52545972e-02 4.86738533e-02 -3.85968722e-02\n", + " -7.04831555e-02 -1.20348306e-02 -3.88791077e-02 -7.76016787e-02\n", + " -1.07243843e-02 1.04187569e-02 -2.13753488e-02 -9.17386264e-02\n", + " -1.11345164e-02 -2.96065696e-02 2.46458240e-02 4.65709856e-03\n", + " -1.63450222e-02 -3.95220071e-02 7.73373917e-02 -2.84733102e-02\n", + " -3.69938603e-03 8.27665105e-02 -1.10408636e-02 3.13984044e-02\n", + " 5.35094254e-02 5.75145818e-02 -3.17621902e-02 -1.52911284e-08\n", + " -7.99661428e-02 -4.76797298e-02 -8.59788731e-02 5.69616258e-02\n", + " -4.08866294e-02 2.23832484e-02 -4.64440649e-03 -3.80130857e-02\n", + " -3.10670827e-02 -1.07277837e-02 1.97699182e-02 7.76994461e-03\n", + " -6.09474257e-03 -3.86376418e-02 2.80272178e-02 6.78138286e-02\n", + " -2.35351361e-02 3.21747549e-02 8.02537985e-03 -2.39107087e-02\n", + " -1.21995481e-03 3.14599276e-02 -5.24924360e-02 -8.06814339e-03\n", + " 3.14773852e-03 5.11496216e-02 -4.44104448e-02 6.36013076e-02\n", + " 3.85084227e-02 3.30433287e-02 -4.18726495e-03 4.95592579e-02\n", + " -5.69604672e-02 -6.49712561e-03 -2.49793250e-02 -1.60867665e-02\n", + " 6.62289411e-02 -2.06310600e-02 1.08045787e-01 1.68547314e-02\n", + " 1.43813081e-02 -1.32127125e-02 -1.29387423e-01 6.95216581e-02\n", + " -5.55773005e-02 -6.75414056e-02 -5.45821106e-03 -6.13592425e-03\n", + " 3.90840955e-02 -6.28779829e-02 3.74063067e-02 -1.16570676e-02\n", + " 1.29150422e-02 -5.52495494e-02 5.16075790e-02 -4.30837413e-03\n", + " 5.80247082e-02 1.86944716e-02 2.27810573e-02 3.21665481e-02\n", + " 5.37978783e-02 7.02849030e-02 7.49312267e-02 -8.41775239e-02]]\n" + ] + } + ], + "source": [ + "from sentence_transformers import SentenceTransformer\n", + "sentences = [\"This is an example sentence\", \"Each sentence is converted\"]\n", + "\n", + "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n", + "embeddings = model.encode(sentences)\n", + "print(embeddings)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sentence embeddings:\n", + "tensor([[ 6.7657e-02, 6.3496e-02, 4.8713e-02, 7.9305e-02, 3.7448e-02,\n", + " 2.6528e-03, 3.9375e-02, -7.0984e-03, 5.9361e-02, 3.1537e-02,\n", + " 6.0098e-02, -5.2905e-02, 4.0607e-02, -2.5931e-02, 2.9843e-02,\n", + " 1.1269e-03, 7.3515e-02, -5.0382e-02, -1.2239e-01, 2.3703e-02,\n", + " 2.9726e-02, 4.2477e-02, 2.5634e-02, 1.9952e-03, -5.6919e-02,\n", + " -2.7160e-02, -3.2904e-02, 6.6025e-02, 1.1901e-01, -4.5879e-02,\n", + " -7.2621e-02, -3.2584e-02, 5.2341e-02, 4.5055e-02, 8.2531e-03,\n", + " 3.6702e-02, -1.3942e-02, 6.5392e-02, -2.6427e-02, 2.0638e-04,\n", + " -1.3664e-02, -3.6281e-02, -1.9504e-02, -2.8974e-02, 3.9427e-02,\n", + " -8.8409e-02, 2.6242e-03, 1.3671e-02, 4.8306e-02, -3.1157e-02,\n", + " -1.1733e-01, -5.1169e-02, -8.8529e-02, -2.1896e-02, 1.4299e-02,\n", + " 4.4417e-02, -1.3482e-02, 7.4339e-02, 2.6638e-02, -1.9876e-02,\n", + " 1.7919e-02, -1.0605e-02, -9.0426e-02, 2.1327e-02, 1.4120e-01,\n", + " -6.4717e-03, -1.4038e-03, -1.5361e-02, -8.7357e-02, 7.2217e-02,\n", + " 2.0140e-02, 4.2559e-02, -3.4901e-02, 3.1958e-04, -8.0297e-02,\n", + " -3.2747e-02, 2.8527e-02, -5.1366e-02, 1.0939e-01, 8.1933e-02,\n", + " -9.8404e-02, -9.3410e-02, -1.5129e-02, 4.5125e-02, 4.9417e-02,\n", + " -2.5187e-02, 1.5708e-02, -1.2929e-01, 5.3189e-03, 4.0234e-03,\n", + " -2.3457e-02, -6.7298e-02, 2.9228e-02, -2.6084e-02, 1.3063e-02,\n", + " -3.1166e-02, -4.8271e-02, -5.5886e-02, -3.8751e-02, 1.2001e-01,\n", + " -1.0392e-02, 4.8971e-02, 5.5354e-02, 4.4936e-02, -4.0097e-03,\n", + " -1.0296e-01, -2.9297e-02, -5.8340e-02, 2.7047e-02, -2.2017e-02,\n", + " -7.2224e-02, -4.1387e-02, -1.9330e-02, 2.7333e-03, 2.7697e-04,\n", + " -9.6759e-02, -1.0057e-01, -1.4192e-02, -8.0789e-02, 4.5393e-02,\n", + " 2.4504e-02, 5.9761e-02, -7.3819e-02, 1.1984e-02, -6.6340e-02,\n", + " -7.6904e-02, 3.8516e-02, -5.5936e-33, 2.8001e-02, -5.6078e-02,\n", + " -4.8660e-02, 2.1557e-02, 6.0198e-02, -4.8140e-02, -3.5025e-02,\n", + " 1.9331e-02, -1.7515e-02, -3.8921e-02, -3.8107e-03, -1.7029e-02,\n", + " 2.8210e-02, 1.2829e-02, 4.7160e-02, 6.2103e-02, -6.4359e-02,\n", + " 1.2929e-01, -1.3123e-02, 5.2307e-02, -3.7368e-02, 2.8909e-02,\n", + " -1.6898e-02, -2.3733e-02, -3.3349e-02, -5.1676e-02, 1.5536e-02,\n", + " 2.0880e-02, -1.2537e-02, 4.5958e-02, 3.7272e-02, 2.8057e-02,\n", + " -5.9001e-02, -1.1699e-02, 4.9218e-02, 4.7033e-02, 7.3549e-02,\n", + " -3.7053e-02, 3.9846e-03, 1.0641e-02, -1.6152e-04, -5.2717e-02,\n", + " 2.7593e-02, -3.9292e-02, 8.4472e-02, 4.8686e-02, -4.8587e-03,\n", + " 1.7995e-02, -4.2857e-02, 1.2338e-02, 6.3996e-03, 4.0482e-02,\n", + " 1.4889e-02, -1.5394e-02, 7.6295e-02, 2.3704e-02, 4.4524e-02,\n", + " 5.0820e-02, -2.3125e-03, -1.8874e-02, -1.2334e-02, 4.6600e-02,\n", + " -5.6344e-02, 6.2993e-02, -3.1554e-02, 3.2491e-02, 2.3467e-02,\n", + " -6.5544e-02, 2.0171e-02, 2.5708e-02, -1.2387e-02, -8.3649e-03,\n", + " -6.6438e-02, 9.4307e-02, -3.5709e-02, -3.4248e-02, -6.6636e-03,\n", + " -8.0153e-03, -3.0971e-02, 4.3301e-02, -8.2140e-03, -1.5079e-01,\n", + " 3.0769e-02, 4.0072e-02, -3.7929e-02, 1.9322e-03, 4.0053e-02,\n", + " -8.7707e-02, -3.6849e-02, 8.5796e-03, -3.1925e-02, -1.2526e-02,\n", + " 7.3554e-02, 1.3473e-03, 2.0592e-02, 2.7110e-33, -5.1858e-02,\n", + " 5.7836e-02, -9.1899e-02, 3.9442e-02, 1.0558e-01, -1.9691e-02,\n", + " 6.1840e-02, -7.6347e-02, 2.4088e-02, 9.4005e-02, -1.1654e-01,\n", + " 3.7120e-02, 5.2243e-02, -3.9585e-03, 5.7222e-02, 5.3286e-03,\n", + " 1.2402e-01, 1.3902e-02, -1.1025e-02, 3.5605e-02, -3.3075e-02,\n", + " 8.1657e-02, -1.5200e-02, 6.0559e-02, -6.0140e-02, 3.2610e-02,\n", + " -3.4830e-02, -1.6988e-02, -9.7491e-02, -2.7148e-02, 1.7471e-03,\n", + " -7.6898e-02, -4.3186e-02, -1.8999e-02, -2.9166e-02, 5.7749e-02,\n", + " 2.4182e-02, -1.1690e-02, -6.2144e-02, 2.8435e-02, -2.3750e-04,\n", + " -2.5178e-02, 4.3963e-03, 8.1284e-02, 3.6418e-02, -6.0401e-02,\n", + " -3.6552e-02, -7.9375e-02, -5.0854e-03, 6.6970e-02, -1.1778e-01,\n", + " 3.2374e-02, -4.7125e-02, -1.3446e-02, -9.4845e-02, 8.2495e-03,\n", + " -1.0675e-02, -6.8188e-02, 1.1182e-03, 2.4802e-02, -6.3589e-02,\n", + " 2.8449e-02, -2.6130e-02, 8.5811e-02, 1.1468e-01, -5.3535e-02,\n", + " -5.6359e-02, 4.2601e-02, 1.0945e-02, 2.0958e-02, 1.0013e-01,\n", + " 3.2605e-02, -1.8421e-01, -3.9321e-02, -6.9145e-02, -6.3811e-02,\n", + " -6.5639e-02, -6.4125e-03, -4.7961e-02, -7.6813e-02, 2.9538e-02,\n", + " -2.2995e-02, 4.1704e-02, -2.5005e-02, -4.5451e-03, -4.1714e-02,\n", + " -1.3229e-02, -6.3836e-02, -2.4648e-03, -1.3734e-02, 1.6898e-02,\n", + " -6.3040e-02, 8.9888e-02, 4.1817e-02, -1.8569e-02, -1.8044e-08,\n", + " -1.6800e-02, -3.2158e-02, 6.3038e-02, -4.1309e-02, 4.4482e-02,\n", + " 2.0247e-03, 6.2959e-02, -5.1738e-03, -1.0044e-02, -3.0564e-02,\n", + " 3.5267e-02, 5.5858e-02, -4.6713e-02, 3.4510e-02, 3.2958e-02,\n", + " 4.3011e-02, 2.9436e-02, -3.0316e-02, -1.7111e-02, 7.3749e-02,\n", + " -5.4791e-02, 2.7752e-02, 6.2016e-03, 1.5880e-02, 3.4298e-02,\n", + " -5.1575e-03, 2.3508e-02, 7.5314e-02, 1.9284e-02, 3.3620e-02,\n", + " 5.0910e-02, 1.5250e-01, 1.6421e-02, 2.7053e-02, 3.7516e-02,\n", + " 2.1855e-02, 5.6633e-02, -3.9575e-02, 7.1231e-02, -5.4138e-02,\n", + " 1.0377e-03, 2.1185e-02, -3.5631e-02, 1.0902e-01, 2.7653e-03,\n", + " 3.1400e-02, 1.3842e-03, -3.4574e-02, -4.5928e-02, 2.8808e-02,\n", + " 7.1690e-03, 4.8468e-02, 2.6102e-02, -9.4407e-03, 2.8217e-02,\n", + " 3.4872e-02, 3.6910e-02, -8.5895e-03, -3.5321e-02, -2.4786e-02,\n", + " -1.9192e-02, 3.8071e-02, 5.9965e-02, -4.2229e-02],\n", + " [ 8.6439e-02, 1.0276e-01, 5.3945e-03, 2.0444e-03, -9.9633e-03,\n", + " 2.5385e-02, 4.9288e-02, -3.0627e-02, 6.8725e-02, 1.0137e-02,\n", + " 7.7540e-02, -9.0081e-02, 6.1062e-03, -5.6990e-02, 1.4172e-02,\n", + " 2.8049e-02, -8.6847e-02, 7.6440e-02, -1.0349e-01, -6.7744e-02,\n", + " 6.9995e-02, 8.4425e-02, -7.2492e-03, 1.0477e-02, 1.3402e-02,\n", + " 6.7758e-02, -9.4209e-02, -3.7169e-02, 5.2262e-02, -3.1085e-02,\n", + " -9.6341e-02, 1.5772e-02, 2.5787e-02, 7.8524e-02, 7.8995e-02,\n", + " 1.9152e-02, 1.6436e-02, 3.1008e-03, 3.8131e-02, 2.3709e-02,\n", + " 1.0539e-02, -4.4064e-02, 4.4174e-02, -2.5873e-02, 6.1538e-02,\n", + " -4.0543e-02, -8.6414e-02, 3.1972e-02, -8.9069e-04, -2.4444e-02,\n", + " -9.1972e-02, 2.3394e-02, -8.3029e-02, 4.4151e-02, -2.4969e-02,\n", + " 6.2302e-02, -1.3035e-03, 7.5140e-02, 2.4638e-02, -6.4724e-02,\n", + " -1.1773e-01, 3.8339e-02, -9.1177e-02, 6.3545e-02, 7.6274e-02,\n", + " -8.8024e-02, 9.5456e-03, -4.6972e-02, -8.4174e-02, 3.8882e-02,\n", + " -1.1439e-01, 6.2886e-03, -3.4936e-02, 2.3975e-02, -3.3132e-02,\n", + " -1.5724e-02, -3.7896e-02, -8.8125e-03, 7.0612e-02, 3.2807e-02,\n", + " 2.0368e-03, -1.1228e-01, 6.7971e-03, 1.2277e-02, 3.3530e-02,\n", + " -1.3620e-02, -2.2549e-02, -2.2523e-02, -2.0319e-02, 5.0430e-02,\n", + " -7.4865e-02, -8.2282e-02, 7.6596e-02, 4.9339e-02, -3.7555e-02,\n", + " 1.4464e-02, -5.7246e-02, -1.7995e-02, 1.0970e-01, 1.1946e-01,\n", + " 8.0921e-04, 6.1706e-02, 3.2632e-02, -1.3078e-01, -1.4864e-01,\n", + " -6.1623e-02, 4.3389e-02, 2.6713e-02, 1.3979e-02, -3.9400e-02,\n", + " -2.5271e-02, 3.8774e-03, 3.5866e-02, -6.1542e-02, 3.7666e-02,\n", + " 2.6756e-02, -3.8266e-02, -3.5479e-02, -2.3923e-02, 8.6798e-02,\n", + " -1.8406e-02, 7.7104e-02, 1.3987e-03, 7.0038e-02, -4.7788e-02,\n", + " -7.8982e-02, 5.1081e-02, -2.9987e-33, -3.9165e-02, -2.5621e-03,\n", + " 1.6521e-02, 9.4894e-03, -5.6622e-02, 6.5778e-02, -4.7700e-02,\n", + " 1.1166e-02, -5.7356e-02, -9.1626e-03, -2.1752e-02, -5.5953e-02,\n", + " -1.1142e-02, 9.3279e-02, 1.6677e-02, -1.3672e-02, 4.3439e-02,\n", + " 1.8724e-03, 7.2995e-03, 5.1633e-02, 4.8061e-02, 1.3534e-01,\n", + " -1.7174e-02, -1.2970e-02, -7.5011e-02, 2.6111e-02, 2.6980e-02,\n", + " 7.8309e-04, -4.8727e-02, 1.1784e-02, -4.5958e-02, -4.8321e-02,\n", + " -1.9567e-02, 1.9389e-02, 1.9881e-02, 1.6743e-02, 9.8780e-02,\n", + " -2.7409e-02, 2.3481e-02, 3.7023e-03, -6.1451e-02, -1.2123e-03,\n", + " -9.5047e-03, 9.2515e-03, 2.3844e-02, 8.6123e-02, 2.2679e-02,\n", + " 5.4515e-04, 3.4713e-02, 6.2546e-03, -6.9278e-03, 3.9240e-02,\n", + " 1.1567e-02, 3.2628e-02, 6.2216e-02, 2.7611e-02, 1.8688e-02,\n", + " 3.5581e-02, 4.1180e-02, 1.5478e-02, 4.2269e-02, 3.8225e-02,\n", + " 1.0031e-02, -2.8325e-02, 4.4705e-02, -4.1046e-02, -4.5055e-03,\n", + " -5.4473e-02, 2.6232e-02, 1.7986e-02, -1.2312e-01, -4.6695e-02,\n", + " -1.3591e-02, 6.4671e-02, 3.5735e-03, -1.2223e-02, -1.7938e-02,\n", + " -2.5550e-02, 2.3722e-02, 4.0867e-03, -6.5148e-02, 4.4365e-02,\n", + " 4.6860e-02, -3.2517e-02, 4.0227e-03, -3.9760e-03, 1.1194e-02,\n", + " -9.9560e-02, 3.3317e-02, 8.0106e-02, 9.4269e-02, -6.3829e-02,\n", + " 3.2315e-02, -5.1355e-02, -7.4988e-03, 5.3005e-34, -4.1319e-02,\n", + " 9.4965e-02, -1.0640e-01, 4.9659e-02, -3.4191e-02, -3.1675e-02,\n", + " -1.7156e-02, 1.7009e-03, 5.7976e-02, -1.2178e-03, -1.6854e-02,\n", + " -5.1691e-02, 5.5300e-02, -3.4265e-02, 3.0818e-02, -3.1048e-02,\n", + " 9.2753e-02, 3.7266e-02, -2.3740e-02, 4.4589e-02, 1.4615e-02,\n", + " 1.1624e-01, -5.0011e-02, 3.8872e-02, 4.2474e-03, 2.5698e-02,\n", + " 3.2724e-02, 4.2991e-02, -1.3614e-02, 2.5612e-02, 1.0626e-02,\n", + " -8.4686e-02, -9.5298e-02, 1.0840e-01, -7.5160e-02, -1.3777e-02,\n", + " 6.3734e-02, -4.4967e-03, -3.2532e-02, 6.2361e-02, 3.4805e-02,\n", + " -3.5492e-02, -2.0022e-02, 3.6661e-02, -2.4884e-02, 1.0182e-02,\n", + " -7.0123e-02, -4.3195e-02, 2.9533e-02, -2.9499e-04, -3.4539e-02,\n", + " 1.4668e-02, -9.8397e-02, -4.7049e-02, -8.8550e-03, -8.8991e-02,\n", + " 3.5100e-02, -1.2960e-01, -4.9887e-02, -6.1205e-02, -5.9780e-02,\n", + " 9.4632e-03, 4.9122e-02, -7.7503e-02, 8.0973e-02, -4.7926e-02,\n", + " 2.3438e-03, 7.5703e-02, -2.4018e-02, -1.5255e-02, 4.8674e-02,\n", + " -3.8597e-02, -7.0483e-02, -1.2035e-02, -3.8879e-02, -7.7602e-02,\n", + " -1.0724e-02, 1.0419e-02, -2.1375e-02, -9.1739e-02, -1.1135e-02,\n", + " -2.9607e-02, 2.4646e-02, 4.6571e-03, -1.6345e-02, -3.9522e-02,\n", + " 7.7337e-02, -2.8473e-02, -3.6994e-03, 8.2767e-02, -1.1041e-02,\n", + " 3.1398e-02, 5.3509e-02, 5.7515e-02, -3.1762e-02, -1.5291e-08,\n", + " -7.9966e-02, -4.7680e-02, -8.5979e-02, 5.6962e-02, -4.0887e-02,\n", + " 2.2383e-02, -4.6444e-03, -3.8013e-02, -3.1067e-02, -1.0728e-02,\n", + " 1.9770e-02, 7.7699e-03, -6.0947e-03, -3.8638e-02, 2.8027e-02,\n", + " 6.7814e-02, -2.3535e-02, 3.2175e-02, 8.0254e-03, -2.3911e-02,\n", + " -1.2200e-03, 3.1460e-02, -5.2492e-02, -8.0681e-03, 3.1477e-03,\n", + " 5.1150e-02, -4.4410e-02, 6.3601e-02, 3.8508e-02, 3.3043e-02,\n", + " -4.1873e-03, 4.9559e-02, -5.6960e-02, -6.4971e-03, -2.4979e-02,\n", + " -1.6087e-02, 6.6229e-02, -2.0631e-02, 1.0805e-01, 1.6855e-02,\n", + " 1.4381e-02, -1.3213e-02, -1.2939e-01, 6.9522e-02, -5.5577e-02,\n", + " -6.7541e-02, -5.4582e-03, -6.1359e-03, 3.9084e-02, -6.2878e-02,\n", + " 3.7406e-02, -1.1657e-02, 1.2915e-02, -5.5250e-02, 5.1608e-02,\n", + " -4.3084e-03, 5.8025e-02, 1.8694e-02, 2.2781e-02, 3.2167e-02,\n", + " 5.3798e-02, 7.0285e-02, 7.4931e-02, -8.4178e-02]])\n" + ] + } + ], + "source": [ + "from transformers import AutoTokenizer, AutoModel\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "#Mean Pooling - Take attention mask into account for correct averaging\n", + "def mean_pooling(model_output, attention_mask):\n", + " token_embeddings = model_output[0] #First element of model_output contains all token embeddings\n", + " input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()\n", + " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n", + "\n", + "\n", + "# Sentences we want sentence embeddings for\n", + "sentences = ['This is an example sentence', 'Each sentence is converted']\n", + "\n", + "# Load model from HuggingFace Hub\n", + "tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')\n", + "model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')\n", + "\n", + "# Tokenize sentences\n", + "encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n", + "\n", + "# Compute token embeddings\n", + "with torch.no_grad():\n", + " model_output = model(**encoded_input)\n", + "\n", + "# Perform pooling\n", + "sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])\n", + "\n", + "# Normalize embeddings\n", + "sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)\n", + "\n", + "print(\"Sentence embeddings:\")\n", + "print(sentence_embeddings)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv_kri_mes", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/code/nb_files/rag2.ipynb b/code/nb_files/rag2.ipynb new file mode 100644 index 00000000..c1927e74 --- /dev/null +++ b/code/nb_files/rag2.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langchain_groq in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (0.2.0)\n", + "Requirement already satisfied: groq<1,>=0.4.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain_groq) (0.11.0)\n", + "Requirement already satisfied: langchain-core<0.4,>=0.3 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain_groq) (0.3.9)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (4.6.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (0.27.2)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (2.9.2)\n", + "Requirement already satisfied: sniffio in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (1.3.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from groq<1,>=0.4.1->langchain_groq) (4.12.2)\n", + "Requirement already satisfied: PyYAML>=5.3 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain-core<0.4,>=0.3->langchain_groq) (6.0.2)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain-core<0.4,>=0.3->langchain_groq) (1.33)\n", + "Requirement already satisfied: langsmith<0.2.0,>=0.1.125 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain-core<0.4,>=0.3->langchain_groq) (0.1.132)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain-core<0.4,>=0.3->langchain_groq) (24.1)\n", + "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langchain-core<0.4,>=0.3->langchain_groq) (8.5.0)\n", + "Requirement already satisfied: idna>=2.8 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from anyio<5,>=3.5.0->groq<1,>=0.4.1->langchain_groq) (3.10)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from anyio<5,>=3.5.0->groq<1,>=0.4.1->langchain_groq) (1.2.2)\n", + "Requirement already satisfied: certifi in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (1.0.6)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->groq<1,>=0.4.1->langchain_groq) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4,>=0.3->langchain_groq) (3.0.0)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core<0.4,>=0.3->langchain_groq) (3.10.7)\n", + "Requirement already satisfied: requests<3,>=2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core<0.4,>=0.3->langchain_groq) (2.32.3)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from langsmith<0.2.0,>=0.1.125->langchain-core<0.4,>=0.3->langchain_groq) (1.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->groq<1,>=0.4.1->langchain_groq) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->groq<1,>=0.4.1->langchain_groq) (2.23.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core<0.4,>=0.3->langchain_groq) (3.3.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages (from requests<3,>=2->langsmith<0.2.0,>=0.1.125->langchain-core<0.4,>=0.3->langchain_groq) (2.2.3)\n" + ] + } + ], + "source": [ + "! pip install langchain_groq" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "\n", + "from langchain.agents import create_tool_calling_agent\n", + "from langchain.agents import AgentExecutor\n", + "\n", + "from langchain_groq import ChatGroq\n", + "\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "\n", + "from langchain_community.vectorstores import FAISS\n", + "from langchain_huggingface import HuggingFaceEmbeddings\n", + "from langchain_community.document_loaders import TextLoader\n", + "from langchain.text_splitter import CharacterTextSplitter\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kb/MES_Copilot/POC/venv_kri_mes/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:13: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from tqdm.autonotebook import tqdm, trange\n" + ] + } + ], + "source": [ + "# Load documents\n", + "loader = TextLoader(\"krishna_doc.txt\")\n", + "documents = loader.load()\n", + "\n", + "# Split text into chunks\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_documents(documents)\n", + "\n", + "embedding_model_name = \"sentence-transformers/all-MiniLM-L6-v2\"\n", + "embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)\n", + "\n", + "vector_store = FAISS.from_documents(texts, embeddings)\n", + "retriever = vector_store.as_retriever()\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"You are a helpful assistant\"),\n", + " (\"placeholder\", \"{chat_history}\"),\n", + " (\"human\", \"{input}\"),\n", + " (\"placeholder\", \"{agent_scratchpad}\")\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "llm = ChatGroq(\n", + " model=\"mixtral-8x7b-32768\",\n", + " temperature=0,\n", + " max_tokens=None,\n", + " timeout=None,\n", + " max_retries=2\n", + ")\n", + "\n", + "# llm = ChatGroq(model=\"llama3-70b-8192\", temperature=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "@tool\n", + "def retrieve_documents(query: str) -> str:\n", + " \"\"\"Retrieve relevant documents based on the query\"\"\"\n", + " docs = retriever.invoke(query)\n", + " return \"\\n\".join([doc.page_content for doc in docs])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'who is Krishna'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKrishna Dipayan Bhunia is a Senior Software Engineer.\n", + "He works in Capgemini as a Senior Data Engineer.\n", + "In 2012 he was working for Godrej and Boyce as Full stack Developer.\u001b[0m\u001b[32;1m\u001b[1;3mKrishna Dipayan Bhunia is a Senior Software Engineer who works in Capgemini as a Senior Data Engineer. In 2012, he was working for Godrej and Boyce as a Full stack Developer.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: who is Krishna?',\n", + " 'output': 'Krishna Dipayan Bhunia is a Senior Software Engineer who works in Capgemini as a Senior Data Engineer. In 2012, he was working for Godrej and Boyce as a Full stack Developer.'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"who is Krishna?\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'where Krishna works'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKrishna Dipayan Bhunia is a Senior Software Engineer.\n", + "He works in Capgemini as a Senior Data Engineer.\n", + "In 2012 he was working for Godrej and Boyce as Full stack Developer.\u001b[0m\u001b[32;1m\u001b[1;3mKrishna Dipayan Bhunia works at Capgemini as a Senior Data Engineer.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: Pls answer in one word, where Krishna works?',\n", + " 'output': 'Krishna Dipayan Bhunia works at Capgemini as a Senior Data Engineer.'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"Pls answer in one word, where Krishna works?\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'Krishna 2012 work location'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKrishna Dipayan Bhunia is a Senior Software Engineer.\n", + "He works in Capgemini as a Senior Data Engineer.\n", + "In 2012 he was working for Godrej and Boyce as Full stack Developer.\u001b[0m\u001b[32;1m\u001b[1;3mKrishna Dipayan Bhunia was working for Godrej and Boyce as a Full stack Developer in 2012.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: where Krishna was working on 2012',\n", + " 'output': 'Krishna Dipayan Bhunia was working for Godrej and Boyce as a Full stack Developer in 2012.'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"where Krishna was working on 2012\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv_kri_mes", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/code/nb_files/rag2_pdf.ipynb b/code/nb_files/rag2_pdf.ipynb new file mode 100644 index 00000000..5c814f39 --- /dev/null +++ b/code/nb_files/rag2_pdf.ipynb @@ -0,0 +1,1331 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/bin/bash: /home/kb/MES/MES_POC/POC/venv_kri_mes/bin/pip: /home/kb/MES_Copilot/POC/venv_kri_mes/bin/python: bad interpreter: No such file or directory\n", + "/bin/bash: /home/kb/MES/MES_POC/POC/venv_kri_mes/bin/pip: /home/kb/MES_Copilot/POC/venv_kri_mes/bin/python: bad interpreter: No such file or directory\n" + ] + } + ], + "source": [ + "! pip install langchain_groq\n", + "! pip install pymupdf\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "from langchain.agents import create_tool_calling_agent, AgentExecutor\n", + "from langchain_groq import ChatGroq\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "\n", + "from langchain_community.vectorstores import FAISS\n", + "from langchain_huggingface import HuggingFaceEmbeddings\n", + "from langchain_community.document_loaders import TextLoader\n", + "from langchain.text_splitter import CharacterTextSplitter\n", + "\n", + "from langchain_community.document_loaders import PyMuPDFLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kb/MES/MES_POC/POC/venv_kri_mes/lib/python3.10/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:13: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from tqdm.autonotebook import tqdm, trange\n" + ] + } + ], + "source": [ + "# Load documents\n", + "pdf_loader = PyMuPDFLoader(\"krishna_resume.pdf\")\n", + "documents = pdf_loader.load()\n", + "\n", + "# Split text into chunks\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "texts = text_splitter.split_documents(documents)\n", + "\n", + "embedding_model_name = \"sentence-transformers/all-MiniLM-L6-v2\"\n", + "embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)\n", + "\n", + "vector_store = FAISS.from_documents(texts, embeddings)\n", + "retriever = vector_store.as_retriever()\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"You are a helpful assistant\"),\n", + " (\"placeholder\", \"{chat_history}\"),\n", + " (\"human\", \"{input}\"),\n", + " (\"placeholder\", \"{agent_scratchpad}\")\n", + " ]\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# llm = ChatGroq(\n", + "# model=\"mixtral-8x7b-32768\",\n", + "# temperature=0,\n", + "# max_tokens=None,\n", + "# timeout=None,\n", + "# max_retries=2\n", + "# )\n", + "\n", + "\n", + "llm = ChatGroq(model=\"llama3-70b-8192\", temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "@tool\n", + "def retrieve_documents(query: str) -> str:\n", + " \"\"\"Retrieve relevant documents based on the query\"\"\"\n", + " docs = retriever.invoke(query)\n", + " return \"\\n\".join([doc.page_content for doc in docs])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'who is Krishna?'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKRISHNA DIPAYAN BHUNIA \n", + "Senior Software Engineer | Senior Data Engineer | Python Developer \n", + "+91 865 200 7894 \n", + " \n", + "Github Link \n", + "LinkedIn \n", + " \n", + "kri.career@gmail.com \n", + "SUMMARY \n", + "● Seasoned Software Engineer/Full Stack Developer with 12+ years of experience across industries like CRM, Oil & Gas, Financial, and FMCG. Over \n", + "6 years of expertise in Python as a Data Engineer/Scientist, specializing in data engineering, analysis, and visualization. \n", + "● Proficient in writing clean, production-level Python code, with strong skills in Object-Oriented Programming, DSA, and relational databases. Used \n", + "Python libraries such as NumPy, and Pandas for advanced data analysis and engineering stuff. \n", + "● Experienced in designing software solutions using Python, C#, and back-end technologies. Adept in cloud platforms (AWS, Azure), containerization \n", + "(Docker), and database management (SQL, No-SQL), with a strong ability to analyze algorithms and collaborate effectively. \n", + "EDUCATION \n", + "● M.TECH (Computer Eng. with Specialization in Software Eng.) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2016 – Jun 2018 \n", + "Veermata Jijabai Technological Institute (VJTI), Mumbai \n", + "● B.E Computer Engineering (University of Mumbai) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2008 – Jun 2012 \n", + "TECHNICAL SKILLS \n", + "● Languages & Frameworks: Python, NumPy, Pandas, Java, JavaScript (jQuery), MS SQL, PL-SQL, Oracle, C# (C-Sharp), Docker, Github. \n", + "● Data Engineering & Analysis: Data Science and Data engineering processes, Data Analysis, Classification, Regression Analysis, Cluster Analysis, \n", + "Data Visualization, Statistics, AWS (Amazon Web Services), Azure. \n", + "● Databases: MS SQL Server (2019/2017), MySQL, MongoDB, Oracle 19c. \n", + "● Expertise: DSA (Data Structures & Algorithms), Algorithm Analysis (Space & Time Complexity), Distributed Systems & Architecture. \n", + "WORK EXPERIENCE & PROJECTS \n", + "Senior Data Engineer in Capgemini \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Oct 2021 – Present \n", + "Client - P&G(Procter & Gamble) - (Role - Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Pune(Remote) \n", + "● Technical Stack: Python, Pandas, NumPy, MS-SQL 2022, JSON, VS Code, Azure, Azure Pipelines, Docker, GitHub. \n", + "● Proficient in Python for data engineering pipelines. \n", + "● Experienced with Pandas, NumPy for data processing and analysis. \n", + "● Skilled in client communication to gather data and project requirements. \n", + "● Set up and configured Azure pipelines for CI/CD of data pipelines. \n", + "● Created and optimized MS-SQL tables for efficient data storage. \n", + "● Populated metrics to Grafana Dashboard from manufacturing data. \n", + "● Designed ETL pipelines for data extraction from various sources. \n", + "● Applied statistical techniques and optimization for data contextualization. \n", + "● Delivered pipelines for leakage, defect detection, and insights. \n", + "Key Achievement - With accommodating data from multiple sources makes it easy to contextualize data for easy analysis purposes, which improves \n", + "the manufacturing lines are more adhered to not making defects/leakages in product manufacturing. The defects/leakages have been reduced to half \n", + "of the previous defects identified. \n", + "Software Engineer and Data Analyst in Larsen and Toubro Technological Services worked in Oil & Gas Domain \n", + " \n", + "Jan 2020 – Oct 2021 \n", + " (Role – Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Vadodara \n", + " (Python, NumPy, Pandas, SciPy, MS SQL Server 2017, Oracle 11G and Oracle 10G, xlrd, xlwt, openpyxl (Anaconda Navigator Framework)). \n", + "Plant Portal Data Analysis [ Covestro, IPEP (Data analytics team)] \n", + "● Implementation for the process of Analysis (Deduplication, Data Seagration, File Reference Missing in Smart Plant portal, finding different \n", + "equipment properties in plant portal, Different levels of data missing. Different file formats, Segregation and separation of different language \n", + "formats, metadata, and migrated documents in smart plant portal). \n", + "● Used Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write with \n", + "formatting using closedXML, OpenXML, EPPlus, Excel Data Reader. \n", + "● Key Achievement – Computation has reduced time significantly for a large dataset and customer delivery satisfaction was achieved on-time. \n", + "SPI (Smart Plant Integration) and SPEC [ Covestro, IPEP (Data analytics team)] \n", + "● Data Comparison of migrating data from oracle to SQL Server. Generation of Excel Data and its comparison using EPPlus and generating reports. \n", + "Created Custom controls and User controls, Delegation of custom and user controls, Background task and multithreading with parameterized \n", + "arguments, async and await tasks, a comparison using multi and different primary keys also with dynamic and different key columns. Data view \n", + "sync. Comparison checks for BLOB data and other Hash File data. \n", + "● Implemented Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write \n", + "with formatting using closedXML, EPPlus, ExcelDataReader and Dataset in Visual Studio. Logging facility in python and used data frames. \n", + "● Key Achievement – With multithreading and parallel processing output time has been significantly improved by more than 90%. Worked closely \n", + "with cross-functional teams including plant engineers, data owners and site engineers to design, build and deliver end-to end solutions to their \n", + "individual data conversion and wrangling problems. \n", + "Software Engineer in Quantum Mutual Fund worked in Financial Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Aug 2018 - Jan 2020 \n", + "(Role – Full Stack Developer/ Data Engineer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumba \n", + "Invest Online (INON) - Backend Developer \n", + "● Developed business logic for investment in Q-MF & transaction, Re-Purchase, Redemption, Switch, New Purchase, STP, SWP, SIP. \n", + "● Optimized backend SQL code for Investors, used to do KYC according to their citizenship with different criteria & process form in different queues. \n", + "Software Engineer in Smart Solutions worked in CRM, FMCG Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2013 - July 2018 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Inventory management and services - Backend Developer \n", + "● Capturing, managing, and maintaining inventories and managing. Assigning blocks and holding inventories. \n", + "Graduate Engineer Trainee in Godrej & Boyce worked in CRM Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2012 - Sept 2013 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Timespan- Attendance and Leave management system, Assets management system & Drawer depository safe - Application \n", + "● Capturing, managing, and maintaining attendance and leaves for the employee in the company. Migrating assets and system information to \n", + "standard format. \n", + "CERTIFICATION AND LINKS (Recent) \n", + "● LinkedIn & Pluralsight Certificates - #Using Python with Excel #Data Wrangling with Pandas for Data Engineers\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'Krishna'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKRISHNA DIPAYAN BHUNIA \n", + "Senior Software Engineer | Senior Data Engineer | Python Developer \n", + "+91 865 200 7894 \n", + " \n", + "Github Link \n", + "LinkedIn \n", + " \n", + "kri.career@gmail.com \n", + "SUMMARY \n", + "● Seasoned Software Engineer/Full Stack Developer with 12+ years of experience across industries like CRM, Oil & Gas, Financial, and FMCG. Over \n", + "6 years of expertise in Python as a Data Engineer/Scientist, specializing in data engineering, analysis, and visualization. \n", + "● Proficient in writing clean, production-level Python code, with strong skills in Object-Oriented Programming, DSA, and relational databases. Used \n", + "Python libraries such as NumPy, and Pandas for advanced data analysis and engineering stuff. \n", + "● Experienced in designing software solutions using Python, C#, and back-end technologies. Adept in cloud platforms (AWS, Azure), containerization \n", + "(Docker), and database management (SQL, No-SQL), with a strong ability to analyze algorithms and collaborate effectively. \n", + "EDUCATION \n", + "● M.TECH (Computer Eng. with Specialization in Software Eng.) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2016 – Jun 2018 \n", + "Veermata Jijabai Technological Institute (VJTI), Mumbai \n", + "● B.E Computer Engineering (University of Mumbai) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2008 – Jun 2012 \n", + "TECHNICAL SKILLS \n", + "● Languages & Frameworks: Python, NumPy, Pandas, Java, JavaScript (jQuery), MS SQL, PL-SQL, Oracle, C# (C-Sharp), Docker, Github. \n", + "● Data Engineering & Analysis: Data Science and Data engineering processes, Data Analysis, Classification, Regression Analysis, Cluster Analysis, \n", + "Data Visualization, Statistics, AWS (Amazon Web Services), Azure. \n", + "● Databases: MS SQL Server (2019/2017), MySQL, MongoDB, Oracle 19c. \n", + "● Expertise: DSA (Data Structures & Algorithms), Algorithm Analysis (Space & Time Complexity), Distributed Systems & Architecture. \n", + "WORK EXPERIENCE & PROJECTS \n", + "Senior Data Engineer in Capgemini \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Oct 2021 – Present \n", + "Client - P&G(Procter & Gamble) - (Role - Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Pune(Remote) \n", + "● Technical Stack: Python, Pandas, NumPy, MS-SQL 2022, JSON, VS Code, Azure, Azure Pipelines, Docker, GitHub. \n", + "● Proficient in Python for data engineering pipelines. \n", + "● Experienced with Pandas, NumPy for data processing and analysis. \n", + "● Skilled in client communication to gather data and project requirements. \n", + "● Set up and configured Azure pipelines for CI/CD of data pipelines. \n", + "● Created and optimized MS-SQL tables for efficient data storage. \n", + "● Populated metrics to Grafana Dashboard from manufacturing data. \n", + "● Designed ETL pipelines for data extraction from various sources. \n", + "● Applied statistical techniques and optimization for data contextualization. \n", + "● Delivered pipelines for leakage, defect detection, and insights. \n", + "Key Achievement - With accommodating data from multiple sources makes it easy to contextualize data for easy analysis purposes, which improves \n", + "the manufacturing lines are more adhered to not making defects/leakages in product manufacturing. The defects/leakages have been reduced to half \n", + "of the previous defects identified. \n", + "Software Engineer and Data Analyst in Larsen and Toubro Technological Services worked in Oil & Gas Domain \n", + " \n", + "Jan 2020 – Oct 2021 \n", + " (Role – Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Vadodara \n", + " (Python, NumPy, Pandas, SciPy, MS SQL Server 2017, Oracle 11G and Oracle 10G, xlrd, xlwt, openpyxl (Anaconda Navigator Framework)). \n", + "Plant Portal Data Analysis [ Covestro, IPEP (Data analytics team)] \n", + "● Implementation for the process of Analysis (Deduplication, Data Seagration, File Reference Missing in Smart Plant portal, finding different \n", + "equipment properties in plant portal, Different levels of data missing. Different file formats, Segregation and separation of different language \n", + "formats, metadata, and migrated documents in smart plant portal). \n", + "● Used Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write with \n", + "formatting using closedXML, OpenXML, EPPlus, Excel Data Reader. \n", + "● Key Achievement – Computation has reduced time significantly for a large dataset and customer delivery satisfaction was achieved on-time. \n", + "SPI (Smart Plant Integration) and SPEC [ Covestro, IPEP (Data analytics team)] \n", + "● Data Comparison of migrating data from oracle to SQL Server. Generation of Excel Data and its comparison using EPPlus and generating reports. \n", + "Created Custom controls and User controls, Delegation of custom and user controls, Background task and multithreading with parameterized \n", + "arguments, async and await tasks, a comparison using multi and different primary keys also with dynamic and different key columns. Data view \n", + "sync. Comparison checks for BLOB data and other Hash File data. \n", + "● Implemented Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write \n", + "with formatting using closedXML, EPPlus, ExcelDataReader and Dataset in Visual Studio. Logging facility in python and used data frames. \n", + "● Key Achievement – With multithreading and parallel processing output time has been significantly improved by more than 90%. Worked closely \n", + "with cross-functional teams including plant engineers, data owners and site engineers to design, build and deliver end-to end solutions to their \n", + "individual data conversion and wrangling problems. \n", + "Software Engineer in Quantum Mutual Fund worked in Financial Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Aug 2018 - Jan 2020 \n", + "(Role – Full Stack Developer/ Data Engineer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumba \n", + "Invest Online (INON) - Backend Developer \n", + "● Developed business logic for investment in Q-MF & transaction, Re-Purchase, Redemption, Switch, New Purchase, STP, SWP, SIP. \n", + "● Optimized backend SQL code for Investors, used to do KYC according to their citizenship with different criteria & process form in different queues. \n", + "Software Engineer in Smart Solutions worked in CRM, FMCG Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2013 - July 2018 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Inventory management and services - Backend Developer \n", + "● Capturing, managing, and maintaining inventories and managing. Assigning blocks and holding inventories. \n", + "Graduate Engineer Trainee in Godrej & Boyce worked in CRM Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2012 - Sept 2013 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Timespan- Attendance and Leave management system, Assets management system & Drawer depository safe - Application \n", + "● Capturing, managing, and maintaining attendance and leaves for the employee in the company. Migrating assets and system information to \n", + "standard format. \n", + "CERTIFICATION AND LINKS (Recent) \n", + "● LinkedIn & Pluralsight Certificates - #Using Python with Excel #Data Wrangling with Pandas for Data Engineers\u001b[0m\u001b[32;1m\u001b[1;3mKrishna Dipayan Bhunia is a seasoned software engineer and data engineer with over 12 years of experience across various industries. He has expertise in Python, data engineering, analysis, and visualization. He has worked on several projects, including data pipelines, ETL, and data visualization, and has experience with cloud platforms, containerization, and database management.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: who is Krishna?',\n", + " 'output': 'Krishna Dipayan Bhunia is a seasoned software engineer and data engineer with over 12 years of experience across various industries. He has expertise in Python, data engineering, analysis, and visualization. He has worked on several projects, including data pipelines, ETL, and data visualization, and has experience with cloud platforms, containerization, and database management.'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"who is Krishna?\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'In which company Krishna works?'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKRISHNA DIPAYAN BHUNIA \n", + "Senior Software Engineer | Senior Data Engineer | Python Developer \n", + "+91 865 200 7894 \n", + " \n", + "Github Link \n", + "LinkedIn \n", + " \n", + "kri.career@gmail.com \n", + "SUMMARY \n", + "● Seasoned Software Engineer/Full Stack Developer with 12+ years of experience across industries like CRM, Oil & Gas, Financial, and FMCG. Over \n", + "6 years of expertise in Python as a Data Engineer/Scientist, specializing in data engineering, analysis, and visualization. \n", + "● Proficient in writing clean, production-level Python code, with strong skills in Object-Oriented Programming, DSA, and relational databases. Used \n", + "Python libraries such as NumPy, and Pandas for advanced data analysis and engineering stuff. \n", + "● Experienced in designing software solutions using Python, C#, and back-end technologies. Adept in cloud platforms (AWS, Azure), containerization \n", + "(Docker), and database management (SQL, No-SQL), with a strong ability to analyze algorithms and collaborate effectively. \n", + "EDUCATION \n", + "● M.TECH (Computer Eng. with Specialization in Software Eng.) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2016 – Jun 2018 \n", + "Veermata Jijabai Technological Institute (VJTI), Mumbai \n", + "● B.E Computer Engineering (University of Mumbai) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2008 – Jun 2012 \n", + "TECHNICAL SKILLS \n", + "● Languages & Frameworks: Python, NumPy, Pandas, Java, JavaScript (jQuery), MS SQL, PL-SQL, Oracle, C# (C-Sharp), Docker, Github. \n", + "● Data Engineering & Analysis: Data Science and Data engineering processes, Data Analysis, Classification, Regression Analysis, Cluster Analysis, \n", + "Data Visualization, Statistics, AWS (Amazon Web Services), Azure. \n", + "● Databases: MS SQL Server (2019/2017), MySQL, MongoDB, Oracle 19c. \n", + "● Expertise: DSA (Data Structures & Algorithms), Algorithm Analysis (Space & Time Complexity), Distributed Systems & Architecture. \n", + "WORK EXPERIENCE & PROJECTS \n", + "Senior Data Engineer in Capgemini \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Oct 2021 – Present \n", + "Client - P&G(Procter & Gamble) - (Role - Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Pune(Remote) \n", + "● Technical Stack: Python, Pandas, NumPy, MS-SQL 2022, JSON, VS Code, Azure, Azure Pipelines, Docker, GitHub. \n", + "● Proficient in Python for data engineering pipelines. \n", + "● Experienced with Pandas, NumPy for data processing and analysis. \n", + "● Skilled in client communication to gather data and project requirements. \n", + "● Set up and configured Azure pipelines for CI/CD of data pipelines. \n", + "● Created and optimized MS-SQL tables for efficient data storage. \n", + "● Populated metrics to Grafana Dashboard from manufacturing data. \n", + "● Designed ETL pipelines for data extraction from various sources. \n", + "● Applied statistical techniques and optimization for data contextualization. \n", + "● Delivered pipelines for leakage, defect detection, and insights. \n", + "Key Achievement - With accommodating data from multiple sources makes it easy to contextualize data for easy analysis purposes, which improves \n", + "the manufacturing lines are more adhered to not making defects/leakages in product manufacturing. The defects/leakages have been reduced to half \n", + "of the previous defects identified. \n", + "Software Engineer and Data Analyst in Larsen and Toubro Technological Services worked in Oil & Gas Domain \n", + " \n", + "Jan 2020 – Oct 2021 \n", + " (Role – Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Vadodara \n", + " (Python, NumPy, Pandas, SciPy, MS SQL Server 2017, Oracle 11G and Oracle 10G, xlrd, xlwt, openpyxl (Anaconda Navigator Framework)). \n", + "Plant Portal Data Analysis [ Covestro, IPEP (Data analytics team)] \n", + "● Implementation for the process of Analysis (Deduplication, Data Seagration, File Reference Missing in Smart Plant portal, finding different \n", + "equipment properties in plant portal, Different levels of data missing. Different file formats, Segregation and separation of different language \n", + "formats, metadata, and migrated documents in smart plant portal). \n", + "● Used Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write with \n", + "formatting using closedXML, OpenXML, EPPlus, Excel Data Reader. \n", + "● Key Achievement – Computation has reduced time significantly for a large dataset and customer delivery satisfaction was achieved on-time. \n", + "SPI (Smart Plant Integration) and SPEC [ Covestro, IPEP (Data analytics team)] \n", + "● Data Comparison of migrating data from oracle to SQL Server. Generation of Excel Data and its comparison using EPPlus and generating reports. \n", + "Created Custom controls and User controls, Delegation of custom and user controls, Background task and multithreading with parameterized \n", + "arguments, async and await tasks, a comparison using multi and different primary keys also with dynamic and different key columns. Data view \n", + "sync. Comparison checks for BLOB data and other Hash File data. \n", + "● Implemented Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write \n", + "with formatting using closedXML, EPPlus, ExcelDataReader and Dataset in Visual Studio. Logging facility in python and used data frames. \n", + "● Key Achievement – With multithreading and parallel processing output time has been significantly improved by more than 90%. Worked closely \n", + "with cross-functional teams including plant engineers, data owners and site engineers to design, build and deliver end-to end solutions to their \n", + "individual data conversion and wrangling problems. \n", + "Software Engineer in Quantum Mutual Fund worked in Financial Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Aug 2018 - Jan 2020 \n", + "(Role – Full Stack Developer/ Data Engineer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumba \n", + "Invest Online (INON) - Backend Developer \n", + "● Developed business logic for investment in Q-MF & transaction, Re-Purchase, Redemption, Switch, New Purchase, STP, SWP, SIP. \n", + "● Optimized backend SQL code for Investors, used to do KYC according to their citizenship with different criteria & process form in different queues. \n", + "Software Engineer in Smart Solutions worked in CRM, FMCG Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2013 - July 2018 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Inventory management and services - Backend Developer \n", + "● Capturing, managing, and maintaining inventories and managing. Assigning blocks and holding inventories. \n", + "Graduate Engineer Trainee in Godrej & Boyce worked in CRM Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2012 - Sept 2013 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Timespan- Attendance and Leave management system, Assets management system & Drawer depository safe - Application \n", + "● Capturing, managing, and maintaining attendance and leaves for the employee in the company. Migrating assets and system information to \n", + "standard format. \n", + "CERTIFICATION AND LINKS (Recent) \n", + "● LinkedIn & Pluralsight Certificates - #Using Python with Excel #Data Wrangling with Pandas for Data Engineers\u001b[0m\u001b[32;1m\u001b[1;3mTool use failed: no valid JSON object found in message content\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: In which company Krishna works?',\n", + " 'output': 'Tool use failed: no valid JSON object found in message content'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"In which company Krishna works?\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'where Krishna works'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKRISHNA DIPAYAN BHUNIA \n", + "Senior Software Engineer | Senior Data Engineer | Python Developer \n", + "+91 865 200 7894 \n", + " \n", + "Github Link \n", + "LinkedIn \n", + " \n", + "kri.career@gmail.com \n", + "SUMMARY \n", + "● Seasoned Software Engineer/Full Stack Developer with 12+ years of experience across industries like CRM, Oil & Gas, Financial, and FMCG. Over \n", + "6 years of expertise in Python as a Data Engineer/Scientist, specializing in data engineering, analysis, and visualization. \n", + "● Proficient in writing clean, production-level Python code, with strong skills in Object-Oriented Programming, DSA, and relational databases. Used \n", + "Python libraries such as NumPy, and Pandas for advanced data analysis and engineering stuff. \n", + "● Experienced in designing software solutions using Python, C#, and back-end technologies. Adept in cloud platforms (AWS, Azure), containerization \n", + "(Docker), and database management (SQL, No-SQL), with a strong ability to analyze algorithms and collaborate effectively. \n", + "EDUCATION \n", + "● M.TECH (Computer Eng. with Specialization in Software Eng.) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2016 – Jun 2018 \n", + "Veermata Jijabai Technological Institute (VJTI), Mumbai \n", + "● B.E Computer Engineering (University of Mumbai) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2008 – Jun 2012 \n", + "TECHNICAL SKILLS \n", + "● Languages & Frameworks: Python, NumPy, Pandas, Java, JavaScript (jQuery), MS SQL, PL-SQL, Oracle, C# (C-Sharp), Docker, Github. \n", + "● Data Engineering & Analysis: Data Science and Data engineering processes, Data Analysis, Classification, Regression Analysis, Cluster Analysis, \n", + "Data Visualization, Statistics, AWS (Amazon Web Services), Azure. \n", + "● Databases: MS SQL Server (2019/2017), MySQL, MongoDB, Oracle 19c. \n", + "● Expertise: DSA (Data Structures & Algorithms), Algorithm Analysis (Space & Time Complexity), Distributed Systems & Architecture. \n", + "WORK EXPERIENCE & PROJECTS \n", + "Senior Data Engineer in Capgemini \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Oct 2021 – Present \n", + "Client - P&G(Procter & Gamble) - (Role - Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Pune(Remote) \n", + "● Technical Stack: Python, Pandas, NumPy, MS-SQL 2022, JSON, VS Code, Azure, Azure Pipelines, Docker, GitHub. \n", + "● Proficient in Python for data engineering pipelines. \n", + "● Experienced with Pandas, NumPy for data processing and analysis. \n", + "● Skilled in client communication to gather data and project requirements. \n", + "● Set up and configured Azure pipelines for CI/CD of data pipelines. \n", + "● Created and optimized MS-SQL tables for efficient data storage. \n", + "● Populated metrics to Grafana Dashboard from manufacturing data. \n", + "● Designed ETL pipelines for data extraction from various sources. \n", + "● Applied statistical techniques and optimization for data contextualization. \n", + "● Delivered pipelines for leakage, defect detection, and insights. \n", + "Key Achievement - With accommodating data from multiple sources makes it easy to contextualize data for easy analysis purposes, which improves \n", + "the manufacturing lines are more adhered to not making defects/leakages in product manufacturing. The defects/leakages have been reduced to half \n", + "of the previous defects identified. \n", + "Software Engineer and Data Analyst in Larsen and Toubro Technological Services worked in Oil & Gas Domain \n", + " \n", + "Jan 2020 – Oct 2021 \n", + " (Role – Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Vadodara \n", + " (Python, NumPy, Pandas, SciPy, MS SQL Server 2017, Oracle 11G and Oracle 10G, xlrd, xlwt, openpyxl (Anaconda Navigator Framework)). \n", + "Plant Portal Data Analysis [ Covestro, IPEP (Data analytics team)] \n", + "● Implementation for the process of Analysis (Deduplication, Data Seagration, File Reference Missing in Smart Plant portal, finding different \n", + "equipment properties in plant portal, Different levels of data missing. Different file formats, Segregation and separation of different language \n", + "formats, metadata, and migrated documents in smart plant portal). \n", + "● Used Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write with \n", + "formatting using closedXML, OpenXML, EPPlus, Excel Data Reader. \n", + "● Key Achievement – Computation has reduced time significantly for a large dataset and customer delivery satisfaction was achieved on-time. \n", + "SPI (Smart Plant Integration) and SPEC [ Covestro, IPEP (Data analytics team)] \n", + "● Data Comparison of migrating data from oracle to SQL Server. Generation of Excel Data and its comparison using EPPlus and generating reports. \n", + "Created Custom controls and User controls, Delegation of custom and user controls, Background task and multithreading with parameterized \n", + "arguments, async and await tasks, a comparison using multi and different primary keys also with dynamic and different key columns. Data view \n", + "sync. Comparison checks for BLOB data and other Hash File data. \n", + "● Implemented Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write \n", + "with formatting using closedXML, EPPlus, ExcelDataReader and Dataset in Visual Studio. Logging facility in python and used data frames. \n", + "● Key Achievement – With multithreading and parallel processing output time has been significantly improved by more than 90%. Worked closely \n", + "with cross-functional teams including plant engineers, data owners and site engineers to design, build and deliver end-to end solutions to their \n", + "individual data conversion and wrangling problems. \n", + "Software Engineer in Quantum Mutual Fund worked in Financial Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Aug 2018 - Jan 2020 \n", + "(Role – Full Stack Developer/ Data Engineer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumba \n", + "Invest Online (INON) - Backend Developer \n", + "● Developed business logic for investment in Q-MF & transaction, Re-Purchase, Redemption, Switch, New Purchase, STP, SWP, SIP. \n", + "● Optimized backend SQL code for Investors, used to do KYC according to their citizenship with different criteria & process form in different queues. \n", + "Software Engineer in Smart Solutions worked in CRM, FMCG Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2013 - July 2018 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Inventory management and services - Backend Developer \n", + "● Capturing, managing, and maintaining inventories and managing. Assigning blocks and holding inventories. \n", + "Graduate Engineer Trainee in Godrej & Boyce worked in CRM Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2012 - Sept 2013 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Timespan- Attendance and Leave management system, Assets management system & Drawer depository safe - Application \n", + "● Capturing, managing, and maintaining attendance and leaves for the employee in the company. Migrating assets and system information to \n", + "standard format. \n", + "CERTIFICATION AND LINKS (Recent) \n", + "● LinkedIn & Pluralsight Certificates - #Using Python with Excel #Data Wrangling with Pandas for Data Engineers\u001b[0m\u001b[32;1m\u001b[1;3mDwarka\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: Pls answer in one word, where Krishna works?',\n", + " 'output': 'Dwarka'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"Pls answer in one word, where Krishna works?\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'where Krishna was working on 2012'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKRISHNA DIPAYAN BHUNIA \n", + "Senior Software Engineer | Senior Data Engineer | Python Developer \n", + "+91 865 200 7894 \n", + " \n", + "Github Link \n", + "LinkedIn \n", + " \n", + "kri.career@gmail.com \n", + "SUMMARY \n", + "● Seasoned Software Engineer/Full Stack Developer with 12+ years of experience across industries like CRM, Oil & Gas, Financial, and FMCG. Over \n", + "6 years of expertise in Python as a Data Engineer/Scientist, specializing in data engineering, analysis, and visualization. \n", + "● Proficient in writing clean, production-level Python code, with strong skills in Object-Oriented Programming, DSA, and relational databases. Used \n", + "Python libraries such as NumPy, and Pandas for advanced data analysis and engineering stuff. \n", + "● Experienced in designing software solutions using Python, C#, and back-end technologies. Adept in cloud platforms (AWS, Azure), containerization \n", + "(Docker), and database management (SQL, No-SQL), with a strong ability to analyze algorithms and collaborate effectively. \n", + "EDUCATION \n", + "● M.TECH (Computer Eng. with Specialization in Software Eng.) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2016 – Jun 2018 \n", + "Veermata Jijabai Technological Institute (VJTI), Mumbai \n", + "● B.E Computer Engineering (University of Mumbai) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2008 – Jun 2012 \n", + "TECHNICAL SKILLS \n", + "● Languages & Frameworks: Python, NumPy, Pandas, Java, JavaScript (jQuery), MS SQL, PL-SQL, Oracle, C# (C-Sharp), Docker, Github. \n", + "● Data Engineering & Analysis: Data Science and Data engineering processes, Data Analysis, Classification, Regression Analysis, Cluster Analysis, \n", + "Data Visualization, Statistics, AWS (Amazon Web Services), Azure. \n", + "● Databases: MS SQL Server (2019/2017), MySQL, MongoDB, Oracle 19c. \n", + "● Expertise: DSA (Data Structures & Algorithms), Algorithm Analysis (Space & Time Complexity), Distributed Systems & Architecture. \n", + "WORK EXPERIENCE & PROJECTS \n", + "Senior Data Engineer in Capgemini \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Oct 2021 – Present \n", + "Client - P&G(Procter & Gamble) - (Role - Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Pune(Remote) \n", + "● Technical Stack: Python, Pandas, NumPy, MS-SQL 2022, JSON, VS Code, Azure, Azure Pipelines, Docker, GitHub. \n", + "● Proficient in Python for data engineering pipelines. \n", + "● Experienced with Pandas, NumPy for data processing and analysis. \n", + "● Skilled in client communication to gather data and project requirements. \n", + "● Set up and configured Azure pipelines for CI/CD of data pipelines. \n", + "● Created and optimized MS-SQL tables for efficient data storage. \n", + "● Populated metrics to Grafana Dashboard from manufacturing data. \n", + "● Designed ETL pipelines for data extraction from various sources. \n", + "● Applied statistical techniques and optimization for data contextualization. \n", + "● Delivered pipelines for leakage, defect detection, and insights. \n", + "Key Achievement - With accommodating data from multiple sources makes it easy to contextualize data for easy analysis purposes, which improves \n", + "the manufacturing lines are more adhered to not making defects/leakages in product manufacturing. The defects/leakages have been reduced to half \n", + "of the previous defects identified. \n", + "Software Engineer and Data Analyst in Larsen and Toubro Technological Services worked in Oil & Gas Domain \n", + " \n", + "Jan 2020 – Oct 2021 \n", + " (Role – Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Vadodara \n", + " (Python, NumPy, Pandas, SciPy, MS SQL Server 2017, Oracle 11G and Oracle 10G, xlrd, xlwt, openpyxl (Anaconda Navigator Framework)). \n", + "Plant Portal Data Analysis [ Covestro, IPEP (Data analytics team)] \n", + "● Implementation for the process of Analysis (Deduplication, Data Seagration, File Reference Missing in Smart Plant portal, finding different \n", + "equipment properties in plant portal, Different levels of data missing. Different file formats, Segregation and separation of different language \n", + "formats, metadata, and migrated documents in smart plant portal). \n", + "● Used Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write with \n", + "formatting using closedXML, OpenXML, EPPlus, Excel Data Reader. \n", + "● Key Achievement – Computation has reduced time significantly for a large dataset and customer delivery satisfaction was achieved on-time. \n", + "SPI (Smart Plant Integration) and SPEC [ Covestro, IPEP (Data analytics team)] \n", + "● Data Comparison of migrating data from oracle to SQL Server. Generation of Excel Data and its comparison using EPPlus and generating reports. \n", + "Created Custom controls and User controls, Delegation of custom and user controls, Background task and multithreading with parameterized \n", + "arguments, async and await tasks, a comparison using multi and different primary keys also with dynamic and different key columns. Data view \n", + "sync. Comparison checks for BLOB data and other Hash File data. \n", + "● Implemented Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write \n", + "with formatting using closedXML, EPPlus, ExcelDataReader and Dataset in Visual Studio. Logging facility in python and used data frames. \n", + "● Key Achievement – With multithreading and parallel processing output time has been significantly improved by more than 90%. Worked closely \n", + "with cross-functional teams including plant engineers, data owners and site engineers to design, build and deliver end-to end solutions to their \n", + "individual data conversion and wrangling problems. \n", + "Software Engineer in Quantum Mutual Fund worked in Financial Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Aug 2018 - Jan 2020 \n", + "(Role – Full Stack Developer/ Data Engineer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumba \n", + "Invest Online (INON) - Backend Developer \n", + "● Developed business logic for investment in Q-MF & transaction, Re-Purchase, Redemption, Switch, New Purchase, STP, SWP, SIP. \n", + "● Optimized backend SQL code for Investors, used to do KYC according to their citizenship with different criteria & process form in different queues. \n", + "Software Engineer in Smart Solutions worked in CRM, FMCG Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2013 - July 2018 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Inventory management and services - Backend Developer \n", + "● Capturing, managing, and maintaining inventories and managing. Assigning blocks and holding inventories. \n", + "Graduate Engineer Trainee in Godrej & Boyce worked in CRM Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2012 - Sept 2013 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Timespan- Attendance and Leave management system, Assets management system & Drawer depository safe - Application \n", + "● Capturing, managing, and maintaining attendance and leaves for the employee in the company. Migrating assets and system information to \n", + "standard format. \n", + "CERTIFICATION AND LINKS (Recent) \n", + "● LinkedIn & Pluralsight Certificates - #Using Python with Excel #Data Wrangling with Pandas for Data Engineers\u001b[0m\u001b[32;1m\u001b[1;3m\n", + "Invoking: `retrieve_documents` with `{'query': 'where Krishna was working on 2012'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3mKRISHNA DIPAYAN BHUNIA \n", + "Senior Software Engineer | Senior Data Engineer | Python Developer \n", + "+91 865 200 7894 \n", + " \n", + "Github Link \n", + "LinkedIn \n", + " \n", + "kri.career@gmail.com \n", + "SUMMARY \n", + "● Seasoned Software Engineer/Full Stack Developer with 12+ years of experience across industries like CRM, Oil & Gas, Financial, and FMCG. Over \n", + "6 years of expertise in Python as a Data Engineer/Scientist, specializing in data engineering, analysis, and visualization. \n", + "● Proficient in writing clean, production-level Python code, with strong skills in Object-Oriented Programming, DSA, and relational databases. Used \n", + "Python libraries such as NumPy, and Pandas for advanced data analysis and engineering stuff. \n", + "● Experienced in designing software solutions using Python, C#, and back-end technologies. Adept in cloud platforms (AWS, Azure), containerization \n", + "(Docker), and database management (SQL, No-SQL), with a strong ability to analyze algorithms and collaborate effectively. \n", + "EDUCATION \n", + "● M.TECH (Computer Eng. with Specialization in Software Eng.) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2016 – Jun 2018 \n", + "Veermata Jijabai Technological Institute (VJTI), Mumbai \n", + "● B.E Computer Engineering (University of Mumbai) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Aug 2008 – Jun 2012 \n", + "TECHNICAL SKILLS \n", + "● Languages & Frameworks: Python, NumPy, Pandas, Java, JavaScript (jQuery), MS SQL, PL-SQL, Oracle, C# (C-Sharp), Docker, Github. \n", + "● Data Engineering & Analysis: Data Science and Data engineering processes, Data Analysis, Classification, Regression Analysis, Cluster Analysis, \n", + "Data Visualization, Statistics, AWS (Amazon Web Services), Azure. \n", + "● Databases: MS SQL Server (2019/2017), MySQL, MongoDB, Oracle 19c. \n", + "● Expertise: DSA (Data Structures & Algorithms), Algorithm Analysis (Space & Time Complexity), Distributed Systems & Architecture. \n", + "WORK EXPERIENCE & PROJECTS \n", + "Senior Data Engineer in Capgemini \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Oct 2021 – Present \n", + "Client - P&G(Procter & Gamble) - (Role - Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Pune(Remote) \n", + "● Technical Stack: Python, Pandas, NumPy, MS-SQL 2022, JSON, VS Code, Azure, Azure Pipelines, Docker, GitHub. \n", + "● Proficient in Python for data engineering pipelines. \n", + "● Experienced with Pandas, NumPy for data processing and analysis. \n", + "● Skilled in client communication to gather data and project requirements. \n", + "● Set up and configured Azure pipelines for CI/CD of data pipelines. \n", + "● Created and optimized MS-SQL tables for efficient data storage. \n", + "● Populated metrics to Grafana Dashboard from manufacturing data. \n", + "● Designed ETL pipelines for data extraction from various sources. \n", + "● Applied statistical techniques and optimization for data contextualization. \n", + "● Delivered pipelines for leakage, defect detection, and insights. \n", + "Key Achievement - With accommodating data from multiple sources makes it easy to contextualize data for easy analysis purposes, which improves \n", + "the manufacturing lines are more adhered to not making defects/leakages in product manufacturing. The defects/leakages have been reduced to half \n", + "of the previous defects identified. \n", + "Software Engineer and Data Analyst in Larsen and Toubro Technological Services worked in Oil & Gas Domain \n", + " \n", + "Jan 2020 – Oct 2021 \n", + " (Role – Data Engineer, Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Vadodara \n", + " (Python, NumPy, Pandas, SciPy, MS SQL Server 2017, Oracle 11G and Oracle 10G, xlrd, xlwt, openpyxl (Anaconda Navigator Framework)). \n", + "Plant Portal Data Analysis [ Covestro, IPEP (Data analytics team)] \n", + "● Implementation for the process of Analysis (Deduplication, Data Seagration, File Reference Missing in Smart Plant portal, finding different \n", + "equipment properties in plant portal, Different levels of data missing. Different file formats, Segregation and separation of different language \n", + "formats, metadata, and migrated documents in smart plant portal). \n", + "● Used Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write with \n", + "formatting using closedXML, OpenXML, EPPlus, Excel Data Reader. \n", + "● Key Achievement – Computation has reduced time significantly for a large dataset and customer delivery satisfaction was achieved on-time. \n", + "SPI (Smart Plant Integration) and SPEC [ Covestro, IPEP (Data analytics team)] \n", + "● Data Comparison of migrating data from oracle to SQL Server. Generation of Excel Data and its comparison using EPPlus and generating reports. \n", + "Created Custom controls and User controls, Delegation of custom and user controls, Background task and multithreading with parameterized \n", + "arguments, async and await tasks, a comparison using multi and different primary keys also with dynamic and different key columns. Data view \n", + "sync. Comparison checks for BLOB data and other Hash File data. \n", + "● Implemented Task, Thread and Parallel process and optimized codes with Singleton and Factory Design pattern. Excel dynamic read and write \n", + "with formatting using closedXML, EPPlus, ExcelDataReader and Dataset in Visual Studio. Logging facility in python and used data frames. \n", + "● Key Achievement – With multithreading and parallel processing output time has been significantly improved by more than 90%. Worked closely \n", + "with cross-functional teams including plant engineers, data owners and site engineers to design, build and deliver end-to end solutions to their \n", + "individual data conversion and wrangling problems. \n", + "Software Engineer in Quantum Mutual Fund worked in Financial Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Aug 2018 - Jan 2020 \n", + "(Role – Full Stack Developer/ Data Engineer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumba \n", + "Invest Online (INON) - Backend Developer \n", + "● Developed business logic for investment in Q-MF & transaction, Re-Purchase, Redemption, Switch, New Purchase, STP, SWP, SIP. \n", + "● Optimized backend SQL code for Investors, used to do KYC according to their citizenship with different criteria & process form in different queues. \n", + "Software Engineer in Smart Solutions worked in CRM, FMCG Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2013 - July 2018 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Inventory management and services - Backend Developer \n", + "● Capturing, managing, and maintaining inventories and managing. Assigning blocks and holding inventories. \n", + "Graduate Engineer Trainee in Godrej & Boyce worked in CRM Domain \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "Oct 2012 - Sept 2013 \n", + "(Role – Full Stack Developer) \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " Mumbai \n", + "Timespan- Attendance and Leave management system, Assets management system & Drawer depository safe - Application \n", + "● Capturing, managing, and maintaining attendance and leaves for the employee in the company. Migrating assets and system information to \n", + "standard format. \n", + "CERTIFICATION AND LINKS (Recent) \n", + "● LinkedIn & Pluralsight Certificates - #Using Python with Excel #Data Wrangling with Pandas for Data Engineers\u001b[0m\u001b[32;1m\u001b[1;3mKrishna Dipayan Bhunia was working as a Graduate Engineer Trainee in Godrej & Boyce in the CRM domain from October 2012 to September 2013.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': 'Retrieve relevant documents for: where Krishna was working on 2012',\n", + " 'output': 'Krishna Dipayan Bhunia was working as a Graduate Engineer Trainee in Godrej & Boyce in the CRM domain from October 2012 to September 2013.'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tools = [retrieve_documents]\n", + "query = \"where Krishna was working on 2012\"\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": f\"Retrieve relevant documents for: {query}\"})\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv_kri_mes", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..0408073b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +langchain +langchain_core +langchain_groq +langchain_community +langchain_huggingface +pymupdf \ No newline at end of file