diff --git a/.github/workflows/docker-build-workflow.yaml b/.github/workflows/docker-build-workflow.yaml index f9b9a713..116bc990 100644 --- a/.github/workflows/docker-build-workflow.yaml +++ b/.github/workflows/docker-build-workflow.yaml @@ -52,11 +52,8 @@ jobs: context: . file: ${{ matrix.file }} push: true - # tags: | - # ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ github.sha }} - # ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ inputs.tag }} tags: | - ghcr.io/denispalnitsky/${{ matrix.image_name }}:${{ github.sha }} - ghcr.io/denispalnitsky/${{ matrix.image_name }}:${{ inputs.tag }} + ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ github.sha }} + ghcr.io/${{ github.repository_owner }}/${{ matrix.image_name }}:${{ inputs.tag }} platforms: linux/amd64,linux/arm64 target: ${{ matrix.target }} \ No newline at end of file diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index c4861f73..3aaeaaff 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -6,8 +6,12 @@ on: tags: - '*' +permissions: + contents: read + packages: write + jobs: call-docker-build: uses: ./.github/workflows/docker-build-workflow.yaml with: - tag: ${{ github.ref_name }} \ No newline at end of file + tag: ${{ github.ref_name }} diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..fdd08177 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Danny Avila + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index b2605520..e581c6d8 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ The `ATLAS_MONGO_DB_URI` could be the same or different from what is used by Lib } ``` -Follw one of the [four documented methods](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#procedure) to create the vector index. +Follow one of the [four documented methods](https://www.mongodb.com/docs/atlas/atlas-vector-search/create-index/#procedure) to create the vector index. ### Cloud Installation Settings: diff --git a/main.py b/main.py index 2a79210f..f916fdd2 100644 --- a/main.py +++ b/main.py @@ -34,6 +34,7 @@ UnstructuredXMLLoader, UnstructuredRSTLoader, UnstructuredExcelLoader, + UnstructuredPowerPointLoader, ) from models import ( @@ -285,6 +286,8 @@ def get_loader(filename: str, file_content_type: str, filepath: str): loader = UnstructuredRSTLoader(filepath, mode="elements") elif file_ext == "xml": loader = UnstructuredXMLLoader(filepath) + elif file_ext == "pptx": + loader = UnstructuredPowerPointLoader(filepath) elif file_ext == "md": loader = UnstructuredMarkdownLoader(filepath) elif file_content_type == "application/epub+zip": diff --git a/requirements.txt b/requirements.txt index 37cdaa2f..edf53ce5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,3 +26,5 @@ opencv-python-headless==4.9.0.80 pymongo==4.6.3 langchain-mongodb==0.1.3 cryptography==42.0.7 +python-magic==0.4.27 +python-pptx==0.6.23 diff --git a/store_factory.py b/store_factory.py index 35b77fbd..16b81ef6 100644 --- a/store_factory.py +++ b/store_factory.py @@ -25,7 +25,7 @@ def get_vector_store( elif mode == "atlas-mongo": mongo_db = MongoClient(connection_string).get_database() mong_collection = mongo_db[collection_name] - return AtlasMongoVector(collection=mong_collection, embedding=embeddings) + return AtlasMongoVector(collection=mong_collection, embedding=embeddings, index_name=collection_name) else: raise ValueError("Invalid mode specified. Choose 'sync' or 'async'.")