From 6cb177906951fdc947de6b9d5021fbeb81a67c7e Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Tue, 16 Jun 2037 22:55:00 +0300
Subject: [PATCH 01/39] more suggestion fixing

---
 main/chroma.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/main/chroma.py b/main/chroma.py
index ffaef96..1a18fe7 100644
--- a/main/chroma.py
+++ b/main/chroma.py
@@ -40,7 +40,7 @@ def add_text_search_suggestion(self, suggestion_query: str) -> None:
         subsearches = suggestion_query.split()
         self.desc_collection.add(
             documents=[suggestion_query] + subsearches,
-            ids=[str(hash(suggestion_query))]
+            ids=[str(hash(query)) for query in [suggestion_query] + subsearches]
         )
 
     def get_text_search_suggestions(self, search_query: str, top_k: int = 20) -> list[str]:

From 079da0d56f0eaf6551b4b0526401000b9f11ecbd Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Sat, 22 Jun 2024 17:25:18 +0300
Subject: [PATCH 02/39] add whisper support to inference

---
 docker-compose.yml            |   2 +-
 inference/.env.dist           |   1 +
 inference/Dockerfile          |   8 +-
 inference/clip.py             |  83 +++---
 inference/deps.py             |   7 +
 inference/download_whisper.sh |   9 +
 inference/main.py             |  45 ++++
 inference/models.py           |  10 +
 inference/settings.py         |   1 +
 inference/whisper.py          |  18 ++
 main/.env.dist                |   2 +-
 main/chroma.py                |   7 +-
 main/clip.py                  |  22 +-
 main/main.py                  |  16 ++++++++----
 main/models.py                |  10 +-
 poetry.lock                   | 493 +++++++++++++++++++++++++++++++++-
 pyproject.toml                |   1 +
 17 files changed, 664 insertions(+), 71 deletions(-)
 create mode 100755 inference/download_whisper.sh
 create mode 100644 inference/main.py
 create mode 100644 inference/models.py
 create mode 100644 inference/whisper.py

diff --git a/docker-compose.yml b/docker-compose.yml
index 234190f..de94422 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -22,7 +22,7 @@ services:
       context: ./inference
       dockerfile: Dockerfile
     container_name: inference
-    command: uvicorn clip:app --host "0.0.0.0" --port 8040
+    command: uvicorn main:app --host "0.0.0.0" --port 8040
    restart: unless-stopped
     volumes:
       - inference-model-data:/app/model_data
diff --git a/inference/.env.dist b/inference/.env.dist
index 8b47efb..71f7798 100644
--- a/inference/.env.dist
+++ b/inference/.env.dist
@@ -1 +1,2 @@
 CLIP_MODEL=laion/CLIP-ViT-g-14-laion2B-s12B-b42K
+WHISPER_PATH=/app/model_data/ggml-large-v3.bin
diff --git a/inference/Dockerfile b/inference/Dockerfile
index cd80f92..1386abc 100644
--- a/inference/Dockerfile
+++ b/inference/Dockerfile
@@ -5,11 +5,15 @@ ENV PYTHONUNBUFFERED 1
 
 WORKDIR /app
 
-COPY requirements.txt /app/
+COPY download_whisper.sh /app/
+RUN ./download_whisper.sh
 
 RUN apt-get update && apt-get install ffmpeg -y
+
+COPY requirements.txt /app/
 RUN python -m pip install --upgrade pip && pip install -r requirements.txt
+
 COPY ./ /app/
 
 EXPOSE 8040
-CMD uvicorn clip:app --port 8040
+CMD uvicorn main:app --port 8040
diff --git a/inference/clip.py b/inference/clip.py
index f361947..cb2ed82 100644
--- a/inference/clip.py
+++ b/inference/clip.py
@@ -1,62 +1,49 @@
-import torch
-from typing import Optional
-from fastapi import FastAPI, HTTPException
-from fastapi.responses import JSONResponse
+from dataclasses import dataclass
+from typing import Callable, Literal
+
 from PIL import Image
-from pydantic import BaseModel
-from deps import Model, Processor, lifespan
-from frame_video import create_key_frames_for_video
-
-app = FastAPI(lifespan=lifespan)
-
-class EncodeRequest(BaseModel):
-    link: Optional[str] = None
-    description: Optional[str] = None
-
-@app.get("/")
-async def root():
-    return JSONResponse(content={"ok": True})
-
-@app.post("/encode") -async def encode(request: EncodeRequest, processor: Processor, model: Model): - if not any((request.description, request.link)): - raise HTTPException( - status_code=400, detail="Please provide either 'description' as string or 'link' as video URL, or both." - ) - - text_features, image_features = None, None - - if request.description: - text_inputs = processor(text=[request.description], return_tensors="pt", padding=True) +import torch +from transformers import CLIPModel, CLIPProcessor + +from inference.frame_video import VideoFrame, create_key_frames_for_video + + +@dataclass +class CLIP: + processor: CLIPProcessor + model: CLIPModel + + _create_key_frames_for_video: Callable[[str], list[VideoFrame]] = create_key_frames_for_video + + def __call__(self, encode_source: str, encode_type: Literal["text"] | Literal["video"]) -> list[float]: + if encode_type == "text": + return self._encode_text(encode_source) + + if encode_type == "video": + return self._encode_video(encode_source) + + def _encode_text(self, description: str) -> list[float]: + text_inputs = self.processor(text=[description], return_tensors="pt", padding=True) with torch.no_grad(): - text_features = model.get_text_features(**text_inputs) + text_features = self.model.get_text_features(**text_inputs) text_features /= text_features.norm(dim=-1, keepdim=True) - - if request.link: - images = create_key_frames_for_video(request.link) + + return text_features.tolist()[0] + + def _encode_video(self, link: str) -> list[float]: + images = self._create_key_frames_for_video(link) image_inputs = [] for image in images: image = Image.open(image.file) - image_input = processor(images=image, return_tensors="pt") + image_input = self.processor(images=image, return_tensors="pt") image_inputs.append(image_input) with torch.no_grad(): - image_features = model.get_image_features(**image_inputs[0]) + image_features = self.model.get_image_features(**image_inputs[0]) for image_input in image_inputs[1:]: - image_feature = model.get_image_features(**image_input) + image_feature = self.model.get_image_features(**image_input) image_features = torch.cat((image_features, image_feature), dim=0) features = torch.mean(image_features, dim=0) features /= features.norm(dim=-1, keepdim=True) - if request.description and request.link: - text_weight = 1.0 - video_weight = 2.0 # Giving more importance to video - # Merged weighted vectors of text and video didn't work so well, leave off for now - unified_features = (text_features * text_weight + image_features * video_weight) / (text_weight + video_weight) - return {"features": image_features.tolist()[0]} - - elif request.description: - return {"features": text_features.tolist()[0]} - - elif request.link: - return {"features": image_features.tolist()[0]} + return features.tolist()[0] diff --git a/inference/deps.py b/inference/deps.py index 3f77a14..ad509c6 100644 --- a/inference/deps.py +++ b/inference/deps.py @@ -4,6 +4,7 @@ from fastapi import Depends, FastAPI, Request from transformers import CLIPModel, CLIPProcessor +from inference.whisper import WhisperService from settings import Settings @@ -17,6 +18,7 @@ async def lifespan(app: FastAPI): Settings.clip_model, cache_dir="./model_cache" ) + app.state.whisper_model = WhisperService() yield @@ -28,5 +30,10 @@ def _get_clip_processor(request: Request) -> CLIPProcessor: return request.app.state.processor +def _get_whisper(request: Request) -> WhisperService: + return request.app.state.whisper_model + + Processor = Annotated[CLIPProcessor, 
Depends(_get_clip_processor)] Model = Annotated[CLIPModel, Depends(_get_clip_model)] +Whisper = Annotated[WhisperService, Depends(_get_whisper)] diff --git a/inference/download_whisper.sh b/inference/download_whisper.sh new file mode 100755 index 0000000..b39d2e7 --- /dev/null +++ b/inference/download_whisper.sh @@ -0,0 +1,9 @@ +#!usr/bin/bash + +if ! test /app/model_data/ggml-large-v3.bin; then + mkdir /app/model_data + git clone https://github.com/ggerganov/whisper.cpp.git + cd whisper.cpp + bash ./models/download-ggml-model.sh large-v3 + mv ./models/ggml-large-v3.bin /app/model_data +fi diff --git a/inference/main.py b/inference/main.py new file mode 100644 index 0000000..f8edabc --- /dev/null +++ b/inference/main.py @@ -0,0 +1,45 @@ +from fastapi import FastAPI +from fastapi.responses import JSONResponse + +from inference.deps import Model, Processor, Whisper, lifespan +from inference.clip import CLIP +from inference.models import EncodeRequest, EncodeSearchRequest + +app = FastAPI(lifespan=lifespan) + +@app.get("/") +async def root(): + return JSONResponse(content={"ok": True}) + +@app.post("/encode") +async def encode( + request: EncodeRequest, + processor: Processor, + model: Model, + whisper: Whisper +): + clip = CLIP(processor=processor, model=model) + + video_features = clip(request.link, encode_type="video") + if request.description is not None: + description_features = clip(request.description, encode_type="text") + else: + description_features = None + + audio_transcription = whisper(request.link) + audio_features = clip(audio_transcription, encode_type="text") + return { + "video": video_features, + "audio": audio_features, + "description": description_features + } + +@app.post("/encode-search") +async def encode_search( + request: EncodeSearchRequest, processor: Processor, model: Model +): + clip = CLIP(processor=processor, model=model) + + features = clip(request.query, encode_type="text") + + return {"features": features} diff --git a/inference/models.py b/inference/models.py new file mode 100644 index 0000000..61888e4 --- /dev/null +++ b/inference/models.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + + +class EncodeRequest(BaseModel): + link: str + description: str | None = None + + +class EncodeSearchRequest(BaseModel): + query: str diff --git a/inference/settings.py b/inference/settings.py index 82c3549..9170c72 100644 --- a/inference/settings.py +++ b/inference/settings.py @@ -6,3 +6,4 @@ class Settings: clip_model: str = env.str("CLIP_MODEL") + whisper_path: str = env.str("WHISPER_PATH") diff --git a/inference/whisper.py b/inference/whisper.py new file mode 100644 index 0000000..8cde250 --- /dev/null +++ b/inference/whisper.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass, field +from io import BytesIO + +import requests +from whisper_cpp_python import Whisper + +from inference.settings import Settings + + +@dataclass +class WhisperService: + _service: Whisper = field(default_factory=lambda: Whisper(model_path=Settings.whisper_path)) + + def __call__(self, link: str) -> str: + + video_data = BytesIO(requests.get(link).content) + data = self._service.transcribe(video_data) + return data["text"] diff --git a/main/.env.dist b/main/.env.dist index 19f980c..353c69c 100644 --- a/main/.env.dist +++ b/main/.env.dist @@ -1,3 +1,3 @@ -CLIP_URL=http://inference:8040/encode +CLIP_URL=http://inference:8040/ DB_HOST=chroma_db DB_PORT=8000 diff --git a/main/chroma.py b/main/chroma.py index 1a18fe7..97bea98 100644 --- a/main/chroma.py +++ b/main/chroma.py @@ -1,3 +1,4 @@ 
+from uuid import uuid4 import chromadb from chromadb.server import Settings as ChromaSettings from models import Feature @@ -25,8 +26,10 @@ def __init__( def add_feature(self, feature: Feature) -> None: self.collection.add( - ids=[feature.link], + ids=[str(uuid4())], embeddings=[feature.features], + uris=[feature.link], + metadatas=[{"feature_type": feature.feature_type}] ) def search_relevant_videos(self, search_feature: Feature, top_k: int = 100) -> list[str]: @@ -34,7 +37,7 @@ def search_relevant_videos(self, search_feature: Feature, top_k: int = 100) -> l query_embeddings=search_feature.features, n_results=top_k ) - return results['ids'][0] + return results['uris'][0] def add_text_search_suggestion(self, suggestion_query: str) -> None: subsearches = suggestion_query.split() diff --git a/main/clip.py b/main/clip.py index b94c8dd..9ae4e42 100644 --- a/main/clip.py +++ b/main/clip.py @@ -1,26 +1,34 @@ import aiohttp -from models import Video, Feature +from models import SearchFeature, Video, Feature class CLIPService: def __init__(self, url: str) -> None: self.clip_url = url - async def get_video_embedding(self, request: Video) -> Feature: + async def get_video_embeddings(self, request: Video) -> list[Feature]: async with aiohttp.ClientSession().post( - url=self.clip_url, + url=f"{self.clip_url}/encode", json=request.model_dump(mode="json") ) as resp: features = await resp.json() - return Feature(features=features['features'], link=request.link, description=request.description) + return [ + Feature( + features=v, + link=request.link, + description=request.description, + feature_type=k + ) + for k, v in features.items() + ] async def get_text_embedding( self, - request: Video, + request: SearchFeature, ) -> Feature: async with aiohttp.ClientSession().post( - self.clip_url, + f"{self.clip_url}/encode-search", json=request.model_dump(mode="json") ) as resp: features = await resp.json() - return Feature(features=features['features']) + return Feature(features=features['features'], feature_type="description") diff --git a/main/main.py b/main/main.py index 86903d5..962cc43 100644 --- a/main/main.py +++ b/main/main.py @@ -3,18 +3,30 @@ from deps import Opus, Clip, Chroma, Speller, lifespan from settings import Settings -from models import Video, Text, SuggestRequest +from models import SearchFeature, Video, Text, SuggestRequest app = FastAPI(lifespan=lifespan) + @app.post("/index") async def add_video_to_index(request: Video, clip: Clip, chroma: Chroma) -> Video: """Добавляет новое видео в хранилище - индекс""" +<<<<<<< Updated upstream feature = await clip.get_video_embedding(request) if request.description is not None: chroma.add_text_search_suggestion(suggestion_query=request.description) chroma.add_feature(feature=feature) return request.model_dump(mode="dict") +======= + features = await clip.get_video_embeddings(request) + if request.description is not None: + chroma.add_text_search_suggestion(suggestion_query=request.description) + + for feature in features: + chroma.add_feature(feature=feature) + return request + +>>>>>>> Stashed changes @app.get("/search") @cache(expire=Settings.cache_lifetime) @@ -29,8 +41,13 @@ async def search_for_related_videos( spelled_search = speller(params.text) translated_search = translator(spelled_search) search_vector = await clip.get_text_embedding( +<<<<<<< Updated upstream Video( description=translated_search +======= + SearchFeature( + query=translated_search +>>>>>>> Stashed changes ) ) return {"results": 
chroma.search_relevant_videos(search_feature=search_vector, top_k=params.return_amount)} diff --git a/main/models.py b/main/models.py index 633e847..e25975a 100644 --- a/main/models.py +++ b/main/models.py @@ -1,17 +1,20 @@ -from typing import Optional +from typing import Literal, Optional from pydantic import BaseModel class Video(BaseModel): """Represents a Link to Video with text description to be vectorized and added to index""" - description: Optional[str] = None - link: Optional[str] = None + link: str + description: str | None = None class Text(BaseModel): """Represents a text query to search related videos""" text: str return_amount: int = 50 +class SearchFeature(BaseModel): + query: str + class SuggestRequest(BaseModel): """Represents a text query to suggest related completions""" text: str @@ -21,3 +24,4 @@ class Feature(BaseModel): link: Optional[str] = None description: Optional[str] = None features: list[float] + feature_type: Literal["description"] | Literal["video"] | Literal["audio"] diff --git a/poetry.lock b/poetry.lock index 825512c..8e449ae 100644 --- a/poetry.lock +++ b/poetry.lock @@ -226,6 +226,20 @@ tests = ["attrs[tests-no-zope]", "zope-interface"] tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"] tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"] +[[package]] +name = "audioread" +version = "3.0.1" +description = "Multi-library, cross-platform audio decoding." +optional = false +python-versions = ">=3.6" +files = [ + {file = "audioread-3.0.1-py3-none-any.whl", hash = "sha256:4cdce70b8adc0da0a3c9e0d85fb10b3ace30fbdf8d1670fd443929b61d117c33"}, + {file = "audioread-3.0.1.tar.gz", hash = "sha256:ac5460a5498c48bdf2e8e767402583a4dcd13f4414d286f42ce4379e8b35066d"}, +] + +[package.extras] +test = ["tox"] + [[package]] name = "autocorrect" version = "2.6.1" @@ -332,6 +346,70 @@ files = [ {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, ] +[[package]] +name = "cffi" +version = "1.16.0" +description = "Foreign Function Interface for Python calling C code." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088"}, + {file = "cffi-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7"}, + {file = "cffi-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743"}, + {file = "cffi-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d"}, + {file = "cffi-1.16.0-cp310-cp310-win32.whl", hash = "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a"}, + {file = "cffi-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404"}, + {file = "cffi-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56"}, + {file = "cffi-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc"}, + {file = "cffi-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb"}, + {file = "cffi-1.16.0-cp311-cp311-win32.whl", hash = "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab"}, + {file = "cffi-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956"}, + {file = "cffi-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6"}, + {file = "cffi-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969"}, + {file = "cffi-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520"}, + {file = "cffi-1.16.0-cp312-cp312-win32.whl", hash = "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b"}, + {file = "cffi-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235"}, + {file = "cffi-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b"}, + {file = "cffi-1.16.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324"}, + {file = "cffi-1.16.0-cp38-cp38-win32.whl", hash = "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a"}, + {file = "cffi-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed"}, + {file = "cffi-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4"}, + {file = "cffi-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000"}, + {file = "cffi-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe"}, + {file = "cffi-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4"}, + {file = "cffi-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8"}, + {file = "cffi-1.16.0.tar.gz", hash = "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0"}, +] + +[package.dependencies] +pycparser = "*" + [[package]] name = "charset-normalizer" version = "3.3.2" @@ -550,6 +628,17 @@ humanfriendly = ">=9.1" [package.extras] cron = ["capturer (>=2.4)"] +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + [[package]] name = "deprecated" version = "1.2.14" @@ -1360,6 +1449,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + [[package]] name = "jsonpatch" version = "1.33" @@ -1503,6 +1603,86 @@ orjson = ">=3.9.14,<4.0.0" pydantic = ">=1,<3" requests = ">=2,<3" +[[package]] +name = "lazy-loader" +version = "0.4" +description = "Makes it easy to load subpackages and functions on demand." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"}, + {file = "lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1"}, +] + +[package.dependencies] +packaging = "*" + +[package.extras] +dev = ["changelist (==0.5)"] +lint = ["pre-commit (==3.7.0)"] +test = ["pytest (>=7.4)", "pytest-cov (>=4.1)"] + +[[package]] +name = "librosa" +version = "0.10.2.post1" +description = "Python module for audio and music processing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "librosa-0.10.2.post1-py3-none-any.whl", hash = "sha256:dc882750e8b577a63039f25661b7e39ec4cfbacc99c1cffba666cd664fb0a7a0"}, + {file = "librosa-0.10.2.post1.tar.gz", hash = "sha256:cd99f16717cbcd1e0983e37308d1db46a6f7dfc2e396e5a9e61e6821e44bd2e7"}, +] + +[package.dependencies] +audioread = ">=2.1.9" +decorator = ">=4.3.0" +joblib = ">=0.14" +lazy-loader = ">=0.1" +msgpack = ">=1.0" +numba = ">=0.51.0" +numpy = ">=1.20.3,<1.22.0 || >1.22.0,<1.22.1 || >1.22.1,<1.22.2 || >1.22.2" +pooch = ">=1.1" +scikit-learn = ">=0.20.0" +scipy = ">=1.2.0" +soundfile = ">=0.12.1" +soxr = ">=0.3.2" +typing-extensions = ">=4.1.1" + +[package.extras] +display = ["matplotlib (>=3.5.0)"] +docs = ["ipython (>=7.0)", "matplotlib (>=3.5.0)", "mir-eval (>=0.5)", "numba (>=0.51)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (>=1.2.0)", "sphinxcontrib-svg2pdfconverter"] +tests = ["matplotlib (>=3.5.0)", "packaging (>=20.0)", "pytest", "pytest-cov", "pytest-mpl", "resampy (>=0.2.2)", "samplerate", "types-decorator"] + +[[package]] +name = "llvmlite" +version = "0.43.0" +description = "lightweight wrapper around basic LLVM functionality" +optional = false +python-versions = ">=3.9" +files = [ + {file = "llvmlite-0.43.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a289af9a1687c6cf463478f0fa8e8aa3b6fb813317b0d70bf1ed0759eab6f761"}, + {file = "llvmlite-0.43.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6d4fd101f571a31acb1559ae1af30f30b1dc4b3186669f92ad780e17c81e91bc"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d434ec7e2ce3cc8f452d1cd9a28591745de022f931d67be688a737320dfcead"}, + {file = "llvmlite-0.43.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6912a87782acdff6eb8bf01675ed01d60ca1f2551f8176a300a886f09e836a6a"}, + {file = "llvmlite-0.43.0-cp310-cp310-win_amd64.whl", hash = "sha256:14f0e4bf2fd2d9a75a3534111e8ebeb08eda2f33e9bdd6dfa13282afacdde0ed"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8d0618cb9bfe40ac38a9633f2493d4d4e9fcc2f438d39a4e854f39cc0f5f98"}, + {file = "llvmlite-0.43.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e0a9a1a39d4bf3517f2af9d23d479b4175ead205c592ceeb8b89af48a327ea57"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1da416ab53e4f7f3bc8d4eeba36d801cc1894b9fbfbf2022b29b6bad34a7df2"}, + {file = "llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:977525a1e5f4059316b183fb4fd34fa858c9eade31f165427a3977c95e3ee749"}, + {file = "llvmlite-0.43.0-cp311-cp311-win_amd64.whl", hash = "sha256:d5bd550001d26450bd90777736c69d68c487d17bf371438f975229b2b8241a91"}, + {file = 
"llvmlite-0.43.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f99b600aa7f65235a5a05d0b9a9f31150c390f31261f2a0ba678e26823ec38f7"}, + {file = "llvmlite-0.43.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:35d80d61d0cda2d767f72de99450766250560399edc309da16937b93d3b676e7"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eccce86bba940bae0d8d48ed925f21dbb813519169246e2ab292b5092aba121f"}, + {file = "llvmlite-0.43.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df6509e1507ca0760787a199d19439cc887bfd82226f5af746d6977bd9f66844"}, + {file = "llvmlite-0.43.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a2872ee80dcf6b5dbdc838763d26554c2a18aa833d31a2635bff16aafefb9c9"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cd2a7376f7b3367019b664c21f0c61766219faa3b03731113ead75107f3b66c"}, + {file = "llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18e9953c748b105668487b7c81a3e97b046d8abf95c4ddc0cd3c94f4e4651ae8"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74937acd22dc11b33946b67dca7680e6d103d6e90eeaaaf932603bec6fe7b03a"}, + {file = "llvmlite-0.43.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9efc739cc6ed760f795806f67889923f7274276f0eb45092a1473e40d9b867"}, + {file = "llvmlite-0.43.0-cp39-cp39-win_amd64.whl", hash = "sha256:47e147cdda9037f94b399bf03bfd8a6b6b1f2f90be94a454e3386f006455a9b4"}, + {file = "llvmlite-0.43.0.tar.gz", hash = "sha256:ae2b5b5c3ef67354824fb75517c8db5fbe93bc02cd9671f3c62271626bc041d5"}, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -1764,6 +1944,71 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] +[[package]] +name = "msgpack" +version = "1.0.8" +description = "MessagePack serializer" +optional = false +python-versions = ">=3.8" +files = [ + {file = "msgpack-1.0.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:505fe3d03856ac7d215dbe005414bc28505d26f0c128906037e66d98c4e95868"}, + {file = "msgpack-1.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b7842518a63a9f17107eb176320960ec095a8ee3b4420b5f688e24bf50c53c"}, + {file = "msgpack-1.0.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:376081f471a2ef24828b83a641a02c575d6103a3ad7fd7dade5486cad10ea659"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e390971d082dba073c05dbd56322427d3280b7cc8b53484c9377adfbae67dc2"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e073efcba9ea99db5acef3959efa45b52bc67b61b00823d2a1a6944bf45982"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82d92c773fbc6942a7a8b520d22c11cfc8fd83bba86116bfcf962c2f5c2ecdaa"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9ee32dcb8e531adae1f1ca568822e9b3a738369b3b686d1477cbc643c4a9c128"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e3aa7e51d738e0ec0afbed661261513b38b3014754c9459508399baf14ae0c9d"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69284049d07fce531c17404fcba2bb1df472bc2dcdac642ae71a2d079d950653"}, + {file = "msgpack-1.0.8-cp310-cp310-win32.whl", hash = "sha256:13577ec9e247f8741c84d06b9ece5f654920d8365a4b636ce0e44f15e07ec693"}, + {file = "msgpack-1.0.8-cp310-cp310-win_amd64.whl", 
hash = "sha256:e532dbd6ddfe13946de050d7474e3f5fb6ec774fbb1a188aaf469b08cf04189a"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9517004e21664f2b5a5fd6333b0731b9cf0817403a941b393d89a2f1dc2bd836"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d16a786905034e7e34098634b184a7d81f91d4c3d246edc6bd7aefb2fd8ea6ad"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2872993e209f7ed04d963e4b4fbae72d034844ec66bc4ca403329db2074377b"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c330eace3dd100bdb54b5653b966de7f51c26ec4a7d4e87132d9b4f738220ba"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83b5c044f3eff2a6534768ccfd50425939e7a8b5cf9a7261c385de1e20dcfc85"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1876b0b653a808fcd50123b953af170c535027bf1d053b59790eebb0aeb38950"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dfe1f0f0ed5785c187144c46a292b8c34c1295c01da12e10ccddfc16def4448a"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3528807cbbb7f315bb81959d5961855e7ba52aa60a3097151cb21956fbc7502b"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e2f879ab92ce502a1e65fce390eab619774dda6a6ff719718069ac94084098ce"}, + {file = "msgpack-1.0.8-cp311-cp311-win32.whl", hash = "sha256:26ee97a8261e6e35885c2ecd2fd4a6d38252246f94a2aec23665a4e66d066305"}, + {file = "msgpack-1.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:eadb9f826c138e6cf3c49d6f8de88225a3c0ab181a9b4ba792e006e5292d150e"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:114be227f5213ef8b215c22dde19532f5da9652e56e8ce969bf0a26d7c419fee"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d661dc4785affa9d0edfdd1e59ec056a58b3dbb9f196fa43587f3ddac654ac7b"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d56fd9f1f1cdc8227d7b7918f55091349741904d9520c65f0139a9755952c9e8"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0726c282d188e204281ebd8de31724b7d749adebc086873a59efb8cf7ae27df3"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8db8e423192303ed77cff4dce3a4b88dbfaf43979d280181558af5e2c3c71afc"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99881222f4a8c2f641f25703963a5cefb076adffd959e0558dc9f803a52d6a58"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b5505774ea2a73a86ea176e8a9a4a7c8bf5d521050f0f6f8426afe798689243f"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ef254a06bcea461e65ff0373d8a0dd1ed3aa004af48839f002a0c994a6f72d04"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e1dd7839443592d00e96db831eddb4111a2a81a46b028f0facd60a09ebbdd543"}, + {file = "msgpack-1.0.8-cp312-cp312-win32.whl", hash = "sha256:64d0fcd436c5683fdd7c907eeae5e2cbb5eb872fafbc03a43609d7941840995c"}, + {file = "msgpack-1.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:74398a4cf19de42e1498368c36eed45d9528f5fd0155241e82c4082b7e16cffd"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:0ceea77719d45c839fd73abcb190b8390412a890df2f83fb8cf49b2a4b5c2f40"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1ab0bbcd4d1f7b6991ee7c753655b481c50084294218de69365f8f1970d4c151"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1cce488457370ffd1f953846f82323cb6b2ad2190987cd4d70b2713e17268d24"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3923a1778f7e5ef31865893fdca12a8d7dc03a44b33e2a5f3295416314c09f5d"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a22e47578b30a3e199ab067a4d43d790249b3c0587d9a771921f86250c8435db"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd739c9251d01e0279ce729e37b39d49a08c0420d3fee7f2a4968c0576678f77"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d3420522057ebab1728b21ad473aa950026d07cb09da41103f8e597dfbfaeb13"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5845fdf5e5d5b78a49b826fcdc0eb2e2aa7191980e3d2cfd2a30303a74f212e2"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a0e76621f6e1f908ae52860bdcb58e1ca85231a9b0545e64509c931dd34275a"}, + {file = "msgpack-1.0.8-cp38-cp38-win32.whl", hash = "sha256:374a8e88ddab84b9ada695d255679fb99c53513c0a51778796fcf0944d6c789c"}, + {file = "msgpack-1.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:f3709997b228685fe53e8c433e2df9f0cdb5f4542bd5114ed17ac3c0129b0480"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f51bab98d52739c50c56658cc303f190785f9a2cd97b823357e7aeae54c8f68a"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:73ee792784d48aa338bba28063e19a27e8d989344f34aad14ea6e1b9bd83f596"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f9904e24646570539a8950400602d66d2b2c492b9010ea7e965025cb71d0c86d"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e75753aeda0ddc4c28dce4c32ba2f6ec30b1b02f6c0b14e547841ba5b24f753f"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dbf059fb4b7c240c873c1245ee112505be27497e90f7c6591261c7d3c3a8228"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4916727e31c28be8beaf11cf117d6f6f188dcc36daae4e851fee88646f5b6b18"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7938111ed1358f536daf311be244f34df7bf3cdedb3ed883787aca97778b28d8"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:493c5c5e44b06d6c9268ce21b302c9ca055c1fd3484c25ba41d34476c76ee746"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fbb160554e319f7b22ecf530a80a3ff496d38e8e07ae763b9e82fadfe96f273"}, + {file = "msgpack-1.0.8-cp39-cp39-win32.whl", hash = "sha256:f9af38a89b6a5c04b7d18c492c8ccf2aee7048aff1ce8437c4683bb5a1df893d"}, + {file = "msgpack-1.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ed59dd52075f8fc91da6053b12e8c89e37aa043f8986efd89e61fae69dc1b011"}, + {file = "msgpack-1.0.8.tar.gz", hash = "sha256:95c02b0e27e706e48d0e5426d1710ca78e0f0628d6e89d5b5a5b91a5f12274f3"}, +] + [[package]] name = "multidict" version = "6.0.5" @@ -1881,6 +2126,40 @@ doc = ["myst-nb (>=1.0)", "numpydoc (>=1.7)", "pillow (>=9.4)", "pydata-sphinx-t extra = ["lxml 
(>=4.6)", "pydot (>=2.0)", "pygraphviz (>=1.12)", "sympy (>=1.10)"] test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] +[[package]] +name = "numba" +version = "0.60.0" +description = "compiling Python code using LLVM" +optional = false +python-versions = ">=3.9" +files = [ + {file = "numba-0.60.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d761de835cd38fb400d2c26bb103a2726f548dc30368853121d66201672e651"}, + {file = "numba-0.60.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:159e618ef213fba758837f9837fb402bbe65326e60ba0633dbe6c7f274d42c1b"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1527dc578b95c7c4ff248792ec33d097ba6bef9eda466c948b68dfc995c25781"}, + {file = "numba-0.60.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe0b28abb8d70f8160798f4de9d486143200f34458d34c4a214114e445d7124e"}, + {file = "numba-0.60.0-cp310-cp310-win_amd64.whl", hash = "sha256:19407ced081d7e2e4b8d8c36aa57b7452e0283871c296e12d798852bc7d7f198"}, + {file = "numba-0.60.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a17b70fc9e380ee29c42717e8cc0bfaa5556c416d94f9aa96ba13acb41bdece8"}, + {file = "numba-0.60.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3fb02b344a2a80efa6f677aa5c40cd5dd452e1b35f8d1c2af0dfd9ada9978e4b"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f4fde652ea604ea3c86508a3fb31556a6157b2c76c8b51b1d45eb40c8598703"}, + {file = "numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4142d7ac0210cc86432b818338a2bc368dc773a2f5cf1e32ff7c5b378bd63ee8"}, + {file = "numba-0.60.0-cp311-cp311-win_amd64.whl", hash = "sha256:cac02c041e9b5bc8cf8f2034ff6f0dbafccd1ae9590dc146b3a02a45e53af4e2"}, + {file = "numba-0.60.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d7da4098db31182fc5ffe4bc42c6f24cd7d1cb8a14b59fd755bfee32e34b8404"}, + {file = "numba-0.60.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38d6ea4c1f56417076ecf8fc327c831ae793282e0ff51080c5094cb726507b1c"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:62908d29fb6a3229c242e981ca27e32a6e606cc253fc9e8faeb0e48760de241e"}, + {file = "numba-0.60.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0ebaa91538e996f708f1ab30ef4d3ddc344b64b5227b67a57aa74f401bb68b9d"}, + {file = "numba-0.60.0-cp312-cp312-win_amd64.whl", hash = "sha256:f75262e8fe7fa96db1dca93d53a194a38c46da28b112b8a4aca168f0df860347"}, + {file = "numba-0.60.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:01ef4cd7d83abe087d644eaa3d95831b777aa21d441a23703d649e06b8e06b74"}, + {file = "numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:819a3dfd4630d95fd574036f99e47212a1af41cbcb019bf8afac63ff56834449"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b983bd6ad82fe868493012487f34eae8bf7dd94654951404114f23c3466d34b"}, + {file = "numba-0.60.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c151748cd269ddeab66334bd754817ffc0cabd9433acb0f551697e5151917d25"}, + {file = "numba-0.60.0-cp39-cp39-win_amd64.whl", hash = "sha256:3031547a015710140e8c87226b4cfe927cac199835e5bf7d4fe5cb64e814e3ab"}, + {file = "numba-0.60.0.tar.gz", hash = "sha256:5df6158e5584eece5fc83294b949fd30b9f1125df7708862205217e068aabf16"}, +] + +[package.dependencies] +llvmlite = "==0.43.*" +numpy = ">=1.22,<2.1" + [[package]] name = "numpy" version = "1.26.4" @@ -2617,6 
+2896,27 @@ docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx- test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] type = ["mypy (>=1.8)"] +[[package]] +name = "pooch" +version = "1.8.2" +description = "A friend to fetch your data files" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pooch-1.8.2-py3-none-any.whl", hash = "sha256:3529a57096f7198778a5ceefd5ac3ef0e4d06a6ddaf9fc2d609b806f25302c47"}, + {file = "pooch-1.8.2.tar.gz", hash = "sha256:76561f0de68a01da4df6af38e9955c4c9d1a5c90da73f7e40276a5728ec83d10"}, +] + +[package.dependencies] +packaging = ">=20.0" +platformdirs = ">=2.5.0" +requests = ">=2.19.0" + +[package.extras] +progress = ["tqdm (>=4.41.0,<5.0.0)"] +sftp = ["paramiko (>=2.7.0)"] +xxhash = ["xxhash (>=1.4.3)"] + [[package]] name = "posthog" version = "3.5.0" @@ -2713,6 +3013,17 @@ files = [ [package.dependencies] pyasn1 = ">=0.4.6,<0.7.0" +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, +] + [[package]] name = "pydantic" version = "2.7.3" @@ -3374,6 +3685,93 @@ opencv = ["opencv-python"] opencv-headless = ["opencv-python-headless"] pyav = ["av"] +[[package]] +name = "scikit-learn" +version = "1.5.0" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:12e40ac48555e6b551f0a0a5743cc94cc5a765c9513fe708e01f0aa001da2801"}, + {file = "scikit_learn-1.5.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:f405c4dae288f5f6553b10c4ac9ea7754d5180ec11e296464adb5d6ac68b6ef5"}, + {file = "scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df8ccabbf583315f13160a4bb06037bde99ea7d8211a69787a6b7c5d4ebb6fc3"}, + {file = "scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c75ea812cd83b1385bbfa94ae971f0d80adb338a9523f6bbcb5e0b0381151d4"}, + {file = "scikit_learn-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:a90c5da84829a0b9b4bf00daf62754b2be741e66b5946911f5bdfaa869fcedd6"}, + {file = "scikit_learn-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2a65af2d8a6cce4e163a7951a4cfbfa7fceb2d5c013a4b593686c7f16445cf9d"}, + {file = "scikit_learn-1.5.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:4c0c56c3005f2ec1db3787aeaabefa96256580678cec783986836fc64f8ff622"}, + {file = "scikit_learn-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f77547165c00625551e5c250cefa3f03f2fc92c5e18668abd90bfc4be2e0bff"}, + {file = "scikit_learn-1.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:118a8d229a41158c9f90093e46b3737120a165181a1b58c03461447aa4657415"}, + {file = "scikit_learn-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:a03b09f9f7f09ffe8c5efffe2e9de1196c696d811be6798ad5eddf323c6f4d40"}, + {file = "scikit_learn-1.5.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:460806030c666addee1f074788b3978329a5bfdc9b7d63e7aad3f6d45c67a210"}, + {file = "scikit_learn-1.5.0-cp312-cp312-macosx_12_0_arm64.whl", hash = 
"sha256:1b94d6440603752b27842eda97f6395f570941857456c606eb1d638efdb38184"}, + {file = "scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d82c2e573f0f2f2f0be897e7a31fcf4e73869247738ab8c3ce7245549af58ab8"}, + {file = "scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3a10e1d9e834e84d05e468ec501a356226338778769317ee0b84043c0d8fb06"}, + {file = "scikit_learn-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:855fc5fa8ed9e4f08291203af3d3e5fbdc4737bd617a371559aaa2088166046e"}, + {file = "scikit_learn-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:40fb7d4a9a2db07e6e0cae4dc7bdbb8fada17043bac24104d8165e10e4cff1a2"}, + {file = "scikit_learn-1.5.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:47132440050b1c5beb95f8ba0b2402bbd9057ce96ec0ba86f2f445dd4f34df67"}, + {file = "scikit_learn-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:174beb56e3e881c90424e21f576fa69c4ffcf5174632a79ab4461c4c960315ac"}, + {file = "scikit_learn-1.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261fe334ca48f09ed64b8fae13f9b46cc43ac5f580c4a605cbb0a517456c8f71"}, + {file = "scikit_learn-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:057b991ac64b3e75c9c04b5f9395eaf19a6179244c089afdebaad98264bff37c"}, + {file = "scikit_learn-1.5.0.tar.gz", hash = "sha256:789e3db01c750ed6d496fa2db7d50637857b451e57bcae863bff707c1247bef7"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.15.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.15.0)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.23)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.13.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:20335853b85e9a49ff7572ab453794298bcf0354d8068c5f6775a0eabf350aca"}, + {file = "scipy-1.13.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d605e9c23906d1994f55ace80e0125c587f96c020037ea6aa98d01b4bd2e222f"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfa31f1def5c819b19ecc3a8b52d28ffdcc7ed52bb20c9a7589669dd3c250989"}, + {file = "scipy-1.13.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26264b282b9da0952a024ae34710c2aff7d27480ee91a2e82b7b7073c24722f"}, + {file = 
"scipy-1.13.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:eccfa1906eacc02de42d70ef4aecea45415f5be17e72b61bafcfd329bdc52e94"}, + {file = "scipy-1.13.1-cp310-cp310-win_amd64.whl", hash = "sha256:2831f0dc9c5ea9edd6e51e6e769b655f08ec6db6e2e10f86ef39bd32eb11da54"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:27e52b09c0d3a1d5b63e1105f24177e544a222b43611aaf5bc44d4a0979e32f9"}, + {file = "scipy-1.13.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:54f430b00f0133e2224c3ba42b805bfd0086fe488835effa33fa291561932326"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e89369d27f9e7b0884ae559a3a956e77c02114cc60a6058b4e5011572eea9299"}, + {file = "scipy-1.13.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a78b4b3345f1b6f68a763c6e25c0c9a23a9fd0f39f5f3d200efe8feda560a5fa"}, + {file = "scipy-1.13.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45484bee6d65633752c490404513b9ef02475b4284c4cfab0ef946def50b3f59"}, + {file = "scipy-1.13.1-cp311-cp311-win_amd64.whl", hash = "sha256:5713f62f781eebd8d597eb3f88b8bf9274e79eeabf63afb4a737abc6c84ad37b"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5d72782f39716b2b3509cd7c33cdc08c96f2f4d2b06d51e52fb45a19ca0c86a1"}, + {file = "scipy-1.13.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:017367484ce5498445aade74b1d5ab377acdc65e27095155e448c88497755a5d"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:949ae67db5fa78a86e8fa644b9a6b07252f449dcf74247108c50e1d20d2b4627"}, + {file = "scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de3ade0e53bc1f21358aa74ff4830235d716211d7d077e340c7349bc3542e884"}, + {file = "scipy-1.13.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2ac65fb503dad64218c228e2dc2d0a0193f7904747db43014645ae139c8fad16"}, + {file = "scipy-1.13.1-cp312-cp312-win_amd64.whl", hash = "sha256:cdd7dacfb95fea358916410ec61bbc20440f7860333aee6d882bb8046264e949"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:436bbb42a94a8aeef855d755ce5a465479c721e9d684de76bf61a62e7c2b81d5"}, + {file = "scipy-1.13.1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:8335549ebbca860c52bf3d02f80784e91a004b71b059e3eea9678ba994796a24"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d533654b7d221a6a97304ab63c41c96473ff04459e404b83275b60aa8f4b7004"}, + {file = "scipy-1.13.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637e98dcf185ba7f8e663e122ebf908c4702420477ae52a04f9908707456ba4d"}, + {file = "scipy-1.13.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a014c2b3697bde71724244f63de2476925596c24285c7a637364761f8710891c"}, + {file = "scipy-1.13.1-cp39-cp39-win_amd64.whl", hash = "sha256:392e4ec766654852c25ebad4f64e4e584cf19820b980bc04960bca0b0cd6eaa2"}, + {file = "scipy-1.13.1.tar.gz", hash = "sha256:095a87a0312b08dfd6a6155cbbd310a8c51800fc931b8c0b84003014b874ed3c"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] +test = 
["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "setuptools" version = "70.0.0" @@ -3422,6 +3820,75 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soundfile" +version = "0.12.1" +description = "An audio library based on libsndfile, CFFI and NumPy" +optional = false +python-versions = "*" +files = [ + {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = "sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"}, + {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"}, + {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"}, + {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"}, +] + +[package.dependencies] +cffi = ">=1.0" + +[package.extras] +numpy = ["numpy"] + +[[package]] +name = "soxr" +version = "0.3.7" +description = "High quality, one-dimensional sample-rate conversion library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "soxr-0.3.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac81c4af6a993d5b7c0b466bbac4835bad2b14ec32f342b2c1f83e4cf825e301"}, + {file = "soxr-0.3.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8d8a2b3e7f8d0255e2484fb82cb66c86da6fb25b342ef793cceca9ce9a61aa16"}, + {file = "soxr-0.3.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd6eb6f6bbda2e8de36672cf2f0529ced6e638773150744ef075be0cc4f52c"}, + {file = "soxr-0.3.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e47d86af35b942c92606fc2d5dfccf3f01309329475571ae2312bbf9edc3a790"}, + {file = "soxr-0.3.7-cp310-cp310-win_amd64.whl", hash = "sha256:0e291adfaf9f2a7c4dd180a1b8c280f9beb1c84cb381853e4f4b3434d002ed7f"}, + {file = "soxr-0.3.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e811450f0e91972932bd37ac58e32e44002c2c99db2aa926a9e7ba164545034"}, + {file = "soxr-0.3.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9cea63014ce91035074e1228c9340e2b8609faf964e268705fcac5135d05060c"}, + {file = "soxr-0.3.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bfab27830f6217a15b83445988225c3aeea3bbccfa9399ced291e53e1b05925d"}, + {file = "soxr-0.3.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:286858e3078d76c11b6d490b66fed3c9bb2a4229759f6be03ceef5c02189bf2c"}, + {file = "soxr-0.3.7-cp311-cp311-win_amd64.whl", hash = "sha256:54985ff33292192d2937be80df3e5f3a44d6d53e6835f727d6b99b7cdd3f1611"}, + {file = "soxr-0.3.7-cp312-cp312-macosx_10_9_x86_64.whl", hash = 
"sha256:83c74ef6d61d7dcd81be26f91bee0a420f792f5c1982266f2a80e655f0650a98"}, + {file = "soxr-0.3.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb1e14663a43fe88b8fbc287822a159028366a820abe1a0a9670fb53618cb47b"}, + {file = "soxr-0.3.7-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48acdfbcf870ab54f645b1cfd641bce92c1e3a67346c3bf0f6c0ad2873c1dd35"}, + {file = "soxr-0.3.7-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea663b76f2b0ec1576b8a43aef317aec080abc0a67a4015fcd9f3407039f260a"}, + {file = "soxr-0.3.7-cp312-cp312-win_amd64.whl", hash = "sha256:42da0d9eb79c70e5a41917f1b48a032e241a48eb4a1bcea7c80577302ff26974"}, + {file = "soxr-0.3.7-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:511c6b2279c8ddd83459d129d69f628f7aae4616ae0a1912963985bd89e35df7"}, + {file = "soxr-0.3.7-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a37c518c0b5d70162956d808d6c2e249bae0672e414e0dcfc101e200d8c31f3c"}, + {file = "soxr-0.3.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27f2890528d2b2e358938ab660a6b8346802863f5b6b646204d7ff8ab0ca2c66"}, + {file = "soxr-0.3.7-cp37-cp37m-win_amd64.whl", hash = "sha256:52467c8c012495544a6dcfcce6b5bcbbc653d24fe9bb33c0b6191acecdb5e297"}, + {file = "soxr-0.3.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ce12b93747958f2769d6b297e6e27c73d9ad635fe8104ef052bece9c8a322824"}, + {file = "soxr-0.3.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1cd65dc7b96ea3cb6c8c48e6020e859680556cc42dd3d4de44779530cce21037"}, + {file = "soxr-0.3.7-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d994f1a7690b1b13ab639ea33e0c1d78415b64d88d6df4af705a9443f97b9687"}, + {file = "soxr-0.3.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e87b58bc9e8c2caa16f07726f666bd043f0a49ca937baa803ce7708003b27833"}, + {file = "soxr-0.3.7-cp38-cp38-win_amd64.whl", hash = "sha256:07f4c0c6125ea1482fa187ad5f007216712ee0a93586a9b2f80e79c0bf944cf7"}, + {file = "soxr-0.3.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e5267c3ba34d4b873d9bbe3a9e58418b01ae4fd04349a4f944d9943b9ddac0f7"}, + {file = "soxr-0.3.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6e39668c250e221db888cf3b290a16fbe10a702d9a4eb604a127f720040de583"}, + {file = "soxr-0.3.7-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ceeb74e5a55d903cc286d3bd12c2d8f8c85d02894071e9ec92ab405430907c"}, + {file = "soxr-0.3.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0eed6bf58192dd1bb93becd2444de4d712689713d727b32fd55623ae9aae7df7"}, + {file = "soxr-0.3.7-cp39-cp39-win_amd64.whl", hash = "sha256:7221302b4547d02a3f38dd3cd15317ab2b78873c75921db5f4a070848f0c71be"}, + {file = "soxr-0.3.7.tar.gz", hash = "sha256:436ddff00c6eb2c75b79c19cfdca7527b1e31b5fad738652f044045ba6258593"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +docs = ["linkify-it-py", "myst-parser", "sphinx", "sphinx-book-theme"] +test = ["pytest"] + [[package]] name = "sqlalchemy" version = "2.0.30" @@ -3568,6 +4035,17 @@ files = [ doc = ["reno", "sphinx"] test = ["pytest", "tornado (>=4.5)", "typeguard"] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = 
"sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + [[package]] name = "tokenizers" version = "0.19.1" @@ -4244,6 +4722,19 @@ files = [ {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, ] +[[package]] +name = "whisper-cpp-python" +version = "0.2.0" +description = "A Python wrapper for whisper.cpp" +optional = false +python-versions = ">=3.9" +files = [ + {file = "whisper_cpp_python-0.2.0.tar.gz", hash = "sha256:4e727040711d9f8adc3767f786525ba1543b5dd34e3a851880a953933b402f55"}, +] + +[package.dependencies] +librosa = ">=0.10.0.post2,<0.11.0" + [[package]] name = "wrapt" version = "1.16.0" @@ -4444,4 +4935,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "854913e46a4742275f760698c85f5c0003a5df398805ce81d5a40bb074697baa" +content-hash = "f8851f3ea1cdfc285ecd3281914ea167219f602416c140724e37ae3a63d2675c" diff --git a/pyproject.toml b/pyproject.toml index 4bee240..a0f3e54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ environs = "^11.0.0" fastapi-cache2 = {extras = ["redis"], version = "^0.2.1"} aiomcache = "^0.8.2" autocorrect = "^2.6.1" +whisper-cpp-python = "^0.2.0" [build-system] From 38c90781336b828ca8955265ce0173bbcb8fa38f Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sat, 22 Jun 2024 17:31:34 +0300 Subject: [PATCH 03/39] add routing between search and encode instances --- docker-compose.yml | 8 ++++++-- main/.env.dist | 3 ++- main/clip.py | 10 ++++++---- main/deps.py | 4 +--- main/settings.py | 3 ++- 5 files changed, 17 insertions(+), 11 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index de94422..1142ad0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ services: - "11211:11211" restart: always - inference: + encode-inference: &inference build: context: ./inference dockerfile: Dockerfile @@ -31,6 +31,9 @@ services: ports: - "8040:8040" + search-inference: + <<: *inference + main: build: context: ./main @@ -42,7 +45,8 @@ services: - main-model-data:/app/model_data depends_on: - db - - inference + - encode-inference + - search-inference - cache env_file: - main/.env.dist diff --git a/main/.env.dist b/main/.env.dist index 353c69c..ccaf278 100644 --- a/main/.env.dist +++ b/main/.env.dist @@ -1,3 +1,4 @@ -CLIP_URL=http://inference:8040/ +ENCODE_CLIP_URL=http://encode-inference:8040/ +SEARCH_CLIP_URL=http://search-inference:8040/ DB_HOST=chroma_db DB_PORT=8000 diff --git a/main/clip.py b/main/clip.py index 9ae4e42..57c3a6d 100644 --- a/main/clip.py +++ b/main/clip.py @@ -1,13 +1,15 @@ import aiohttp +from main.settings import Settings from models import SearchFeature, Video, Feature class CLIPService: - def __init__(self, url: str) -> None: - self.clip_url = url + def __init__(self) -> None: + self.encode_clip_url = Settings.encode_clip_url + self.search_clip_url = Settings.search_clip_url async def get_video_embeddings(self, request: Video) -> list[Feature]: async with aiohttp.ClientSession().post( - url=f"{self.clip_url}/encode", + url=f"{self.encode_clip_url}/encode", json=request.model_dump(mode="json") ) as resp: features = await resp.json() @@ -27,7 +29,7 @@ async def get_text_embedding( request: SearchFeature, ) -> Feature: async with aiohttp.ClientSession().post( - f"{self.clip_url}/encode-search", + f"{self.search_clip_url}/encode-search", json=request.model_dump(mode="json") ) as resp: features = await 
resp.json() diff --git a/main/deps.py b/main/deps.py index 18e8277..6afa00e 100644 --- a/main/deps.py +++ b/main/deps.py @@ -14,9 +14,7 @@ def get_clip_service() -> CLIPService: - return CLIPService( - url=Settings.clip_url - ) + return CLIPService() def get_chroma_storage() -> ChromaStorage: return ChromaStorage() diff --git a/main/settings.py b/main/settings.py index 68e1c3a..88ad0b6 100644 --- a/main/settings.py +++ b/main/settings.py @@ -7,6 +7,7 @@ class Settings: db_host: str = env.str("DB_HOST", default="chroma_db") db_port: int = env.int("DB_PORT", default=8080) - clip_url: str = env.str("CLIP_URL", default="http://inference:8040/encode") + encode_clip_url: str = env.str("ENCODE_CLIP_URL", default="http://encode-inference:8040/") + search_clip_url: str = env.str("SEARCH_CLIP_URL", default="http://search-inference:8040/") memcached_host: str = env.str("MEMCACHED_HOST", default="request_cache") cache_lifetime: int = env.int("CACHE_LIFETIME", default=3600) From e45ccde35076ea057971b645fc68def064fe04a5 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sat, 22 Jun 2024 17:51:38 +0300 Subject: [PATCH 04/39] fix docker compose --- docker-compose.yml | 8 ++++---- main/.env.dist | 4 ++-- main/settings.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 1142ad0..f0ebe8c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ services: - "11211:11211" restart: always - encode-inference: &inference + encode: &inference build: context: ./inference dockerfile: Dockerfile @@ -31,7 +31,7 @@ services: ports: - "8040:8040" - search-inference: + search: <<: *inference main: @@ -45,8 +45,8 @@ services: - main-model-data:/app/model_data depends_on: - db - - encode-inference - - search-inference + - encode + - search - cache env_file: - main/.env.dist diff --git a/main/.env.dist b/main/.env.dist index ccaf278..1c25429 100644 --- a/main/.env.dist +++ b/main/.env.dist @@ -1,4 +1,4 @@ -ENCODE_CLIP_URL=http://encode-inference:8040/ -SEARCH_CLIP_URL=http://search-inference:8040/ +ENCODE_CLIP_URL=http://encode:8040/ +SEARCH_CLIP_URL=http://search:8040/ DB_HOST=chroma_db DB_PORT=8000 diff --git a/main/settings.py b/main/settings.py index 88ad0b6..a31cc00 100644 --- a/main/settings.py +++ b/main/settings.py @@ -7,7 +7,7 @@ class Settings: db_host: str = env.str("DB_HOST", default="chroma_db") db_port: int = env.int("DB_PORT", default=8080) - encode_clip_url: str = env.str("ENCODE_CLIP_URL", default="http://encode-inference:8040/") - search_clip_url: str = env.str("SEARCH_CLIP_URL", default="http://search-inference:8040/") + encode_clip_url: str = env.str("ENCODE_CLIP_URL", default="http://encode:8040/") + search_clip_url: str = env.str("SEARCH_CLIP_URL", default="http://search:8040/") memcached_host: str = env.str("MEMCACHED_HOST", default="request_cache") cache_lifetime: int = env.int("CACHE_LIFETIME", default=3600) From b291d455c3e794e44d11d9add243ee4c762d4a6b Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sat, 22 Jun 2024 17:53:40 +0300 Subject: [PATCH 05/39] fix docker compose --- docker-compose.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index f0ebe8c..dd6735c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,6 @@ version: "3.10" services: db: - container_name: chroma_db image: chromadb/chroma:latest volumes: - chroma-data:/chroma/chroma @@ -11,7 +10,6 @@ services: - "8000:8000" cache: - container_name: request_cache image: memcached:latest 
     ports:
       - "11211:11211"
@@ -21,7 +19,6 @@
     build:
       context: ./inference
       dockerfile: Dockerfile
-    container_name: inference
     command: uvicorn main:app --host "0.0.0.0" --port 8040
     restart: unless-stopped
     volumes:
@@ -38,7 +35,6 @@
     build:
       context: ./main
       dockerfile: Dockerfile
-    container_name: main_gateway
     command: uvicorn main:app --host "0.0.0.0" --port 80
     restart: unless-stopped
     volumes:

From afffbc8a6edd32ea1826cb166e3a7ccd4152a8f6 Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Sat, 22 Jun 2024 23:12:51 +0300
Subject: [PATCH 06/39] add whisper.cpp preload

---
 inference/download_whisper.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/inference/download_whisper.sh b/inference/download_whisper.sh
index b39d2e7..4f523a7 100755
--- a/inference/download_whisper.sh
+++ b/inference/download_whisper.sh
@@ -1,6 +1,6 @@
-#!usr/bin/bash
+#!/bin/bash
 
-if ! test /app/model_data/ggml-large-v3.bin; then
+if [ ! -f /app/model_data/ggml-large-v3.bin ]; then
     mkdir /app/model_data
     git clone https://github.com/ggerganov/whisper.cpp.git
     cd whisper.cpp

From 502aa1251ad941c4a952f1621bc1a33fbd62ed05 Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Sat, 22 Jun 2024 23:40:03 +0300
Subject: [PATCH 07/39] change port schematics

---
 docker-compose.yml | 2 ++
 main/.env.dist     | 2 +-
 main/settings.py   | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index dd6735c..937d2dd 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -30,6 +30,8 @@ services:
 
   search:
     <<: *inference
+    ports:
+      - "8050:8040"
 
   main:
     build:
diff --git a/main/.env.dist b/main/.env.dist
index 1c25429..0c4bcfe 100644
--- a/main/.env.dist
+++ b/main/.env.dist
@@ -1,4 +1,4 @@
 ENCODE_CLIP_URL=http://encode:8040/
-SEARCH_CLIP_URL=http://search:8040/
+SEARCH_CLIP_URL=http://search:8050/
 DB_HOST=chroma_db
 DB_PORT=8000
diff --git a/main/settings.py b/main/settings.py
index a31cc00..09db4bd 100644
--- a/main/settings.py
+++ b/main/settings.py
@@ -8,6 +8,6 @@ class Settings:
     db_host: str = env.str("DB_HOST", default="chroma_db")
     db_port: int = env.int("DB_PORT", default=8080)
     encode_clip_url: str = env.str("ENCODE_CLIP_URL", default="http://encode:8040/")
-    search_clip_url: str = env.str("SEARCH_CLIP_URL", default="http://search:8040/")
+    search_clip_url: str = env.str("SEARCH_CLIP_URL", default="http://search:8050/")
     memcached_host: str = env.str("MEMCACHED_HOST", default="request_cache")
     cache_lifetime: int = env.int("CACHE_LIFETIME", default=3600)

From bc45b6edf8f8608793c6138a815137b00e5705cf Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Sat, 22 Jun 2024 23:42:25 +0300
Subject: [PATCH 08/39] fix stash errors

---
 main/main.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/main/main.py b/main/main.py
index 962cc43..98ba956 100644
--- a/main/main.py
+++ b/main/main.py
@@ -11,13 +11,6 @@
 @app.post("/index")
 async def add_video_to_index(request: Video, clip: Clip, chroma: Chroma) -> Video:
     """Adds a new video to the storage index"""
-<<<<<<< Updated upstream
-    feature = await clip.get_video_embedding(request)
-    if request.description is not None:
-        chroma.add_text_search_suggestion(suggestion_query=request.description)
-    chroma.add_feature(feature=feature)
-    return request.model_dump(mode="dict")
-=======
     features = await clip.get_video_embeddings(request)
     if request.description is not None:
         chroma.add_text_search_suggestion(suggestion_query=request.description)
@@ -26,7 +19,6 @@ async def add_video_to_index(request: Video, 
clip: Clip, chroma: Chroma) -> Vide chroma.add_feature(feature=feature) return request ->>>>>>> Stashed changes @app.get("/search") @cache(expire=Settings.cache_lifetime) @@ -41,13 +33,8 @@ async def search_for_related_videos( spelled_search = speller(params.text) translated_search = translator(spelled_search) search_vector = await clip.get_text_embedding( -<<<<<<< Updated upstream - Video( - description=translated_search -======= SearchFeature( query=translated_search ->>>>>>> Stashed changes ) ) return {"results": chroma.search_relevant_videos(search_feature=search_vector, top_k=params.return_amount)} From 940261eeca1eefa92d8f0fcdfbf54ee27df6f946 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sat, 22 Jun 2024 23:44:58 +0300 Subject: [PATCH 09/39] fix import errors in containers --- inference/clip.py | 2 +- inference/deps.py | 2 +- inference/main.py | 6 +++--- inference/whisper.py | 2 +- main/clip.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/inference/clip.py b/inference/clip.py index cb2ed82..3425b6c 100644 --- a/inference/clip.py +++ b/inference/clip.py @@ -5,7 +5,7 @@ import torch from transformers import CLIPModel, CLIPProcessor -from inference.frame_video import VideoFrame, create_key_frames_for_video +from frame_video import VideoFrame, create_key_frames_for_video @dataclass diff --git a/inference/deps.py b/inference/deps.py index ad509c6..d90d0ca 100644 --- a/inference/deps.py +++ b/inference/deps.py @@ -4,7 +4,7 @@ from fastapi import Depends, FastAPI, Request from transformers import CLIPModel, CLIPProcessor -from inference.whisper import WhisperService +from whisper import WhisperService from settings import Settings diff --git a/inference/main.py b/inference/main.py index f8edabc..c145610 100644 --- a/inference/main.py +++ b/inference/main.py @@ -1,9 +1,9 @@ from fastapi import FastAPI from fastapi.responses import JSONResponse -from inference.deps import Model, Processor, Whisper, lifespan -from inference.clip import CLIP -from inference.models import EncodeRequest, EncodeSearchRequest +from deps import Model, Processor, Whisper, lifespan +from clip import CLIP +from models import EncodeRequest, EncodeSearchRequest app = FastAPI(lifespan=lifespan) diff --git a/inference/whisper.py b/inference/whisper.py index 8cde250..8f3c76b 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -4,7 +4,7 @@ import requests from whisper_cpp_python import Whisper -from inference.settings import Settings +from settings import Settings @dataclass diff --git a/main/clip.py b/main/clip.py index 57c3a6d..95d08ba 100644 --- a/main/clip.py +++ b/main/clip.py @@ -1,5 +1,5 @@ import aiohttp -from main.settings import Settings +from settings import Settings from models import SearchFeature, Video, Feature class CLIPService: From dbfc95cb3007ad8e19a1e6d2db16dc15c58a0d8b Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sat, 22 Jun 2024 23:46:33 +0300 Subject: [PATCH 10/39] add whisper cpp to dep list --- inference/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/inference/requirements.txt b/inference/requirements.txt index 5291272..bc74758 100644 --- a/inference/requirements.txt +++ b/inference/requirements.txt @@ -9,3 +9,4 @@ pillow==10.3.0 scenedetect==0.6.3 opencv-python==4.10.0.82 environs==11.0.0 +whisper-cpp-python==0.2.0 From e92c92660a5c8c56c889d805fba9c937f44fab3f Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sat, 22 Jun 2024 23:52:00 +0300 Subject: [PATCH 11/39] change db host name --- main/.env.dist | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/main/.env.dist b/main/.env.dist index 0c4bcfe..a81ca7a 100644 --- a/main/.env.dist +++ b/main/.env.dist @@ -1,4 +1,4 @@ ENCODE_CLIP_URL=http://encode:8040/ SEARCH_CLIP_URL=http://search:8050/ -DB_HOST=chroma_db +DB_HOST=db DB_PORT=8000 From ad996e74814f5bf88a04653863d91df5d7087d9d Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 00:01:09 +0300 Subject: [PATCH 12/39] fix paths to inference --- main/clip.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/clip.py b/main/clip.py index 95d08ba..e9ec7c6 100644 --- a/main/clip.py +++ b/main/clip.py @@ -9,7 +9,7 @@ def __init__(self) -> None: async def get_video_embeddings(self, request: Video) -> list[Feature]: async with aiohttp.ClientSession().post( - url=f"{self.encode_clip_url}/encode", + url=f"{self.encode_clip_url}encode", json=request.model_dump(mode="json") ) as resp: features = await resp.json() @@ -29,7 +29,7 @@ async def get_text_embedding( request: SearchFeature, ) -> Feature: async with aiohttp.ClientSession().post( - f"{self.search_clip_url}/encode-search", + f"{self.search_clip_url}encode-search", json=request.model_dump(mode="json") ) as resp: features = await resp.json() From 898b711808154f0de0b2b709c4bcff397f2d4c66 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 00:21:00 +0300 Subject: [PATCH 13/39] add tempfiles for whisper --- inference/whisper.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/inference/whisper.py b/inference/whisper.py index 8f3c76b..50749ac 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from io import BytesIO +import tempfile import requests from whisper_cpp_python import Whisper @@ -14,5 +15,8 @@ class WhisperService: def __call__(self, link: str) -> str: video_data = BytesIO(requests.get(link).content) - data = self._service.transcribe(video_data) + with tempfile.NamedTemporaryFile(delete_on_close=False) as tp: + tp.write(video_data.read()) + tp.close() + data = self._service.transcribe(open(tp.name)) return data["text"] From f1012c1ebf903ad0f90da25515130f2ac9e823e9 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 00:26:38 +0300 Subject: [PATCH 14/39] fix tempfiles + translate --- inference/whisper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inference/whisper.py b/inference/whisper.py index 50749ac..dd11e73 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -15,8 +15,8 @@ class WhisperService: def __call__(self, link: str) -> str: video_data = BytesIO(requests.get(link).content) - with tempfile.NamedTemporaryFile(delete_on_close=False) as tp: + with tempfile.NamedTemporaryFile(delete=False) as tp: tp.write(video_data.read()) tp.close() - data = self._service.transcribe(open(tp.name)) + data = self._service.translate(open(tp.name)) return data["text"] From d1caad3da71d057a5911691fc27476b5d765aea4 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 00:36:40 +0300 Subject: [PATCH 15/39] redo file submission to whisper --- inference/whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/whisper.py b/inference/whisper.py index dd11e73..455e3f7 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -18,5 +18,5 @@ def __call__(self, link: str) -> str: with tempfile.NamedTemporaryFile(delete=False) as tp: tp.write(video_data.read()) tp.close() - data = self._service.translate(open(tp.name)) + data = 
self._service.translate(tp.name) return data["text"] From 85860a8f0c89a907d5555b2d5af3ceb53cd0a764 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 16:46:51 +0300 Subject: [PATCH 16/39] fix prompts + add logging --- inference/clip.py | 4 ++++ inference/deps.py | 5 +++++ inference/main.py | 10 ++++++++-- inference/whisper.py | 4 +++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/inference/clip.py b/inference/clip.py index 3425b6c..7a80688 100644 --- a/inference/clip.py +++ b/inference/clip.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from logging import Logger from typing import Callable, Literal from PIL import Image @@ -12,14 +13,17 @@ class CLIP: processor: CLIPProcessor model: CLIPModel + logger: Logger _create_key_frames_for_video: Callable[[str], list[VideoFrame]] = create_key_frames_for_video def __call__(self, encode_source: str, encode_type: Literal["text"] | Literal["video"]) -> list[float]: if encode_type == "text": + self.logger.info("Processing text input: %s, input length: %s", encode_source, len(encode_source)) return self._encode_text(encode_source) if encode_type == "video": + self.logger.info("Processing video input: %s", encode_source) return self._encode_video(encode_source) def _encode_text(self, description: str) -> list[float]: diff --git a/inference/deps.py b/inference/deps.py index d90d0ca..d937d8a 100644 --- a/inference/deps.py +++ b/inference/deps.py @@ -1,4 +1,5 @@ from contextlib import asynccontextmanager +import logging from typing import Annotated from fastapi import Depends, FastAPI, Request @@ -10,14 +11,18 @@ @asynccontextmanager async def lifespan(app: FastAPI): + logger = logging.getLogger(__name__) + logger.info("Setting up CLIP model...") app.state.clip_model = CLIPModel.from_pretrained( Settings.clip_model, cache_dir="./model_cache" ) + logger.info("Setting up CLIP processor...") app.state.processor = CLIPProcessor.from_pretrained( Settings.clip_model, cache_dir="./model_cache" ) + logger.info("Setting up Whisper service...") app.state.whisper_model = WhisperService() yield diff --git a/inference/main.py b/inference/main.py index c145610..0e9351a 100644 --- a/inference/main.py +++ b/inference/main.py @@ -1,3 +1,4 @@ +import logging from fastapi import FastAPI from fastapi.responses import JSONResponse @@ -6,6 +7,7 @@ from models import EncodeRequest, EncodeSearchRequest app = FastAPI(lifespan=lifespan) +logger = logging.getLogger(__name__) @app.get("/") async def root(): @@ -18,7 +20,9 @@ async def encode( model: Model, whisper: Whisper ): - clip = CLIP(processor=processor, model=model) + logger.info("Initializing CLIP module...") + clip = CLIP(processor=processor, model=model, logger=logger) + logger.info("CLIP module successfully initialized") video_features = clip(request.link, encode_type="video") if request.description is not None: @@ -38,7 +42,9 @@ async def encode( async def encode_search( request: EncodeSearchRequest, processor: Processor, model: Model ): - clip = CLIP(processor=processor, model=model) + logger.info("Initializing CLIP module...") + clip = CLIP(processor=processor, model=model, logger=logger) + logger.info("CLIP module successfully initialized") features = clip(request.query, encode_type="text") diff --git a/inference/whisper.py b/inference/whisper.py index 455e3f7..37698f2 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -18,5 +18,7 @@ def __call__(self, link: str) -> str: with tempfile.NamedTemporaryFile(delete=False) as tp: tp.write(video_data.read()) tp.close() - 
data = self._service.translate(tp.name) + data = self._service.translate( + tp.name, prompt="" + ) return data["text"] From 2456c34fdd2061a9a493ac5a9d3a64fac7c8f4b2 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 17:10:43 +0300 Subject: [PATCH 17/39] adding threads to the problem --- inference/whisper.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/inference/whisper.py b/inference/whisper.py index 37698f2..b2b8ef6 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -10,7 +10,12 @@ @dataclass class WhisperService: - _service: Whisper = field(default_factory=lambda: Whisper(model_path=Settings.whisper_path)) + _service: Whisper = field( + default_factory=lambda: Whisper( + model_path=Settings.whisper_path, + n_threads=4 + ) + ) def __call__(self, link: str) -> str: From 6a7d81e9a3207d3af8f6b3da7f4f014add8b1d1d Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 17:51:14 +0300 Subject: [PATCH 18/39] piping video to wav and adding logging --- docker-compose.yml | 2 +- inference/Dockerfile | 2 +- inference/frame_video.py | 13 +++++++++++++ inference/log_conf.yaml | 34 ++++++++++++++++++++++++++++++++++ inference/requirements.txt | 1 + inference/whisper.py | 17 +++++++++++++---- main/clip.py | 3 ++- 7 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 inference/log_conf.yaml diff --git a/docker-compose.yml b/docker-compose.yml index 937d2dd..cd7af13 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,7 +19,7 @@ services: build: context: ./inference dockerfile: Dockerfile - command: uvicorn main:app --host "0.0.0.0" --port 8040 + command: uvicorn main:app --host "0.0.0.0" --port 8040 --log-config=log_conf.yaml restart: unless-stopped volumes: - inference-model-data:/app/model_data diff --git a/inference/Dockerfile b/inference/Dockerfile index 1386abc..500c877 100644 --- a/inference/Dockerfile +++ b/inference/Dockerfile @@ -16,4 +16,4 @@ RUN python -m pip install --upgrade pip && pip install -r requirements.txt COPY ./ /app/ EXPOSE 8040 -CMD uvicorn main:app --port 8040 +CMD uvicorn main:app --port 8040 --log-config=log_conf.yaml diff --git a/inference/frame_video.py b/inference/frame_video.py index 374cc20..e61cd13 100644 --- a/inference/frame_video.py +++ b/inference/frame_video.py @@ -57,3 +57,16 @@ def create_frame_in_ram(video_path: str, timecode: str) -> BytesIO: process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) frame_data, _ = process.communicate() return BytesIO(frame_data) + +def get_audio_in_ram(video_path: str) -> BytesIO: + command = [ + "ffmpeg", + "-i", video_path, + "-acodec", "pcm_s16le", + "-ac", "1", + "-ar", "16000", + "-" + ] + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + audio_data, _ = process.communicate() + return BytesIO(audio_data) diff --git a/inference/log_conf.yaml b/inference/log_conf.yaml new file mode 100644 index 0000000..1377c64 --- /dev/null +++ b/inference/log_conf.yaml @@ -0,0 +1,34 @@ +version: 1 +disable_existing_loggers: False +formatters: + default: + # "()": uvicorn.logging.DefaultFormatter + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + access: + # "()": uvicorn.logging.AccessFormatter + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +handlers: + default: + formatter: default + class: logging.StreamHandler + stream: ext://sys.stderr + access: + formatter: access + class: logging.StreamHandler + stream: ext://sys.stdout +loggers: + uvicorn.error: 
+ level: INFO + handlers: + - default + propagate: no + uvicorn.access: + level: INFO + handlers: + - access + propagate: no +root: + level: DEBUG + handlers: + - default + propagate: no diff --git a/inference/requirements.txt b/inference/requirements.txt index bc74758..fd975b8 100644 --- a/inference/requirements.txt +++ b/inference/requirements.txt @@ -10,3 +10,4 @@ scenedetect==0.6.3 opencv-python==4.10.0.82 environs==11.0.0 whisper-cpp-python==0.2.0 +PyYAML>=6.0 diff --git a/inference/whisper.py b/inference/whisper.py index b2b8ef6..415b88e 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -1,10 +1,13 @@ from dataclasses import dataclass, field from io import BytesIO import tempfile +import os +from typing import Callable import requests from whisper_cpp_python import Whisper +from inference.frame_video import get_audio_in_ram from settings import Settings @@ -16,14 +19,20 @@ class WhisperService: n_threads=4 ) ) + _get_audio_in_ram: Callable[[str], BytesIO] = get_audio_in_ram def __call__(self, link: str) -> str: video_data = BytesIO(requests.get(link).content) - with tempfile.NamedTemporaryFile(delete=False) as tp: - tp.write(video_data.read()) - tp.close() + with tempfile.NamedTemporaryFile() as video: + video.write(video_data.read()) + audio_data = self._get_audio_in_ram(video.name) + + with tempfile.NamedTemporaryFile(delete=False) as audio: + audio.write(audio_data.read()) + audio.close() data = self._service.translate( - tp.name, prompt="" + audio.name, prompt="" ) + os.unlink(audio.name) return data["text"] diff --git a/main/clip.py b/main/clip.py index e9ec7c6..e5264c9 100644 --- a/main/clip.py +++ b/main/clip.py @@ -6,9 +6,10 @@ class CLIPService: def __init__(self) -> None: self.encode_clip_url = Settings.encode_clip_url self.search_clip_url = Settings.search_clip_url + self.session_timeout = aiohttp.ClientTimeout(60 * 5) async def get_video_embeddings(self, request: Video) -> list[Feature]: - async with aiohttp.ClientSession().post( + async with aiohttp.ClientSession(timeout=self.session_timeout).post( url=f"{self.encode_clip_url}encode", json=request.model_dump(mode="json") ) as resp: From 89d75a3a326f853a8749c0f9a016110b14e8d998 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 17:59:21 +0300 Subject: [PATCH 19/39] adding summary api --- inference/.env.dist | 1 + inference/settings.py | 1 + inference/whisper.py | 11 ++++++++++- 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/inference/.env.dist b/inference/.env.dist index 71f7798..0281e6a 100644 --- a/inference/.env.dist +++ b/inference/.env.dist @@ -1,2 +1,3 @@ CLIP_MODEL=laion/CLIP-ViT-g-14-laion2B-s12B-b42K WHISPER_PATH=/app/model_data/ggml-large-v3.bin +SUMMARIZATION_MODEL=facebook/bart-large-cnn diff --git a/inference/settings.py b/inference/settings.py index 9170c72..30b2e00 100644 --- a/inference/settings.py +++ b/inference/settings.py @@ -6,4 +6,5 @@ class Settings: clip_model: str = env.str("CLIP_MODEL") + summarization_model: str = env.str("SUMMARIZATION_MODEL") whisper_path: str = env.str("WHISPER_PATH") diff --git a/inference/whisper.py b/inference/whisper.py index 415b88e..fb0fd06 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -5,6 +5,7 @@ from typing import Callable import requests +from transformers import Pipeline, pipeline from whisper_cpp_python import Whisper from inference.frame_video import get_audio_in_ram @@ -19,6 +20,12 @@ class WhisperService: n_threads=4 ) ) + _summary_pipeline: Pipeline = field( + default_factory=lambda: pipeline( + 
"summarization", + model=Settings.summarization_model + ) + ) _get_audio_in_ram: Callable[[str], BytesIO] = get_audio_in_ram def __call__(self, link: str) -> str: @@ -35,4 +42,6 @@ def __call__(self, link: str) -> str: audio.name, prompt="" ) os.unlink(audio.name) - return data["text"] + text = data["text"] + summary = self._summary_pipeline(text, max_length=77) + return summary[0]["summary_text"] From 86e8aeb740ff31eb9b58265dce6e03695abcdeb4 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 18:04:43 +0300 Subject: [PATCH 20/39] adding more logging to whisper for debug purposes --- inference/whisper.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/inference/whisper.py b/inference/whisper.py index fb0fd06..b29788b 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -1,5 +1,6 @@ from dataclasses import dataclass, field from io import BytesIO +import logging import tempfile import os from typing import Callable @@ -26,15 +27,18 @@ class WhisperService: model=Settings.summarization_model ) ) + _logger: logging.Logger = field( + default_factory=lambda: logging.getLogger(__name__) + ) _get_audio_in_ram: Callable[[str], BytesIO] = get_audio_in_ram def __call__(self, link: str) -> str: - + self._logger.info("Converting video file to WAV") video_data = BytesIO(requests.get(link).content) with tempfile.NamedTemporaryFile() as video: video.write(video_data.read()) audio_data = self._get_audio_in_ram(video.name) - + self._logger.info("Processing WAV file by whisper") with tempfile.NamedTemporaryFile(delete=False) as audio: audio.write(audio_data.read()) audio.close() @@ -42,6 +46,9 @@ def __call__(self, link: str) -> str: audio.name, prompt="" ) os.unlink(audio.name) + self._logger.info("summarizing transcript into 77 CLIP tokens") text = data["text"] summary = self._summary_pipeline(text, max_length=77) - return summary[0]["summary_text"] + result: str = summary[0]["summary_text"] # type: ignore + self._logger.info("Processed video file into text description: %s, total length: %s", result, len(result)) + return result From 09c2bd4e63d7d92d9dc04c39f0ff62bd844e7ba2 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 18:11:15 +0300 Subject: [PATCH 21/39] fix importing error --- inference/whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/whisper.py b/inference/whisper.py index b29788b..1c2fc15 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -9,7 +9,7 @@ from transformers import Pipeline, pipeline from whisper_cpp_python import Whisper -from inference.frame_video import get_audio_in_ram +from frame_video import get_audio_in_ram from settings import Settings From 5d848d31d501b661fcc49b110be28bb680cdf8bf Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 18:37:41 +0300 Subject: [PATCH 22/39] fixing suffix errors + debug --- inference/log_conf.yaml | 2 +- inference/whisper.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/inference/log_conf.yaml b/inference/log_conf.yaml index 1377c64..32e7e50 100644 --- a/inference/log_conf.yaml +++ b/inference/log_conf.yaml @@ -28,7 +28,7 @@ loggers: - access propagate: no root: - level: DEBUG + level: INFO handlers: - default propagate: no diff --git a/inference/whisper.py b/inference/whisper.py index 1c2fc15..d7fcdbf 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -35,11 +35,13 @@ class WhisperService: def __call__(self, link: str) -> str: self._logger.info("Converting video file 
to WAV") video_data = BytesIO(requests.get(link).content) - with tempfile.NamedTemporaryFile() as video: + with tempfile.NamedTemporaryFile(delete=False) as video: video.write(video_data.read()) + video.close() audio_data = self._get_audio_in_ram(video.name) + os.unlink(video.name) self._logger.info("Processing WAV file by whisper") - with tempfile.NamedTemporaryFile(delete=False) as audio: + with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio: audio.write(audio_data.read()) audio.close() data = self._service.translate( From 20c0f42238994025a4ede8fc721a57cd1a782432 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 18:44:27 +0300 Subject: [PATCH 23/39] ditching wav formatting for now --- inference/whisper.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/inference/whisper.py b/inference/whisper.py index d7fcdbf..0030346 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -33,21 +33,24 @@ class WhisperService: _get_audio_in_ram: Callable[[str], BytesIO] = get_audio_in_ram def __call__(self, link: str) -> str: - self._logger.info("Converting video file to WAV") + self._logger.info("Converting video file to transcript") video_data = BytesIO(requests.get(link).content) with tempfile.NamedTemporaryFile(delete=False) as video: video.write(video_data.read()) video.close() - audio_data = self._get_audio_in_ram(video.name) - os.unlink(video.name) - self._logger.info("Processing WAV file by whisper") - with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio: - audio.write(audio_data.read()) - audio.close() data = self._service.translate( - audio.name, prompt="" + video.name, prompt="" ) - os.unlink(audio.name) + # audio_data = self._get_audio_in_ram(video.name) + os.unlink(video.name) + #self._logger.info("Processing WAV file by whisper") + #with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio: + # audio.write(audio_data.read()) + # audio.close() + # data = self._service.translate( + # audio.name, prompt="" + # ) + #os.unlink(audio.name) self._logger.info("summarizing transcript into 77 CLIP tokens") text = data["text"] summary = self._summary_pipeline(text, max_length=77) From c9e5051f0fda767de5fa968c81cf5cb91ec422f8 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 18:54:59 +0300 Subject: [PATCH 24/39] add logging for clip encoding result --- inference/clip.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/inference/clip.py b/inference/clip.py index 7a80688..89afb89 100644 --- a/inference/clip.py +++ b/inference/clip.py @@ -32,7 +32,9 @@ def _encode_text(self, description: str) -> list[float]: text_features = self.model.get_text_features(**text_inputs) text_features /= text_features.norm(dim=-1, keepdim=True) - return text_features.tolist()[0] + result = text_features.tolist()[0] + self.logger.info("Processed result vector - %s", result) + return result def _encode_video(self, link: str) -> list[float]: images = self._create_key_frames_for_video(link) @@ -50,4 +52,7 @@ def _encode_video(self, link: str) -> list[float]: features = torch.mean(image_features, dim=0) features /= features.norm(dim=-1, keepdim=True) - return features.tolist()[0] + result = features.tolist()[0] + self.logger.info("Processed result vector - %s", result) + return result + From 9cc77536cb97b4aef12250d56d43dd4b725dcdcd Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 19:00:40 +0300 Subject: [PATCH 25/39] fix video vectorization --- 
inference/clip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/clip.py b/inference/clip.py index 89afb89..5109475 100644 --- a/inference/clip.py +++ b/inference/clip.py @@ -52,7 +52,7 @@ def _encode_video(self, link: str) -> list[float]: features = torch.mean(image_features, dim=0) features /= features.norm(dim=-1, keepdim=True) - result = features.tolist()[0] + result = features.tolist() self.logger.info("Processed result vector - %s", result) return result From 22eeb6e460b26dd39084850baf266a1e27ee8fe0 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 19:07:07 +0300 Subject: [PATCH 26/39] fix suggestion id generation --- main/chroma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/chroma.py b/main/chroma.py index 97bea98..bfdddc8 100644 --- a/main/chroma.py +++ b/main/chroma.py @@ -43,7 +43,7 @@ def add_text_search_suggestion(self, suggestion_query: str) -> None: subsearches = suggestion_query.split() self.desc_collection.add( documents=[suggestion_query] + subsearches, - ids=[str(hash(query)) for query in [suggestion_query] + subsearches] + ids=[str(uuid4()) for _ in [suggestion_query] + subsearches] ) def get_text_search_suggestions(self, search_query: str, top_k: int = 20) -> list[str]: From 28775d3df681a1c2e7cf6f90b77d70fc53ca35a9 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 19:34:47 +0300 Subject: [PATCH 27/39] redo ports again --- docker-compose.yml | 6 +++--- inference/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index cd7af13..38d9149 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -19,19 +19,19 @@ services: build: context: ./inference dockerfile: Dockerfile - command: uvicorn main:app --host "0.0.0.0" --port 8040 --log-config=log_conf.yaml + command: uvicorn main:app --host "0.0.0.0" --port 80 --log-config=log_conf.yaml restart: unless-stopped volumes: - inference-model-data:/app/model_data env_file: - inference/.env.dist ports: - - "8040:8040" + - "8040:80" search: <<: *inference ports: - - "8050:8040" + - "8050:80" main: build: diff --git a/inference/Dockerfile b/inference/Dockerfile index 500c877..71d6389 100644 --- a/inference/Dockerfile +++ b/inference/Dockerfile @@ -16,4 +16,4 @@ RUN python -m pip install --upgrade pip && pip install -r requirements.txt COPY ./ /app/ EXPOSE 8040 -CMD uvicorn main:app --port 8040 --log-config=log_conf.yaml +CMD uvicorn main:app --port 80 --log-config=log_conf.yaml From 2fb6192d60801b984308676ee1c65a284ea61d57 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 20:40:14 +0300 Subject: [PATCH 28/39] redo ports again --- docker-compose.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 38d9149..f41934d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,7 +15,7 @@ services: - "11211:11211" restart: always - encode: &inference + encode: build: context: ./inference dockerfile: Dockerfile @@ -29,7 +29,15 @@ services: - "8040:80" search: - <<: *inference + build: + context: ./inference + dockerfile: Dockerfile + command: uvicorn main:app --host "0.0.0.0" --port 80 --log-config=log_conf.yaml + restart: unless-stopped + volumes: + - inference-model-data:/app/model_data + env_file: + - inference/.env.dist ports: - "8050:80" From 0de8abf6791570220eadb25dd34d49c67abf776c Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Sun, 23 Jun 2024 23:26:55 +0300 Subject: [PATCH 
29/39] more port meddling

---
 docker-compose.yml   | 8 ++++----
 inference/Dockerfile | 3 ---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index f41934d..0b799ea 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -19,27 +19,27 @@ services:
     build:
       context: ./inference
       dockerfile: Dockerfile
-    command: uvicorn main:app --host "0.0.0.0" --port 80 --log-config=log_conf.yaml
+    command: uvicorn main:app --host "0.0.0.0" --port 8040 --log-config=log_conf.yaml
     restart: unless-stopped
     volumes:
       - inference-model-data:/app/model_data
     env_file:
       - inference/.env.dist
     ports:
-      - "8040:80"
+      - "8040:8040"
 
   search:
     build:
       context: ./inference
       dockerfile: Dockerfile
-    command: uvicorn main:app --host "0.0.0.0" --port 80 --log-config=log_conf.yaml
+    command: uvicorn main:app --host "0.0.0.0" --port 8050 --log-config=log_conf.yaml
     restart: unless-stopped
     volumes:
       - inference-model-data:/app/model_data
     env_file:
       - inference/.env.dist
     ports:
-      - "8050:80"
+      - "8050:8050"
 
   main:
     build:
diff --git a/inference/Dockerfile b/inference/Dockerfile
index 71d6389..48c4375 100644
--- a/inference/Dockerfile
+++ b/inference/Dockerfile
@@ -14,6 +14,3 @@ COPY requirements.txt /app/
 RUN python -m pip install --upgrade pip && pip install -r requirements.txt
 
 COPY ./ /app/
-
-EXPOSE 8040
-CMD uvicorn main:app --port 80 --log-config=log_conf.yaml

From 784fd5469df1ea707390fbf399bf086095580049 Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Sun, 23 Jun 2024 23:44:36 +0300
Subject: [PATCH 30/39] save urls to documents instead of uris (they don't work)

---
 main/chroma.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/main/chroma.py b/main/chroma.py
index bfdddc8..9a86893 100644
--- a/main/chroma.py
+++ b/main/chroma.py
@@ -28,7 +28,7 @@ def add_feature(self, feature: Feature) -> None:
         self.collection.add(
             ids=[str(uuid4())],
             embeddings=[feature.features],
-            uris=[feature.link],
+            documents=[feature.link],
             metadatas=[{"feature_type": feature.feature_type}]
         )
 
@@ -37,7 +37,7 @@ def search_relevant_videos(self, search_feature: Feature, top_k: int = 100) -> l
             query_embeddings=search_feature.features,
             n_results=top_k
         )
-        return results['uris'][0]
+        return results['documents'][0]
 
     def add_text_search_suggestion(self, suggestion_query: str) -> None:
         subsearches = suggestion_query.split()

From ec62d4f9cf1d7bd1418a81efbf809e9dbd498c7b Mon Sep 17 00:00:00 2001
From: Antony Redman
Date: Mon, 24 Jun 2024 10:38:51 +0300
Subject: [PATCH 31/39] move to faster-whisper to improve performance + optimizations

---
 inference/Dockerfile          |   3 -
 inference/download_whisper.sh |   9 ---
 inference/requirements.txt    |   2 +-
 inference/settings.py         |   3 +-
 inference/translator.py       |  32 +++++++++
 inference/whisper.py          |  56 +++++++++-------
 poetry.lock                   | 121 +++++++++++++++++++++++++++++++++-
 pyproject.toml                |   1 +
 8 files changed, 188 insertions(+), 39 deletions(-)
 delete mode 100755 inference/download_whisper.sh
 create mode 100644 inference/translator.py

diff --git a/inference/Dockerfile b/inference/Dockerfile
index 48c4375..3496c73 100644
--- a/inference/Dockerfile
+++ b/inference/Dockerfile
@@ -5,9 +5,6 @@ ENV PYTHONUNBUFFERED 1
 
 WORKDIR /app
 
-COPY download_whisper.sh /app/
-RUN ./download_whisper.sh
-
 RUN apt-get update && apt-get install ffmpeg -y
 
 COPY requirements.txt /app/
diff --git a/inference/download_whisper.sh b/inference/download_whisper.sh
deleted file mode 100755
index 4f523a7..0000000
--- a/inference/download_whisper.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-if [ ! 
-f /app/model_data/ggml-large-v3.bin ]; then - mkdir /app/model_data - git clone https://github.com/ggerganov/whisper.cpp.git - cd whisper.cpp - bash ./models/download-ggml-model.sh large-v3 - mv ./models/ggml-large-v3.bin /app/model_data -fi diff --git a/inference/requirements.txt b/inference/requirements.txt index fd975b8..0ce7ad4 100644 --- a/inference/requirements.txt +++ b/inference/requirements.txt @@ -9,5 +9,5 @@ pillow==10.3.0 scenedetect==0.6.3 opencv-python==4.10.0.82 environs==11.0.0 -whisper-cpp-python==0.2.0 +faster-whisper==1.0.2 PyYAML>=6.0 diff --git a/inference/settings.py b/inference/settings.py index 30b2e00..5c5f3e0 100644 --- a/inference/settings.py +++ b/inference/settings.py @@ -7,4 +7,5 @@ class Settings: clip_model: str = env.str("CLIP_MODEL") summarization_model: str = env.str("SUMMARIZATION_MODEL") - whisper_path: str = env.str("WHISPER_PATH") + whisper_model: str = env.str("WHISPER_MODEL") + translation_model: str = env.str("TRANSLATION_MODEL") diff --git a/inference/translator.py b/inference/translator.py new file mode 100644 index 0000000..65a1aa2 --- /dev/null +++ b/inference/translator.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass + +from transformers import AutoModelForSeq2SeqLM, AutoTokenizer + +from settings import Settings + + +@dataclass +class OpusTranslatorModel: + _model: AutoModelForSeq2SeqLM | None = None + _tokenizer: AutoTokenizer | None = None + + _model_name: str = Settings.translation_model + _device: str = "cpu" + + def __post_init__(self): + self._tokenizer = AutoTokenizer.from_pretrained( + self._model_name, + cache_dir="./model_cache" + ) + self._model = AutoModelForSeq2SeqLM.from_pretrained( + self._model_name, + cache_dir="./model_cache" + ) + + + def __call__(self, translate_query: str) -> str: + input_ids = self._tokenizer.encode(translate_query, return_tensors="pt") + output_ids = self._model.generate(input_ids.to(self._device), max_new_tokens=100) + en_text = self._tokenizer.decode(output_ids[0], skip_special_tokens=True) + + return en_text diff --git a/inference/whisper.py b/inference/whisper.py index 0030346..4937c9f 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -1,26 +1,34 @@ from dataclasses import dataclass, field from io import BytesIO import logging -import tempfile -import os from typing import Callable import requests from transformers import Pipeline, pipeline -from whisper_cpp_python import Whisper +from faster_whisper import WhisperModel from frame_video import get_audio_in_ram +from translator import OpusTranslatorModel from settings import Settings +model = WhisperModel + + @dataclass class WhisperService: - _service: Whisper = field( - default_factory=lambda: Whisper( - model_path=Settings.whisper_path, - n_threads=4 + _whisper: WhisperModel = field( + default_factory=lambda: WhisperModel( + Settings.whisper_model, + device="cpu", + compute_type="float16", + cpu_threads=8, + num_workers=4, ) ) + _translator: OpusTranslatorModel = field( + default_factory=OpusTranslatorModel + ) _summary_pipeline: Pipeline = field( default_factory=lambda: pipeline( "summarization", @@ -35,25 +43,25 @@ class WhisperService: def __call__(self, link: str) -> str: self._logger.info("Converting video file to transcript") video_data = BytesIO(requests.get(link).content) - with tempfile.NamedTemporaryFile(delete=False) as video: - video.write(video_data.read()) - video.close() - data = self._service.translate( - video.name, prompt="" + segments, info = self._whisper.transcribe( + video_data, + language="ru", + 
beam_size=5 + ) + if info.language_probability < 0.5: + self._logger.info( + "Cannot properly identify speech, probability=%s, returning empty string", + info.language_probability ) - # audio_data = self._get_audio_in_ram(video.name) - os.unlink(video.name) - #self._logger.info("Processing WAV file by whisper") - #with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as audio: - # audio.write(audio_data.read()) - # audio.close() - # data = self._service.translate( - # audio.name, prompt="" - # ) - #os.unlink(audio.name) + return "" self._logger.info("summarizing transcript into 77 CLIP tokens") - text = data["text"] - summary = self._summary_pipeline(text, max_length=77) + full_translation = "" + for segment in segments: + if segment.no_speech_prob > 0.5: + continue + translated_segment = self._translator(segment.text) + full_translation += " " + translated_segment + summary = self._summary_pipeline(full_translation, max_length=77) result: str = summary[0]["summary_text"] # type: ignore self._logger.info("Processed video file into text description: %s, total length: %s", result, len(result)) return result diff --git a/poetry.lock b/poetry.lock index 8e449ae..9c7f402 100644 --- a/poetry.lock +++ b/poetry.lock @@ -250,6 +250,64 @@ files = [ {file = "autocorrect-2.6.1.tar.gz", hash = "sha256:2bc68192dc645b44bece2613caac338e93548c3dac9c563095b27224c7fd4391"}, ] +[[package]] +name = "av" +version = "12.1.0" +description = "Pythonic bindings for FFmpeg's libraries." +optional = false +python-versions = ">=3.8" +files = [ + {file = "av-12.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0df2ad330ccf63ed8192d637306f13123cdf1c06717168d1de8b9a084d62f70"}, + {file = "av-12.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e66ad48dc3f618cf4a75cc14dd7e119d1151ff3c13b9b064014c79bad20df85"}, + {file = "av-12.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f0e8fbbe3cffd04dcbfaf7f9e0469c8c9d3ae962728487aae0dbbac9ebb62567"}, + {file = "av-12.1.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c24d21b116e3af45e2f4b3a7ff1c96ae9a266bcde33a689ace0c52888e74d9"}, + {file = "av-12.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1eff59d1eb0ba263e9efe8e460ca239c6ee2285f1b92c6b3c64f002c1b2ffd56"}, + {file = "av-12.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:09f8bd1fd124e389a266c770d209b5b4333f69c4b5a66b9aa2d09a561b0b54ab"}, + {file = "av-12.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e4c409639699d75e85a5b4b9fbb0538388bb009c8b426f7976b218731815e645"}, + {file = "av-12.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f624a61d8062bb7128a4b0af018ef5c7642acff2af7cea1bb6cc5aa663954b77"}, + {file = "av-12.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73c61635e959dd50857f1ae3ad28984ce813688262672a5188376686dd293333"}, + {file = "av-12.1.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7f8dcf20ecdfed62cb8b31790d3f394c76f05d5d58d5cc516f7b37c8608b78e2"}, + {file = "av-12.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebb11aba1ef2acb945713be5f4f7a359439230dc566243c354dddb2b06361367"}, + {file = "av-12.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:a309994db77f632b606fe22c5bac03302e3dbe48d53c195abc435ccc56192746"}, + {file = "av-12.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:08401e59a9e33a42511d28cf1fdc570c31d3416426a2d73f4f4aaaaca5945c54"}, + {file = 
"av-12.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:efd45e3aa1e478ccbaafd84baf7d95d660b9cef30d850816129fd37d76813589"}, + {file = "av-12.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ab553ce72c631477181d6c08c6e710afa44fa3452e61b82d9a75be07b1b2fef"}, + {file = "av-12.1.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:555f3240306ff02169ff209b152f97b071b57957868c3004c65e25c28130d593"}, + {file = "av-12.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07706499489f2047b54a4675dd04e2cf88322caef904b7b6eb03f480e682cf15"}, + {file = "av-12.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:f669f5fb2515e9a4c9ee05b24ffbe3168d33c241bda93c84c8e384ca682a5cde"}, + {file = "av-12.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:876302ee793a457a03c4faa8281012671bb52dec843062bec59d6f0ae3735ba6"}, + {file = "av-12.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e6ad88e1e61e65c69d92ff1db8826686f913f147b427c99aa3202b027e766128"}, + {file = "av-12.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49a8f88b26d3d25140633a8ec48328a9467bbe001d01c54472394484cdb60b10"}, + {file = "av-12.1.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97873f344344b9b6aef786b22b57fb42c6eaa4ea0798d2020c5ed061f29ab3d6"}, + {file = "av-12.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdf4c54354580abbea9390e23a471a346e9a4b4ca19c6929ad11a59d525e2ad3"}, + {file = "av-12.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:dc1a82e7d43495be6d34b50fd917989a72de7c3a7434d8ec72af0952c1ad4ea3"}, + {file = "av-12.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:41d13494401bd3968255f7f9af2af203c30b684efc5a7ed92ebe9ec37f9f9264"}, + {file = "av-12.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fc36f7b74e88db8e73fa69dc869331da74abc4f034ecd55f85f6232fcdddca60"}, + {file = "av-12.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81ff7a43ce921f2cc3c794810b148c4fa2cfd7ff10f4404072c94cf57b39b13d"}, + {file = "av-12.1.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce02915698d605c19c372314b7894033a451e838300d0a45c2708a550044e2d1"}, + {file = "av-12.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eadd5c7c374c9ff889a9116802cdda7ef9d574b623338f4045effc0f3f3c2cbc"}, + {file = "av-12.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:f32893849fe34300f3cec51c4ae71c45b0acac448d36336d3452a5bb4f7e11bf"}, + {file = "av-12.1.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a0a2a8693fdaa3bbb00255cda388f110f7a0b00817470a8cd8f1aa5c8dcbc3c9"}, + {file = "av-12.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:615f440856cbc5b96b8ae52c75ba722f082b898c3ab837eae024a06a0914e8a6"}, + {file = "av-12.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:257fe519b0ffb4e900b737515137fb9ae0490edca7d70818b6c71c3cd79994ca"}, + {file = "av-12.1.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:04afe8f9005bb42f95717bcfbb22a8950b4b942a862444edb1f0bab71ea702e9"}, + {file = "av-12.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63cbeaedc0184094b7d36bd4267cd61e6c69c18cb3464cc726ce6a8a438ac87a"}, + {file = "av-12.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0a0e056baa87037f932d12de3d3f258cbc4284d18d85099ccd845b333ac1bb91"}, + {file = "av-12.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", 
hash = "sha256:7d549c2e6e9035022ea2280b781150a8c81acc4a03c69bde20b2f53262041a88"}, + {file = "av-12.1.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:3b1e02715cbb985b0efe6b6aaf134f9d1fee760822a07fd19e995a8e461909f4"}, + {file = "av-12.1.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b348264ba26152d7b06f2aaf0b2a11c90b13c628a447f6daa2a6770b9443fb0"}, + {file = "av-12.1.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6a3b3e4138cd1977f14e3d16c5f89979de8efa251d7558e2dc10a51cfcc0100"}, + {file = "av-12.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:105b017958eb5b6a128a5399200a4ec2b1040c2047e0b5f5e3714cd64fe7046e"}, + {file = "av-12.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:00596e53db3082193142e32fbdf47349724221de117645b0ed8fcaaec508adf4"}, + {file = "av-12.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed7c48d2d79961d70ea59f44fcff453bb2444a152793f80d2ceaa17af4331b9c"}, + {file = "av-12.1.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:d2c486adf83fc5b8e444efcc32f3eef27eefd6d0966ef68607d41205adcd8ec0"}, + {file = "av-12.1.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abe9475dd2c8bea47338d5e90d6a45a28930d0fe3820ed2d3d09dfbb3316d476"}, + {file = "av-12.1.0-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0130a8391aa258eee60da3c09d69eb5c9480f14a9f1b1b5312336bac879edd2a"}, + {file = "av-12.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:669f206cfdd5696d0edf2c81c5d220acc40b4153b71cf6662618c376e00b6d3a"}, + {file = "av-12.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e322533f585c2e8df07aa708c594fcb67f5f27a2f8b4107a7e6a6f90606190c7"}, + {file = "av-12.1.0.tar.gz", hash = "sha256:67adab9fdabcb8a86bd542787196580e38ed4132331ee9e82234b23cea9546b3"}, +] + [[package]] name = "backoff" version = "2.2.1" @@ -628,6 +686,45 @@ humanfriendly = ">=9.1" [package.extras] cron = ["capturer (>=2.4)"] +[[package]] +name = "ctranslate2" +version = "4.3.1" +description = "Fast inference engine for Transformer models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ctranslate2-4.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e962c9dc3ddfacf60f2467bea5f91f75239c3d9c17656e4b0c569d956d662b99"}, + {file = "ctranslate2-4.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:49a0d9136d577b667c1bb450267248d9cf205b5eb28b89b3f70c296ec5285da8"}, + {file = "ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:343b24fe3d8a5b6a7c8082332415767bef7ceaf15bb43d0cec7e83665108c51e"}, + {file = "ctranslate2-4.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7d95ecb440e4985cad4623a1fe7bb91406bab4aa55b00aa89a0c16eb5939d640"}, + {file = "ctranslate2-4.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:febf7cf0fb641c76035cdece58e97d27f4e8950a5e32fc480f9afa1bcbbb856c"}, + {file = "ctranslate2-4.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a49dc5d339e2f4ed016553db0d0e6cbd369742697c87c6cc0cc15a47c7c72d00"}, + {file = "ctranslate2-4.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:def98f6f8900470b2cec9408e5b0402af75f40f771391ebacd2b60666b8d75b9"}, + {file = "ctranslate2-4.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30c02fcd5a7be93bf42a8adf81a9ac4f394e23bd639192907b2e11feae589971"}, + {file = 
"ctranslate2-4.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a06043910a7dee91ea03634be2cff2e1338a9f87bb51e062c03bae69e2c826b6"}, + {file = "ctranslate2-4.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:6f49834b63848f17dfdc1b2b8c632c31932ad69e130ce0f7b1e2505aa3923e6c"}, + {file = "ctranslate2-4.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fcf649d976070ddd33cdda00a7a60fde6f1fbe27d65d2c6141dd95153f965f01"}, + {file = "ctranslate2-4.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f63f779f1d4518acdc694b1938887d4f28613ac2dfe507ccc2c0d56dd8c95b40"}, + {file = "ctranslate2-4.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68301fbc5fb7daa609eb12ca6c2ed8aa29852c20f962532317762d1889e751d9"}, + {file = "ctranslate2-4.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45c5b352783bd3806f0c9f5dcbfa49d89c0dde71cb7d1b1c527c525e85af3ded"}, + {file = "ctranslate2-4.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08626f115d5a39c56a666680735d6eebfc4d8a215288896d4d8afc14cfcdcffe"}, + {file = "ctranslate2-4.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e40d43c5f7d25f40d31cca0541cf21c2846f89509b99189d340fdee595391196"}, + {file = "ctranslate2-4.3.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f352bcb802ab9ff1b94a25b4915c4f9f97cdd230993cf45ea290592d8997c2e2"}, + {file = "ctranslate2-4.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c202011fa2ebb8129ba98a65df48df075f0ef53f905f2b13b8cd00f31c7ccff"}, + {file = "ctranslate2-4.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4bca2ce519c497bc2f79e567093609d7bdfaff3313220e0d831797288803f3aa"}, + {file = "ctranslate2-4.3.1-cp38-cp38-win_amd64.whl", hash = "sha256:ef812a4129e877f64f8ca2438b6247060af0f053a56b438dbfa81dae9ca12675"}, + {file = "ctranslate2-4.3.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d8679354547260db999c2bcc6f11a31dad828c3d896d6120045bd0333940732f"}, + {file = "ctranslate2-4.3.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:60bc176dd2e0ee6ddd33682401440f7626d115fed4f1e5e6816d9f7f213d1a62"}, + {file = "ctranslate2-4.3.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d394367fe472b6540489e3b081fc7e17cea2264075b074fb28eca30ff63463f"}, + {file = "ctranslate2-4.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f1fd426d9019198d0fd8f37a18bf9c486241f711d597686956c58cd7676d564"}, + {file = "ctranslate2-4.3.1-cp39-cp39-win_amd64.whl", hash = "sha256:de05e33790d72492a76101a0357c3d87d97ad53af84417c78f45e85df76d39e8"}, +] + +[package.dependencies] +numpy = "*" +pyyaml = ">=5.3,<7" +setuptools = "*" + [[package]] name = "decorator" version = "5.1.1" @@ -817,6 +914,28 @@ typer = ">=0.12.3" [package.extras] standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"] +[[package]] +name = "faster-whisper" +version = "1.0.2" +description = "Faster Whisper transcription with CTranslate2" +optional = false +python-versions = ">=3.8" +files = [ + {file = "faster-whisper-1.0.2.tar.gz", hash = "sha256:54d9fc698f7c665e00a0d5ed65d6e975b72a8862b8214f20a22e79b115c41511"}, + {file = "faster_whisper-1.0.2-py3-none-any.whl", hash = "sha256:d968c289222e766a49ed97eecec24e934bdef405183f57d6d434a364bb3569c1"}, +] + +[package.dependencies] +av = ">=11.0,<13" +ctranslate2 = ">=4.0,<5" +huggingface-hub = ">=0.13" +onnxruntime = ">=1.14,<2" +tokenizers = ">=0.13,<1" + +[package.extras] +conversion = ["transformers[torch] (>=4.23)"] +dev = ["black 
(==23.*)", "flake8 (==6.*)", "isort (==5.*)", "pytest (==7.*)"] + [[package]] name = "filelock" version = "3.14.0" @@ -4935,4 +5054,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "f8851f3ea1cdfc285ecd3281914ea167219f602416c140724e37ae3a63d2675c" +content-hash = "70e87c46c783b44ad19f49a08a06b8cb1e90ab33467b80cb32a6f1ee9b221616" diff --git a/pyproject.toml b/pyproject.toml index a0f3e54..d90b56f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ fastapi-cache2 = {extras = ["redis"], version = "^0.2.1"} aiomcache = "^0.8.2" autocorrect = "^2.6.1" whisper-cpp-python = "^0.2.0" +faster-whisper = "^1.0.2" [build-system] From 84affe88b1f807f61a590150bc1bc1007529a261 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Mon, 24 Jun 2024 10:50:03 +0300 Subject: [PATCH 32/39] add lost env vars --- inference/.env.dist | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inference/.env.dist b/inference/.env.dist index 0281e6a..70e8bb6 100644 --- a/inference/.env.dist +++ b/inference/.env.dist @@ -1,3 +1,4 @@ CLIP_MODEL=laion/CLIP-ViT-g-14-laion2B-s12B-b42K -WHISPER_PATH=/app/model_data/ggml-large-v3.bin +WHISPER_MODEL=distil-whisper/distil-large-v3 SUMMARIZATION_MODEL=facebook/bart-large-cnn +TRANSLATION_MODEL=Helsinki-NLP/opus-mt-ru-en From bc710eb8a46f99d81f9aeea4393972e346c381e4 Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Mon, 24 Jun 2024 11:31:18 +0300 Subject: [PATCH 33/39] fix snapshot loading --- inference/whisper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inference/whisper.py b/inference/whisper.py index 4937c9f..228322f 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -3,6 +3,7 @@ import logging from typing import Callable +from huggingface_hub import snapshot_download import requests from transformers import Pipeline, pipeline from faster_whisper import WhisperModel @@ -19,7 +20,7 @@ class WhisperService: _whisper: WhisperModel = field( default_factory=lambda: WhisperModel( - Settings.whisper_model, + snapshot_download(Settings.whisper_model), device="cpu", compute_type="float16", cpu_threads=8, From 2dc3f8779662c25177985bb2e341bcb7f801d52e Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Mon, 24 Jun 2024 12:21:44 +0300 Subject: [PATCH 34/39] change whisper model --- inference/.env.dist | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/.env.dist b/inference/.env.dist index 70e8bb6..1f358fd 100644 --- a/inference/.env.dist +++ b/inference/.env.dist @@ -1,4 +1,4 @@ CLIP_MODEL=laion/CLIP-ViT-g-14-laion2B-s12B-b42K -WHISPER_MODEL=distil-whisper/distil-large-v3 +WHISPER_MODEL=flyingleafe/faster-whisper-large-v3 SUMMARIZATION_MODEL=facebook/bart-large-cnn TRANSLATION_MODEL=Helsinki-NLP/opus-mt-ru-en From e39bc1524e035c377032bd57068107324afe624b Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Mon, 24 Jun 2024 12:27:27 +0300 Subject: [PATCH 35/39] change quantization to int8 --- inference/whisper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/whisper.py b/inference/whisper.py index 228322f..b094ad2 100644 --- a/inference/whisper.py +++ b/inference/whisper.py @@ -22,7 +22,7 @@ class WhisperService: default_factory=lambda: WhisperModel( snapshot_download(Settings.whisper_model), device="cpu", - compute_type="float16", + compute_type="int8", cpu_threads=8, num_workers=4, ) From a07935a9c5e047808d72fe87c432699bb14dfa4c Mon Sep 17 00:00:00 2001 From: 
Antony Redman Date: Mon, 24 Jun 2024 12:31:35 +0300 Subject: [PATCH 36/39] add sentencepiece to list of deps --- inference/requirements.txt | 1 + poetry.lock | 64 +++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/inference/requirements.txt b/inference/requirements.txt index 0ce7ad4..252865d 100644 --- a/inference/requirements.txt +++ b/inference/requirements.txt @@ -11,3 +11,4 @@ opencv-python==4.10.0.82 environs==11.0.0 faster-whisper==1.0.2 PyYAML>=6.0 +sentencepiece==0.2.0 diff --git a/poetry.lock b/poetry.lock index 9c7f402..1120b4a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3891,6 +3891,68 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pyde doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +[[package]] +name = "sentencepiece" +version = "0.2.0" +description = "SentencePiece python wrapper" +optional = false +python-versions = "*" +files = [ + {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"}, + {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"}, + {file = "sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d7b67e724bead13f18db6e1d10b6bbdc454af574d70efbb36f27d90387be1ca3"}, + {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2fde4b08cfe237be4484c6c7c2e2c75fb862cfeab6bd5449ce4caeafd97b767a"}, + {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c378492056202d1c48a4979650981635fd97875a00eabb1f00c6a236b013b5e"}, + {file = "sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1380ce6540a368de2ef6d7e6ba14ba8f3258df650d39ba7d833b79ee68a52040"}, + {file = "sentencepiece-0.2.0-cp310-cp310-win32.whl", hash = "sha256:a1151d6a6dd4b43e552394aed0edfe9292820272f0194bd56c7c1660a0c06c3d"}, + {file = "sentencepiece-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:d490142b0521ef22bc1085f061d922a2a6666175bb6b42e588ff95c0db6819b2"}, + {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17982700c4f6dbb55fa3594f3d7e5dd1c8659a274af3738e33c987d2a27c9d5c"}, + {file = "sentencepiece-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:7c867012c0e8bcd5bdad0f791609101cb5c66acb303ab3270218d6debc68a65e"}, + {file = "sentencepiece-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fd6071249c74f779c5b27183295b9202f8dedb68034e716784364443879eaa6"}, + {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f90c55a65013cbb8f4d7aab0599bf925cde4adc67ae43a0d323677b5a1c6cb"}, + {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b293734059ef656dcd65be62ff771507bea8fed0a711b6733976e1ed3add4553"}, + {file = "sentencepiece-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e58b47f933aca74c6a60a79dcb21d5b9e47416256c795c2d58d55cec27f9551d"}, + 
{file = "sentencepiece-0.2.0-cp311-cp311-win32.whl", hash = "sha256:c581258cf346b327c62c4f1cebd32691826306f6a41d8c4bec43b010dee08e75"}, + {file = "sentencepiece-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:0993dbc665f4113017892f1b87c3904a44d0640eda510abcacdfb07f74286d36"}, + {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2"}, + {file = "sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c"}, + {file = "sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f"}, + {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08"}, + {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7"}, + {file = "sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109"}, + {file = "sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251"}, + {file = "sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:4547683f330289ec4f093027bfeb87f9ef023b2eb6f879fdc4a8187c7e0ffb90"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7cd6175f7eaec7142d2bf6f6597ce7db4c9ac89acf93fcdb17410c3a8b781eeb"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:859ba1acde782609a0910a26a60e16c191a82bf39b5621107552c0cd79fad00f"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcbbef6cc277f8f18f36959e305f10b1c620442d75addc79c21d7073ae581b50"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-win32.whl", hash = "sha256:536b934e244829e3fe6c4f198652cd82da48adb9aa145c9f00889542726dee3d"}, + {file = "sentencepiece-0.2.0-cp36-cp36m-win_amd64.whl", hash = "sha256:0a91aaa3c769b52440df56fafda683b3aa48e3f2169cf7ee5b8c8454a7f3ae9b"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:787e480ca4c1d08c9985a7eb1eae4345c107729c99e9b5a9a00f2575fc7d4b4b"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4d158189eb2ecffea3a51edf6d25e110b3678ec47f1a40f2d541eafbd8f6250"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1e5ca43013e8935f25457a4fca47e315780172c3e821b4b13a890668911c792"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7140d9e5a74a0908493bb4a13f1f16a401297bd755ada4c707e842fbf6f0f5bf"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-win32.whl", hash = "sha256:6cf333625234f247ab357b0bd9836638405ea9082e1543d5b8408f014979dcbf"}, + {file = "sentencepiece-0.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:ff88712338b01031910e8e61e7239aff3ce8869ee31a47df63cb38aadd591bea"}, + {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = 
"sha256:20813a68d4c221b1849c62c30e1281ea81687894d894b8d4a0f4677d9311e0f5"}, + {file = "sentencepiece-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:926ef920ae2e8182db31d3f5d081ada57804e3e1d3a8c4ef8b117f9d9fb5a945"}, + {file = "sentencepiece-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:89f65f69636b7e9c015b79dff9c9985a9bc7d19ded6f79ef9f1ec920fdd73ecf"}, + {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f67eae0dbe6f2d7d6ba50a354623d787c99965f068b81e145d53240198021b0"}, + {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:98501e075f35dd1a1d5a20f65be26839fcb1938752ec61539af008a5aa6f510b"}, + {file = "sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3d1d2cc4882e8d6a1adf9d5927d7716f80617fc693385661caff21888972269"}, + {file = "sentencepiece-0.2.0-cp38-cp38-win32.whl", hash = "sha256:b99a308a2e5e569031ab164b74e6fab0b6f37dfb493c32f7816225f4d411a6dd"}, + {file = "sentencepiece-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:cdb701eec783d3ec86b7cd4c763adad8eaf6b46db37ee1c36e5e6c44b3fe1b5f"}, + {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1e0f9c4d0a6b0af59b613175f019916e28ade076e21242fd5be24340d8a2f64a"}, + {file = "sentencepiece-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:298f21cc1366eb60311aedba3169d30f885c363ddbf44214b0a587d2908141ad"}, + {file = "sentencepiece-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3f1ec95aa1e5dab11f37ac7eff190493fd87770f7a8b81ebc9dd768d1a3c8704"}, + {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b06b70af54daa4b4904cbb90b4eb6d35c9f3252fdc86c9c32d5afd4d30118d8"}, + {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22e37bac44dd6603388cb598c64ff7a76e41ca774646f21c23aadfbf5a2228ab"}, + {file = "sentencepiece-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0461324897735512a32d222e3d886e24ad6a499761952b6bda2a9ee6e4313ea5"}, + {file = "sentencepiece-0.2.0-cp39-cp39-win32.whl", hash = "sha256:38aed822fb76435fa1f12185f10465a94ab9e51d5e8a9159e9a540ce926f0ffd"}, + {file = "sentencepiece-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:d8cf876516548b5a1d6ac4745d8b554f5c07891d55da557925e5c13ff0b4e6ad"}, + {file = "sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843"}, +] + [[package]] name = "setuptools" version = "70.0.0" @@ -5054,4 +5116,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "70e87c46c783b44ad19f49a08a06b8cb1e90ab33467b80cb32a6f1ee9b221616" +content-hash = "d9abc569af387cd74eb903c00e081e203ea028606bde4e4e848121ce19a9d2f9" diff --git a/pyproject.toml b/pyproject.toml index d90b56f..a5ea6e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ aiomcache = "^0.8.2" autocorrect = "^2.6.1" whisper-cpp-python = "^0.2.0" faster-whisper = "^1.0.2" +sentencepiece = "^0.2.0" [build-system] From 87db31fcf9a0f0fd28b175f35de6d4cb72ea0ceb Mon Sep 17 00:00:00 2001 From: Antony Redman Date: Mon, 24 Jun 2024 20:03:57 +0300 Subject: [PATCH 37/39] scrap whisper for time being --- inference/.env.dist | 3 -- inference/deps.py | 8 ----- inference/frame_video.py | 13 -------- inference/main.py | 12 +------ inference/requirements.txt | 1 - inference/settings.py 
| 1 - inference/translator.py | 32 ------------------ inference/whisper.py | 68 -------------------------------------- 8 files changed, 1 insertion(+), 137 deletions(-) delete mode 100644 inference/translator.py delete mode 100644 inference/whisper.py diff --git a/inference/.env.dist b/inference/.env.dist index 1f358fd..8b47efb 100644 --- a/inference/.env.dist +++ b/inference/.env.dist @@ -1,4 +1 @@ CLIP_MODEL=laion/CLIP-ViT-g-14-laion2B-s12B-b42K -WHISPER_MODEL=flyingleafe/faster-whisper-large-v3 -SUMMARIZATION_MODEL=facebook/bart-large-cnn -TRANSLATION_MODEL=Helsinki-NLP/opus-mt-ru-en diff --git a/inference/deps.py b/inference/deps.py index d937d8a..dc76a0b 100644 --- a/inference/deps.py +++ b/inference/deps.py @@ -5,7 +5,6 @@ from fastapi import Depends, FastAPI, Request from transformers import CLIPModel, CLIPProcessor -from whisper import WhisperService from settings import Settings @@ -22,8 +21,6 @@ async def lifespan(app: FastAPI): Settings.clip_model, cache_dir="./model_cache" ) - logger.info("Setting up Whisper service...") - app.state.whisper_model = WhisperService() yield @@ -35,10 +32,5 @@ def _get_clip_processor(request: Request) -> CLIPProcessor: return request.app.state.processor -def _get_whisper(request: Request) -> WhisperService: - return request.app.state.whisper_model - - Processor = Annotated[CLIPProcessor, Depends(_get_clip_processor)] Model = Annotated[CLIPModel, Depends(_get_clip_model)] -Whisper = Annotated[WhisperService, Depends(_get_whisper)] diff --git a/inference/frame_video.py b/inference/frame_video.py index e61cd13..374cc20 100644 --- a/inference/frame_video.py +++ b/inference/frame_video.py @@ -57,16 +57,3 @@ def create_frame_in_ram(video_path: str, timecode: str) -> BytesIO: process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) frame_data, _ = process.communicate() return BytesIO(frame_data) - -def get_audio_in_ram(video_path: str) -> BytesIO: - command = [ - "ffmpeg", - "-i", video_path, - "-acodec", "pcm_s16le", - "-ac", "1", - "-ar", "16000", - "-" - ] - process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - audio_data, _ = process.communicate() - return BytesIO(audio_data) diff --git a/inference/main.py b/inference/main.py index 0e9351a..bffefea 100644 --- a/inference/main.py +++ b/inference/main.py @@ -2,7 +2,7 @@ from fastapi import FastAPI from fastapi.responses import JSONResponse -from deps import Model, Processor, Whisper, lifespan +from deps import Model, Processor, lifespan from clip import CLIP from models import EncodeRequest, EncodeSearchRequest @@ -18,24 +18,14 @@ async def encode( request: EncodeRequest, processor: Processor, model: Model, - whisper: Whisper ): logger.info("Initializing CLIP module...") clip = CLIP(processor=processor, model=model, logger=logger) logger.info("CLIP module successfully initialized") video_features = clip(request.link, encode_type="video") - if request.description is not None: - description_features = clip(request.description, encode_type="text") - else: - description_features = None - - audio_transcription = whisper(request.link) - audio_features = clip(audio_transcription, encode_type="text") return { "video": video_features, - "audio": audio_features, - "description": description_features } @app.post("/encode-search") diff --git a/inference/requirements.txt b/inference/requirements.txt index 252865d..14962b3 100644 --- a/inference/requirements.txt +++ b/inference/requirements.txt @@ -9,6 +9,5 @@ pillow==10.3.0 scenedetect==0.6.3 
opencv-python==4.10.0.82 environs==11.0.0 -faster-whisper==1.0.2 PyYAML>=6.0 sentencepiece==0.2.0 diff --git a/inference/settings.py b/inference/settings.py index 5c5f3e0..6242992 100644 --- a/inference/settings.py +++ b/inference/settings.py @@ -7,5 +7,4 @@ class Settings: clip_model: str = env.str("CLIP_MODEL") summarization_model: str = env.str("SUMMARIZATION_MODEL") - whisper_model: str = env.str("WHISPER_MODEL") translation_model: str = env.str("TRANSLATION_MODEL") diff --git a/inference/translator.py b/inference/translator.py deleted file mode 100644 index 65a1aa2..0000000 --- a/inference/translator.py +++ /dev/null @@ -1,32 +0,0 @@ -from dataclasses import dataclass - -from transformers import AutoModelForSeq2SeqLM, AutoTokenizer - -from settings import Settings - - -@dataclass -class OpusTranslatorModel: - _model: AutoModelForSeq2SeqLM | None = None - _tokenizer: AutoTokenizer | None = None - - _model_name: str = Settings.translation_model - _device: str = "cpu" - - def __post_init__(self): - self._tokenizer = AutoTokenizer.from_pretrained( - self._model_name, - cache_dir="./model_cache" - ) - self._model = AutoModelForSeq2SeqLM.from_pretrained( - self._model_name, - cache_dir="./model_cache" - ) - - - def __call__(self, translate_query: str) -> str: - input_ids = self._tokenizer.encode(translate_query, return_tensors="pt") - output_ids = self._model.generate(input_ids.to(self._device), max_new_tokens=100) - en_text = self._tokenizer.decode(output_ids[0], skip_special_tokens=True) - - return en_text diff --git a/inference/whisper.py b/inference/whisper.py deleted file mode 100644 index b094ad2..0000000 --- a/inference/whisper.py +++ /dev/null @@ -1,68 +0,0 @@ -from dataclasses import dataclass, field -from io import BytesIO -import logging -from typing import Callable - -from huggingface_hub import snapshot_download -import requests -from transformers import Pipeline, pipeline -from faster_whisper import WhisperModel - -from frame_video import get_audio_in_ram -from translator import OpusTranslatorModel -from settings import Settings - - -model = WhisperModel - - -@dataclass -class WhisperService: - _whisper: WhisperModel = field( - default_factory=lambda: WhisperModel( - snapshot_download(Settings.whisper_model), - device="cpu", - compute_type="int8", - cpu_threads=8, - num_workers=4, - ) - ) - _translator: OpusTranslatorModel = field( - default_factory=OpusTranslatorModel - ) - _summary_pipeline: Pipeline = field( - default_factory=lambda: pipeline( - "summarization", - model=Settings.summarization_model - ) - ) - _logger: logging.Logger = field( - default_factory=lambda: logging.getLogger(__name__) - ) - _get_audio_in_ram: Callable[[str], BytesIO] = get_audio_in_ram - - def __call__(self, link: str) -> str: - self._logger.info("Converting video file to transcript") - video_data = BytesIO(requests.get(link).content) - segments, info = self._whisper.transcribe( - video_data, - language="ru", - beam_size=5 - ) - if info.language_probability < 0.5: - self._logger.info( - "Cannot properly identify speech, probability=%s, returning empty string", - info.language_probability - ) - return "" - self._logger.info("summarizing transcript into 77 CLIP tokens") - full_translation = "" - for segment in segments: - if segment.no_speech_prob > 0.5: - continue - translated_segment = self._translator(segment.text) - full_translation += " " + translated_segment - summary = self._summary_pipeline(full_translation, max_length=77) - result: str = summary[0]["summary_text"] # type: ignore - 
self._logger.info("Processed video file into text description: %s, total length: %s", result, len(result)) - return result From 9aa10fdfd620a987cdccd5d799977cc54cc9031c Mon Sep 17 00:00:00 2001 From: Arseny Chebyshev Date: Tue, 25 Jun 2024 01:40:39 +0500 Subject: [PATCH 38/39] (upd) Key frame search, trunc text for features --- inference/clip.py | 1 + inference/frame_video.py | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/inference/clip.py b/inference/clip.py index 5109475..439ca56 100644 --- a/inference/clip.py +++ b/inference/clip.py @@ -27,6 +27,7 @@ def __call__(self, encode_source: str, encode_type: Literal["text"] | Literal["v return self._encode_video(encode_source) def _encode_text(self, description: str) -> list[float]: + description = description[:65] # meet the processor max length text_inputs = self.processor(text=[description], return_tensors="pt", padding=True) with torch.no_grad(): text_features = self.model.get_text_features(**text_inputs) diff --git a/inference/frame_video.py b/inference/frame_video.py index 374cc20..d5a089d 100644 --- a/inference/frame_video.py +++ b/inference/frame_video.py @@ -4,7 +4,7 @@ from io import BytesIO from dataclasses import dataclass import requests -from scenedetect import detect, ContentDetector +from scenedetect import detect, ContentDetector, AdaptiveDetector @dataclass class VideoFrame: @@ -14,6 +14,7 @@ class VideoFrame: def create_key_frames_for_video( video_link: str, frame_change_threshold: float = 7.5, + min_scene_len: int = 10, num_of_thumbnails: int = 10 ) -> list[VideoFrame]: frames: list[VideoFrame] = [] @@ -21,12 +22,16 @@ def create_key_frames_for_video( with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp_file: tmp_file.write(video_data.getvalue()) video_path = tmp_file.name - scenes = detect(video_path, ContentDetector(threshold=frame_change_threshold)) + scenes = detect( + video_path=video_path, + detector=ContentDetector(threshold=frame_change_threshold, min_scene_len=min_scene_len) + ) - # Gradually reduce number of key frames with a sliding window + # Gradually reduce number of key frames with a increasingly smaller steps while len(scenes) > num_of_thumbnails: - scenes.pop() - scenes.pop(0) + step = len(scenes) / (num_of_thumbnails - 1) + to_remove_indices = [int(round(i * step)) for i in range(num_of_thumbnails)] + scenes = [scenes[i] for i in range(len(scenes)) if i not in to_remove_indices] for i, scene in enumerate(scenes): scene_start, _ = scene frame_data = create_frame_in_ram(video_path, scene_start.get_timecode()) @@ -39,6 +44,7 @@ def create_key_frames_for_video( return create_key_frames_for_video( video_link=video_link, frame_change_threshold=frame_change_threshold - 2.5, + min_scene_len=min_scene_len - 2 if min_scene_len > 2 else min_scene_len, num_of_thumbnails=num_of_thumbnails ) return frames From 7d17375ba0db399a2def10eabe8e59bf63a676a5 Mon Sep 17 00:00:00 2001 From: Arseny Chebyshev Date: Tue, 25 Jun 2024 02:03:28 +0500 Subject: [PATCH 39/39] (upd) Key frame search, trunc text for features --- inference/.env.dist | 1 + inference/settings.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/inference/.env.dist b/inference/.env.dist index 8b47efb..76b4fdc 100644 --- a/inference/.env.dist +++ b/inference/.env.dist @@ -1 +1,2 @@ CLIP_MODEL=laion/CLIP-ViT-g-14-laion2B-s12B-b42K +TRANSLATION_MODEL=Helsinki-NLP/opus-mt-ru-en diff --git a/inference/settings.py b/inference/settings.py index 6242992..55ade0a 100644 --- 
a/inference/settings.py +++ b/inference/settings.py @@ -5,6 +5,5 @@ class Settings: - clip_model: str = env.str("CLIP_MODEL") - summarization_model: str = env.str("SUMMARIZATION_MODEL") - translation_model: str = env.str("TRANSLATION_MODEL") + clip_model: str = env.str("CLIP_MODEL", default="laion/CLIP-ViT-g-14-laion2B-s12B-b42K") + translation_model: str = env.str("TRANSLATION_MODEL", default="Helsinki-NLP/opus-mt-ru-en")
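
On the description[:65] truncation in PATCH 38: 65 characters is a rough
character-level proxy for the CLIP text encoder's 77-token context window.
If token-level truncation is preferred, the Hugging Face tokenizer can do
it directly — a sketch assuming self.processor is the same CLIPProcessor
already held by the CLIP dataclass, not what the patch actually does:

    # Let the tokenizer truncate at its own model_max_length (77 for CLIP)
    # instead of guessing a safe character count.
    text_inputs = self.processor(
        text=[description],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )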
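
The scene-thinning loop from PATCH 38 is easiest to verify in isolation.
Below is a minimal, runnable sketch of the same pass; the function name
and the fake timecode strings are illustrative only — the real input is
the scene list returned by scenedetect.detect:

    def thin_scenes(scenes: list, num_of_thumbnails: int = 10) -> list:
        # Each pass drops up to num_of_thumbnails evenly spaced entries,
        # repeating until the list is at or below the target size.
        while len(scenes) > num_of_thumbnails:
            step = len(scenes) / (num_of_thumbnails - 1)
            to_remove = {int(round(i * step)) for i in range(num_of_thumbnails)}
            scenes = [s for i, s in enumerate(scenes) if i not in to_remove]
        return scenes

    if __name__ == "__main__":
        fake_scenes = [f"00:00:{i:02d}" for i in range(37)]
        print(thin_scenes(fake_scenes))  # at most 10 evenly spread timecodes survive

Because one pass can remove up to num_of_thumbnails entries at once, an
input only slightly above the target can end up below it (e.g. 12 scenes
thin down to 3 with the default of 10); if hitting the count exactly
matters, selecting num_of_thumbnails evenly spaced scenes in a single
pass avoids the undershoot.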