From 07eb38a466ff1de10ef989932da3c94f712816fb Mon Sep 17 00:00:00 2001
From: michaelfeil
Date: Fri, 15 Nov 2024 20:22:54 -0800
Subject: [PATCH 1/6] try merge 1

---
 libs/infinity_emb/Docker.template.yaml        |   2 +
 libs/infinity_emb/Dockerfile.cpu_auto         |   3 +
 .../infinity_emb/_optional_imports.py         |   1 +
 .../transformer/embedder/optimum.py           |  22 ++-
 .../infinity_emb/transformer/utils_optimum.py | 186 +++++++++++++-----
 5 files changed, 159 insertions(+), 55 deletions(-)

diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml
index 9c19c40d..e1e06a92 100644
--- a/libs/infinity_emb/Docker.template.yaml
+++ b/libs/infinity_emb/Docker.template.yaml
@@ -20,6 +20,8 @@ cpu:
   extra_env_variables: |
     # Sets default to onnx
     ENV INFINITY_ENGINE="optimum"
+    RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
+
 amd:
   # 2 . command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto
diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto
index 97d0d03a..644eb286 100644
--- a/libs/infinity_emb/Dockerfile.cpu_auto
+++ b/libs/infinity_emb/Dockerfile.cpu_auto
@@ -43,12 +43,14 @@ COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
+RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"

 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 # "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"
+RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"

 #
@@ -58,6 +60,7 @@ FROM builder as testing
 # "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"
+RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"

 # lint
 RUN poetry run ruff check .
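Note: the build changes above make `optimum[openvino]` available inside the CPU image; the code diffs that follow wire optimum-intel's OpenVINO backend into the optimum embedder. As a rough sketch of the export path the new code calls into — assuming `optimum[openvino]` is installed; the model id is only an example, not something this patch prescribes:

    # sketch: export a feature-extraction model to OpenVINO IR on the fly
    from optimum.intel import OVModelForFeatureExtraction
    from transformers import AutoTokenizer

    model_id = "BAAI/bge-small-en-v1.5"  # illustrative model id
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # export=True converts the checkpoint to OpenVINO IR, as optimize_model() below does
    model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True)

    batch = tokenizer(["hello world"], return_tensors="pt")
    hidden = model(**batch).last_hidden_state  # pooled/normalized by the embedder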
diff --git a/libs/infinity_emb/infinity_emb/_optional_imports.py b/libs/infinity_emb/infinity_emb/_optional_imports.py
index 6606a146..ab51a9d7 100644
--- a/libs/infinity_emb/infinity_emb/_optional_imports.py
+++ b/libs/infinity_emb/infinity_emb/_optional_imports.py
@@ -69,6 +69,7 @@ def _raise_error(self) -> None:
     "optimum.neuron",
     "",
 )
+CHECK_OPTIMUM_INTEL = OptionalImports("optimum.intel", "optimum")
 CHECK_PIL = OptionalImports("PIL", "vision")
 CHECK_POSTHOG = OptionalImports("posthog", "server")
 CHECK_PYDANTIC = OptionalImports("pydantic", "server")
diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
index e209a742..64c48384 100644
--- a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
@@ -6,7 +6,11 @@

 import numpy as np

-from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_TRANSFORMERS
+from infinity_emb._optional_imports import (
+    CHECK_ONNXRUNTIME,
+    CHECK_TRANSFORMERS,
+    CHECK_OPTIMUM_INTEL,
+)
 from infinity_emb.args import EngineArgs
 from infinity_emb.primitives import EmbeddingReturnType, PoolingMethod
 from infinity_emb.transformer.abstract import BaseEmbedder
@@ -14,7 +18,7 @@
 from infinity_emb.transformer.utils_optimum import (
     cls_token_pooling,
     device_to_onnx,
-    get_onnx_files,
+    # get_onnx_files,
     mean_pooling,
     normalize,
     optimize_model,
@@ -25,24 +29,34 @@
     from optimum.onnxruntime import (  # type: ignore[import-untyped]
         ORTModelForFeatureExtraction,
     )
+    from infinity_emb.transformer.utils_optimum import get_onnx_files

 except (ImportError, RuntimeError, Exception) as ex:
     CHECK_ONNXRUNTIME.mark_dirty(ex)

+
+if CHECK_OPTIMUM_INTEL.is_available:
+    try:
+        from optimum.intel import OVModelForFeatureExtraction  # type: ignore[import-untyped]
+        from infinity_emb.transformer.utils_optimum import get_openvino_files
+
+    except (ImportError, RuntimeError, Exception) as ex:
+        CHECK_OPTIMUM_INTEL.mark_dirty(ex)
+
+
 if CHECK_TRANSFORMERS.is_available:
     from transformers import AutoConfig, AutoTokenizer  # type: ignore[import-untyped]


 class OptimumEmbedder(BaseEmbedder):
     def __init__(self, *, engine_args: EngineArgs):
-        CHECK_ONNXRUNTIME.mark_required()
         provider = device_to_onnx(engine_args.device)

         onnx_file = get_onnx_files(
             model_name_or_path=engine_args.model_name_or_path,
             revision=engine_args.revision,
             use_auth_token=True,
-            prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()),
+            prefer_quantized="cpu" in provider.lower(),
         )

         self.pooling = (
diff --git a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
index 76176c57..e5f7aae8 100644
--- a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
@@ -8,11 +8,16 @@
 from huggingface_hub import HfApi, HfFolder  # type: ignore
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE  # type: ignore

-from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_OPTIMUM_AMD
-
+from infinity_emb._optional_imports import (
+    CHECK_ONNXRUNTIME,
+    CHECK_TORCH,
+    CHECK_OPTIMUM_INTEL,
+    CHECK_OPTIMUM_AMD,
+)
 from infinity_emb.log_handler import logger
 from infinity_emb.primitives import Device

+
 if CHECK_ONNXRUNTIME.is_available:
     try:
         import onnxruntime as ort  # type: ignore
@@ -26,6 +31,14 @@
         CHECK_ONNXRUNTIME.mark_dirty(ex)


+if CHECK_OPTIMUM_INTEL.is_available:
+    try:
+        from optimum.intel import OVModelForFeatureExtraction  # type: ignore[import-untyped]
+
+    except (ImportError, RuntimeError, Exception) as ex:
+        CHECK_OPTIMUM_INTEL.mark_dirty(ex)
+
+
 def mean_pooling(last_hidden_states: np.ndarray, attention_mask: np.ndarray):
     input_mask_expanded = (np.expand_dims(attention_mask, axis=-1)).astype(float)
@@ -55,6 +68,8 @@ def device_to_onnx(device: Device) -> str:
         if "OpenVINOExecutionProvider" in available:
             return "OpenVINOExecutionProvider"
         return "CPUExecutionProvider"
+    elif device == Device.openvino:
+        return "OpenVINOExecutionProvider"
     elif device == Device.cuda:
         if "ROCMExecutionProvider" in available:
             return "ROCMExecutionProvider"
@@ -106,7 +121,6 @@ def optimize_model(
         trust_remote_code (bool, optional): Whether to trust the remote code. Defaults to True.
     """

-    ## If there is no need for optimization
     if execution_provider == "TensorrtExecutionProvider":
         return model_class.from_pretrained(
             model_name_or_path,
@@ -125,34 +139,59 @@
             },
         )

-    elif execution_provider in ["ROCMExecutionProvider", "MIGraphXExecutionProvider"]:
-        CHECK_OPTIMUM_AMD.mark_required()
-        return model_class.from_pretrained(
-            model_name_or_path,
-            revision=revision,
-            trust_remote_code=trust_remote_code,
-            provider=execution_provider,
-            file_name=file_name,
-        )
+    file_optimized: Union[str, list] = ""

-    ## path to find if model has been optimized
-    CHECK_ONNXRUNTIME.mark_required()
-    path_folder = (
-        Path(HUGGINGFACE_HUB_CACHE) / "infinity_onnx" / execution_provider / model_name_or_path
-    )
-    OPTIMIZED_SUFFIX = "_optimized.onnx"
-    files_optimized = list(path_folder.glob(f"**/*{OPTIMIZED_SUFFIX}"))
+    extra_args = {}
+
+    logger.info(f"file_name: {file_name}")

-    logger.info(f"files_optimized: {files_optimized}")
-    if files_optimized:
-        file_optimized = files_optimized[-1]
+    if execution_provider == "OpenVINOExecutionProvider":  # Optimum Intel OpenVINO path
+        CHECK_OPTIMUM_INTEL.mark_required()
+        path_folder = (
+            Path(HUGGINGFACE_HUB_CACHE)
+            / "infinity_openvino"
+            / execution_provider
+            / model_name_or_path
+        )
+        OPTIMIZED_PREFIX = "openvino_model"
+        files_optimized = sorted(list(path_folder.glob(f"**/{OPTIMIZED_PREFIX}*")))
+        if files_optimized:
+            file_optimized = files_optimized[-1]
+        if file_name:
+            file_optimized = file_name
+
+        extra_args = {"ov_config": {"INFERENCE_PRECISION_HINT": "bf16"}}
+
+    elif execution_provider == "CPUExecutionProvider":  # Optimum onnx cpu path
+        CHECK_ONNXRUNTIME.mark_required()
+        path_folder = (
+            Path(HUGGINGFACE_HUB_CACHE) / "infinity_onnx" / execution_provider / model_name_or_path
+        )
+        OPTIMIZED_SUFFIX = "_optimized.onnx"
+        files_optimized = list(path_folder.glob(f"**/*{OPTIMIZED_SUFFIX}"))
+        if files_optimized:
+            file_optimized = files_optimized[0]
+    else:
+        raise ValueError(
+            f"Does not support {execution_provider}."
+            "Optimum engine only support `OpenVINOExecutionProvider` "
+            "and `CPUExecutionProvider`."
+        )
+
+    if file_optimized:
+        # print("files_optimized: ", files_optimized)
         logger.info(f"Optimized model found at {file_optimized}, skipping optimization")
         return model_class.from_pretrained(
-            file_optimized.parent.as_posix(),
+            file_optimized.parent.as_posix()
+            if not isinstance(file_optimized, str)
+            else model_name_or_path,
             revision=revision,
             trust_remote_code=trust_remote_code,
-            provider=execution_provider,
-            file_name=file_optimized.name,
+            provider=execution_provider,  # will be ignored by optimum intel
+            file_name=file_optimized.name
+            if not isinstance(file_optimized, str)
+            else file_optimized,
+            **extra_args,
         )

     unoptimized_model = model_class.from_pretrained(
@@ -166,35 +205,52 @@
         return unoptimized_model
     try:
         logger.info("Optimizing model")
+        if execution_provider == "OpenVINOExecutionProvider":
+            model = OVModelForFeatureExtraction.from_pretrained(
+                model_name_or_path,
+                export=True,
+                # ov_config={"INFERENCE_PRECISION_HINT": "fp32"}  # fp16 for now as it has better precision than bf16
+                # ov_config={"INFERENCE_PRECISION_HINT": "fp16"}  # fp16 for now as it has better precision than bf16
+                ov_config={
+                    "INFERENCE_PRECISION_HINT": "bf16"
+                },  # fp16 for now as it has better precision than bf16
+            )
+            model.save_pretrained(path_folder.as_posix())  # save the model
+
+        elif execution_provider == "CPUExecutionProvider":  # Optimum onnx cpu path
+            optimizer = ORTOptimizer.from_pretrained(unoptimized_model)
+
+            is_gpu = "cpu" not in execution_provider.lower()
+            optimization_config = OptimizationConfig(
+                optimization_level=99,
+                optimize_with_onnxruntime_only=False,
+                optimize_for_gpu=is_gpu,
+                fp16=is_gpu,
+                # enable_gelu_approximation=True,
+                # enable_gemm_fast_gelu_fusion=True,  # might not work
+            )

-        optimizer = ORTOptimizer.from_pretrained(unoptimized_model)
-
-        is_gpu = not (
-            "cpu" in execution_provider.lower() or "openvino" in execution_provider.lower()
-        )
-        optimization_config = OptimizationConfig(
-            optimization_level=99,
-            optimize_with_onnxruntime_only=False,
-            optimize_for_gpu=is_gpu,
-            fp16=is_gpu,
-            # enable_gelu_approximation=True,
-            # enable_gemm_fast_gelu_fusion=True,  # might not work
-        )
+            optimized_model_path = optimizer.optimize(
+                optimization_config=optimization_config,
+                save_dir=path_folder.as_posix(),
+                # if larger than 2gb use external data format
+                one_external_file=True,
+            )

-        optimized_model_path = optimizer.optimize(
-            optimization_config=optimization_config,
-            save_dir=path_folder.as_posix(),
-            # if larger than 2gb use external data format
-            one_external_file=True,
-        )
+            model = model_class.from_pretrained(
+                optimized_model_path,
+                revision=revision,
+                trust_remote_code=trust_remote_code,
+                provider=execution_provider,
+                file_name=Path(file_name).name.replace(".onnx", OPTIMIZED_SUFFIX),
+            )
+        else:
+            raise ValueError(
+                f"Does not support {execution_provider}."
+                "Optimum engine only support `OpenVINOExecutionProvider` "
+                "and `CPUExecutionProvider`."
+            )

-        model = model_class.from_pretrained(
-            optimized_model_path,
-            revision=revision,
-            trust_remote_code=trust_remote_code,
-            provider=execution_provider,
-            file_name=Path(file_name).name.replace(".onnx", OPTIMIZED_SUFFIX),
-        )
     except Exception as e:
         logger.warning(f"Optimization failed with {e}. Going to use the unoptimized model.")
         model = unoptimized_model
@@ -251,3 +307,31 @@
         return onnx_files[0]
     else:
         raise ValueError(f"No onnx files found for {model_name_or_path} and revision {revision}")
+
+
+def get_openvino_files(
+    *,
+    model_name_or_path: str,
+    revision: Union[str, None] = None,
+    use_auth_token: Union[bool, str] = True,
+) -> Path:
+    """gets the openvino files from the repo"""
+    repo_files = _list_all_repo_files(
+        model_name_or_path=model_name_or_path,
+        revision=revision,
+        use_auth_token=use_auth_token,
+    )
+    pattern = "**openvino_model.*"
+    openvino_files = [p for p in repo_files if p.match(pattern)]
+
+    if len(openvino_files) > 1:
+        logger.info(f"Found {len(openvino_files)} openvino files: {openvino_files}")
+        openvino_file = openvino_files[-1]
+        logger.info(f"Using {openvino_file} as the model")
+        return openvino_file
+    elif len(openvino_files) == 1:
+        return openvino_files[0]
+    else:
+        raise ValueError(
+            f"No openvino files found for {model_name_or_path} and revision {revision}"
+        )

From 91948cdac68cf2ba52d3f7dc9c2c6eb7eebdd30c Mon Sep 17 00:00:00 2001
From: michaelfeil
Date: Fri, 15 Nov 2024 20:29:47 -0800
Subject: [PATCH 2/6] merge openvino

---
 .../transformer/embedder/optimum.py           | 64 +++++++++++++------
 .../infinity_emb/transformer/utils_optimum.py | 19 +++---
 2 files changed, 55 insertions(+), 28 deletions(-)

diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
index 64c48384..32ac5c08 100644
--- a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
@@ -52,30 +52,56 @@ class OptimumEmbedder(BaseEmbedder):
     def __init__(self, *, engine_args: EngineArgs):
         provider = device_to_onnx(engine_args.device)

-        onnx_file = get_onnx_files(
-            model_name_or_path=engine_args.model_name_or_path,
-            revision=engine_args.revision,
-            use_auth_token=True,
-            prefer_quantized="cpu" in provider.lower(),
-        )
+        if provider == "OpenVINOExecutionProvider":
+            CHECK_OPTIMUM_INTEL.mark_required()
+            filename = ""
+            try:
+                openvino_file = get_openvino_files(
+                    model_name_or_path=engine_args.model_name_or_path,
+                    revision=engine_args.revision,
+                    use_auth_token=True,
+                )
+                filename = openvino_file.as_posix()
+            except Exception as e:  # show error then let the optimum intel compress on the fly
+                print(str(e))
+
+            self.model = optimize_model(
+                model_name_or_path=engine_args.model_name_or_path,
+                revision=engine_args.revision,
+                trust_remote_code=engine_args.trust_remote_code,
+                execution_provider=provider,
+                file_name=filename,
+                optimize_model=not os.environ.get(
+                    "INFINITY_ONNX_DISABLE_OPTIMIZE", False
+                ),  # TODO: make this env variable public
+                model_class=OVModelForFeatureExtraction,
+            )
+
+        else:
+            CHECK_ONNXRUNTIME.mark_required()
+            onnx_file = get_onnx_files(
+                model_name_or_path=engine_args.model_name_or_path,
+                revision=engine_args.revision,
+                use_auth_token=True,
+                prefer_quantized="cpu" in provider.lower(),
+            )
+            self.model = optimize_model(
+                model_name_or_path=engine_args.model_name_or_path,
+                revision=engine_args.revision,
+                trust_remote_code=engine_args.trust_remote_code,
+                execution_provider=provider,
+                file_name=onnx_file.as_posix(),
+                optimize_model=not os.environ.get(
+                    "INFINITY_ONNX_DISABLE_OPTIMIZE", False
+                ),  # TODO: make this env variable public
+                model_class=ORTModelForFeatureExtraction,
+            )
+        self.model.use_io_binding = False

         self.pooling = (
             mean_pooling if engine_args.pooling_method == PoolingMethod.mean else cls_token_pooling
         )

-        self.model = optimize_model(
-            model_name_or_path=engine_args.model_name_or_path,
-            revision=engine_args.revision,
-            trust_remote_code=engine_args.trust_remote_code,
-            execution_provider=provider,
-            file_name=onnx_file.as_posix(),
-            optimize_model=not os.environ.get(
-                "INFINITY_ONNX_DISABLE_OPTIMIZE", False
-            ),  # TODO: make this env variable public
-            model_class=ORTModelForFeatureExtraction,
-        )
-        self.model.use_io_binding = False
-
         self.tokenizer = AutoTokenizer.from_pretrained(
             engine_args.model_name_or_path,
             revision=engine_args.revision,
diff --git a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
index e5f7aae8..77c5a8ae 100644
--- a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
@@ -216,10 +216,18 @@ def optimize_model(
                 },  # fp16 for now as it has better precision than bf16
             )
             model.save_pretrained(path_folder.as_posix())  # save the model
-
-        elif execution_provider == "CPUExecutionProvider":  # Optimum onnx cpu path
+        else:  # Optimum onnx cpu path
             optimizer = ORTOptimizer.from_pretrained(unoptimized_model)

+            is_gpu = "cpu" not in execution_provider.lower()
+            optimization_config = OptimizationConfig(
+                optimization_level=99,
+                optimize_with_onnxruntime_only=False,
+                optimize_for_gpu=is_gpu,
+                fp16=is_gpu,
+                # enable_gelu_approximation=True,
+                # enable_gemm_fast_gelu_fusion=True,  # might not work
+            )
             is_gpu = "cpu" not in execution_provider.lower()
             optimization_config = OptimizationConfig(
                 optimization_level=99,
@@ -244,13 +252,6 @@
                 provider=execution_provider,
                 file_name=Path(file_name).name.replace(".onnx", OPTIMIZED_SUFFIX),
             )
-        else:
-            raise ValueError(
-                f"Does not support {execution_provider}."
-                "Optimum engine only support `OpenVINOExecutionProvider` "
-                "and `CPUExecutionProvider`."
-            )
-
     except Exception as e:
         logger.warning(f"Optimization failed with {e}. Going to use the unoptimized model.")
         model = unoptimized_model

From d588ffc803ad88e31c7791c674497854d70b34fb Mon Sep 17 00:00:00 2001
From: michaelfeil
Date: Fri, 15 Nov 2024 20:33:03 -0800
Subject: [PATCH 3/6] update template

---
 libs/infinity_emb/Docker.template.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml
index e1e06a92..5b8a04ee 100644
--- a/libs/infinity_emb/Docker.template.yaml
+++ b/libs/infinity_emb/Docker.template.yaml
@@ -17,11 +17,10 @@ cpu:
     # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
     COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
     RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
+    RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
   extra_env_variables: |
     # Sets default to onnx
     ENV INFINITY_ENGINE="optimum"
-    RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
-
 amd:
   # 2 . command: jinja2 Dockerfile.jinja2 Docker.template.yaml --format=yaml -s amd > Dockerfile.amd_auto

From 4718e69f9ec718669097726bd31c01ccf8a82f41 Mon Sep 17 00:00:00 2001
From: michaelfeil
Date: Fri, 15 Nov 2024 20:38:35 -0800
Subject: [PATCH 4/6] add optimum

---
 .../infinity_emb/transformer/utils_optimum.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
index 77c5a8ae..29aeec1b 100644
--- a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
@@ -8,12 +8,7 @@
 from huggingface_hub import HfApi, HfFolder  # type: ignore
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE  # type: ignore

-from infinity_emb._optional_imports import (
-    CHECK_ONNXRUNTIME,
-    CHECK_TORCH,
-    CHECK_OPTIMUM_INTEL,
-    CHECK_OPTIMUM_AMD,
-)
+from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_OPTIMUM_INTEL
 from infinity_emb.log_handler import logger
 from infinity_emb.primitives import Device

@@ -68,8 +63,6 @@ def device_to_onnx(device: Device) -> str:
         if "OpenVINOExecutionProvider" in available:
             return "OpenVINOExecutionProvider"
         return "CPUExecutionProvider"
-    elif device == Device.openvino:
-        return "OpenVINOExecutionProvider"
     elif device == Device.cuda:
         if "ROCMExecutionProvider" in available:
             return "ROCMExecutionProvider"
@@ -139,8 +132,6 @@ def optimize_model(
         )

-    file_optimized: Union[str, list] = ""
-
     extra_args = {}

     logger.info(f"file_name: {file_name}")
@@ -157,8 +148,10 @@
         files_optimized = sorted(list(path_folder.glob(f"**/{OPTIMIZED_PREFIX}*")))
         if files_optimized:
             file_optimized = files_optimized[-1]
-        if file_name:
-            file_optimized = file_name
+        elif file_name:
+            file_optimized = Path(file_name)
+        else:
+            file_optimized = None

         extra_args = {"ov_config": {"INFERENCE_PRECISION_HINT": "bf16"}}

@@ -171,6 +164,8 @@
         files_optimized = list(path_folder.glob(f"**/*{OPTIMIZED_SUFFIX}"))
         if files_optimized:
             file_optimized = files_optimized[0]
+        else:
+            file_optimized = None
-    else:
-        raise ValueError(
-            f"Does not support {execution_provider}."

From 5ae8c435be1439464b90938ce8682f255d6364c4 Mon Sep 17 00:00:00 2001
From: michaelfeil
Date: Fri, 15 Nov 2024 22:20:25 -0800
Subject: [PATCH 5/6] latest push

---
 libs/infinity_emb/Docker.template.yaml        |  4 +--
 libs/infinity_emb/Dockerfile.cpu_auto         |  8 +++---
 libs/infinity_emb/Makefile                    |  3 +++
 libs/infinity_emb/infinity_emb/primitives.py  |  1 +
 .../transformer/embedder/optimum.py           |  1 -
 .../infinity_emb/transformer/utils_optimum.py | 27 +++++++------------
 libs/infinity_emb/pyproject.toml              |  8 +++---
 7 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/libs/infinity_emb/Docker.template.yaml b/libs/infinity_emb/Docker.template.yaml
index 5b8a04ee..e3fc5cd4 100644
--- a/libs/infinity_emb/Docker.template.yaml
+++ b/libs/infinity_emb/Docker.template.yaml
@@ -12,12 +12,12 @@ cpu:
   # RUN sed -i 's|torch = "2.4.1"|torch = "2.5.0"|' pyproject.toml
   # RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml
   # RUN poetry lock --no-update
-  poetry_extras: "all openvino"
+  poetry_extras: "all"
   main_install: |
     # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
     COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
     RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
-    RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
+    RUN poetry run python -m pip install onnxruntime-openvino optimum-intel
   extra_env_variables: |
     # Sets default to onnx
     ENV INFINITY_ENGINE="optimum"
diff --git a/libs/infinity_emb/Dockerfile.cpu_auto b/libs/infinity_emb/Dockerfile.cpu_auto
index 644eb286..ea0093a2 100644
--- a/libs/infinity_emb/Dockerfile.cpu_auto
+++ b/libs/infinity_emb/Dockerfile.cpu_auto
@@ -17,7 +17,7 @@ ENV PYTHONUNBUFFERED=1 \
     POETRY_VIRTUALENVS_IN_PROJECT="true" \
     # do not ask any interactive question
     POETRY_NO_INTERACTION=1 \
-    EXTRAS="all openvino" \
+    EXTRAS="all" \
     PYTHON="python3.11"
 RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
 # Sets default to onnx
@@ -43,14 +43,14 @@ COPY poetry.lock poetry.toml pyproject.toml README.md /app/
 # "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
-RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
+RUN poetry run python -m pip install onnxruntime-openvino optimum-intel

 COPY infinity_emb infinity_emb
 # Install dependency with infinity_emb package
 # "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"
-RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
+RUN poetry run python -m pip install onnxruntime-openvino optimum-intel

 #
@@ -60,7 +60,7 @@ FROM builder as testing
 # "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
 COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
 RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"
-RUN poetry run python -m pip install --upgrade --upgrade-strategy eager "optimum[openvino]"
+RUN poetry run python -m pip install onnxruntime-openvino optimum-intel

 # lint
 RUN poetry run ruff check .
diff --git a/libs/infinity_emb/Makefile b/libs/infinity_emb/Makefile
index a91c6be4..f1a0b6e7 100644
--- a/libs/infinity_emb/Makefile
+++ b/libs/infinity_emb/Makefile
@@ -54,6 +54,9 @@ build-amd:
 build-trt:
	docker buildx build -t michaelf34/infinity:$(VERSION)-trt-onnx -f Dockerfile.trt_onnx_auto --push .

+build-cpu:
+	docker buildx build -t michaelf34/infinity:$(VERSION)-cpu -f Dockerfile.cpu_auto --push .
+
 # Combined target to build both
 build-all-docker:
	docker buildx build -t michaelf34/infinity:$(VERSION)-amd -f Dockerfile.amd_auto --push . & \
diff --git a/libs/infinity_emb/infinity_emb/primitives.py b/libs/infinity_emb/infinity_emb/primitives.py
index 7ff8d404..c3b2b001 100644
--- a/libs/infinity_emb/infinity_emb/primitives.py
+++ b/libs/infinity_emb/infinity_emb/primitives.py
@@ -106,6 +106,7 @@ def default_value():

 class Device(EnumType):
     cpu = "cpu"
+    openvino = "openvino"
     cuda = "cuda"
     mps = "mps"
     tensorrt = "tensorrt"
diff --git a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
index 32ac5c08..50bd7320 100644
--- a/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
@@ -18,7 +18,6 @@
 from infinity_emb.transformer.utils_optimum import (
     cls_token_pooling,
     device_to_onnx,
-    # get_onnx_files,
     mean_pooling,
     normalize,
     optimize_model,
diff --git a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
index 29aeec1b..9cb6512a 100644
--- a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
@@ -29,7 +29,7 @@
 if CHECK_OPTIMUM_INTEL.is_available:
     try:
         from optimum.intel import OVModelForFeatureExtraction  # type: ignore[import-untyped]
-
+        # from optimum.intel import OVWeightQuantizationConfig, OVDynamicQuantizationConfig  # type: ignore
     except (ImportError, RuntimeError, Exception) as ex:
         CHECK_OPTIMUM_INTEL.mark_dirty(ex)

@@ -60,9 +60,9 @@ def device_to_onnx(device: Device) -> str:
     available = ort.get_available_providers()

     if device == Device.cpu:
-        if "OpenVINOExecutionProvider" in available:
-            return "OpenVINOExecutionProvider"
         return "CPUExecutionProvider"
+    elif device == Device.openvino:
+        return "OpenVINOExecutionProvider"
     elif device == Device.cuda:
         if "ROCMExecutionProvider" in available:
             return "ROCMExecutionProvider"
@@ -153,9 +153,8 @@
         else:
             file_optimized = None

-        extra_args = {"ov_config": {"INFERENCE_PRECISION_HINT": "bf16"}}
-
-    elif execution_provider == "CPUExecutionProvider":  # Optimum onnx cpu path
+        extra_args = {"ov_config": {"INFERENCE_PRECISION_HINT": "bf16", "PERFORMANCE_HINT": "THROUGHPUT"}}
+    else:  # Optimum onnx cpu path
         CHECK_ONNXRUNTIME.mark_required()
         path_folder = (
             Path(HUGGINGFACE_HUB_CACHE) / "infinity_onnx" / execution_provider / model_name_or_path
@@ -166,12 +165,6 @@
         files_optimized = list(path_folder.glob(f"**/*{OPTIMIZED_SUFFIX}"))
         if files_optimized:
             file_optimized = files_optimized[0]
         else:
             file_optimized = None
-    else:
-        raise ValueError(
-            f"Does not support {execution_provider}."
-            "Optimum engine only support `OpenVINOExecutionProvider` "
-            "and `CPUExecutionProvider`."
-        )

     if file_optimized:
         # print("files_optimized: ", files_optimized)
@@ -199,16 +192,14 @@
     if not optimize_model or execution_provider == "TensorrtExecutionProvider":
         return unoptimized_model
     try:
-        logger.info("Optimizing model")
+        logger.info(f"Optimizing model for {execution_provider}")
         if execution_provider == "OpenVINOExecutionProvider":
             model = OVModelForFeatureExtraction.from_pretrained(
                 model_name_or_path,
                 export=True,
-                # ov_config={"INFERENCE_PRECISION_HINT": "fp32"}  # fp16 for now as it has better precision than bf16
-                # ov_config={"INFERENCE_PRECISION_HINT": "fp16"}  # fp16 for now as it has better precision than bf16
-                ov_config={
-                    "INFERENCE_PRECISION_HINT": "bf16"
-                },  # fp16 for now as it has better precision than bf16
+                # quantization_config = OVDynamicQuantizationConfig(bits=8),
+                # ov_config={"INFERENCE_PRECISION_HINT": "f32", "PERFORMANCE_HINT": "THROUGHPUT"}  # fp16 for now as it has better precision than bf16
+                ov_config={"INFERENCE_PRECISION_HINT": "bf16", "PERFORMANCE_HINT": "THROUGHPUT"},  # fp16 for now as it has better precision than bf16
             )
             model.save_pretrained(path_folder.as_posix())  # save the model
         else:  # Optimum onnx cpu path
diff --git a/libs/infinity_emb/pyproject.toml b/libs/infinity_emb/pyproject.toml
index 017c8502..ff90467a 100644
--- a/libs/infinity_emb/pyproject.toml
+++ b/libs/infinity_emb/pyproject.toml
@@ -41,9 +41,9 @@ timm = {version = "*", optional=true}
 colpali-engine = {version="^0.3.1", optional=true}
 # openvino
 # optimum-intel = {version=">=1.20.0", optional=true, extras=["openvino"]}
-onnxruntime-openvino = {version=">=1.19.0", optional=true}
-openvino = {version="2024.4.0", optional=true}
-openvino-tokenizers = {version="2024.4.0.0", optional=true}
+# onnxruntime-openvino = {version=">=1.19.0", optional=true}
+# openvino = {version="2024.4.0", optional=true}
+# openvino-tokenizers = {version="2024.4.0.0", optional=true}

 # pin torchvision to a specific source, but default to pypi. use sed to overwrite.
@@ -106,7 +106,7 @@ einops=["einops"]
 logging=["rich"]
 cache=["diskcache"]
 vision=["colpali-engine","pillow","timm","torchvision"]
-openvino=["onnxruntime-openvino","openvino","openvino-tokenizers"]
+# openvino=["onnxruntime-openvino","openvino","openvino-tokenizers"]
 audio=["soundfile"]
 server=[
     "fastapi",

From 5961be9d9aaaf89902dbc7b39b82293a5fd0b38d Mon Sep 17 00:00:00 2001
From: michaelfeil
Date: Fri, 15 Nov 2024 22:25:25 -0800
Subject: [PATCH 6/6] fmt

---
 .../infinity_emb/transformer/utils_optimum.py |   9 +-
 libs/infinity_emb/poetry.lock                 | 107 +-----------------
 2 files changed, 9 insertions(+), 107 deletions(-)

diff --git a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
index 9cb6512a..4a35cbdf 100644
--- a/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
+++ b/libs/infinity_emb/infinity_emb/transformer/utils_optimum.py
@@ -153,7 +153,9 @@ def optimize_model(
         else:
             file_optimized = None

-        extra_args = {"ov_config": {"INFERENCE_PRECISION_HINT": "bf16", "PERFORMANCE_HINT": "THROUGHPUT"}}
+        extra_args = {
+            "ov_config": {"INFERENCE_PRECISION_HINT": "bf16", "PERFORMANCE_HINT": "THROUGHPUT"}
+        }
     else:  # Optimum onnx cpu path
         CHECK_ONNXRUNTIME.mark_required()
         path_folder = (
@@ -199,7 +201,10 @@
                 export=True,
                 # quantization_config = OVDynamicQuantizationConfig(bits=8),
                 # ov_config={"INFERENCE_PRECISION_HINT": "f32", "PERFORMANCE_HINT": "THROUGHPUT"}  # fp16 for now as it has better precision than bf16
-                ov_config={"INFERENCE_PRECISION_HINT": "bf16", "PERFORMANCE_HINT": "THROUGHPUT"},  # fp16 for now as it has better precision than bf16
+                ov_config={
+                    "INFERENCE_PRECISION_HINT": "bf16",
+                    "PERFORMANCE_HINT": "THROUGHPUT",
+                },  # fp16 for now as it has better precision than bf16
             )
             model.save_pretrained(path_folder.as_posix())  # save the model
         else:  # Optimum onnx cpu path
diff --git a/libs/infinity_emb/poetry.lock b/libs/infinity_emb/poetry.lock
index 89c34b56..a6e24830 100644
--- a/libs/infinity_emb/poetry.lock
+++ b/libs/infinity_emb/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "accelerate"
@@ -2367,28 +2367,6 @@ packaging = "*"
 protobuf = "*"
 sympy = "*"

-[[package]]
-name = "onnxruntime-openvino"
-version = "1.19.0"
-description = "ONNX Runtime is a runtime accelerator for Machine Learning models"
-optional = true
-python-versions = "*"
-files = [
-    {file = "onnxruntime_openvino-1.19.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:8c5658da819b26d9f35f95204e1bdfb74a100a7533e74edab3af6316c1e316e8"},
-    {file = "onnxruntime_openvino-1.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb8de2a60cf78db6e201b0a489479995d166938e9c53b01ff342dc7f5f8251ff"},
-    {file = "onnxruntime_openvino-1.19.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f3a0b954026286421b3a769c746c403e8f141f3887d1dd601beb7c4dbf77488a"},
-    {file = "onnxruntime_openvino-1.19.0-cp311-cp311-win_amd64.whl", hash = "sha256:12330922ecdb694ea28dbdcf08c172e47a5a84fee603040691341336ee3e42bc"},
-    {file = "onnxruntime_openvino-1.19.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:be00502b1a46ba1891cbe49049033745f71c0b99df6d24b979f5b4084b9567d0"},
-]
-
-[package.dependencies]
-coloredlogs = "*"
-flatbuffers = "*"
-numpy = ">=1.21.6"
-packaging = "*"
-protobuf = "*"
-sympy = "*"
-
 [[package]]
 name = "openai"
 version = "1.52.0"
@@ -2413,81 +2391,6 @@ typing-extensions = ">=4.11,<5"

 [package.extras]
 datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"]

-[[package]]
-name = "openvino"
-version = "2024.4.0"
-description = "OpenVINO(TM) Runtime"
-optional = true
-python-versions = "*"
-files = [
-    {file = "openvino-2024.4.0-16579-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:8f19d4200ea04ab315a02f8279268851362f434beaa1a70b4f35d2eea1efa402"},
-    {file = "openvino-2024.4.0-16579-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4a1da4c8db12559bf2760c8a7c8455e0b4373a20364eaee2c9832a6bb23c88a9"},
-    {file = "openvino-2024.4.0-16579-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:98325dec5ca8bd79f19ea10fd45ad4631a33d9ab50e30659a91a6239ae02d8f4"},
-    {file = "openvino-2024.4.0-16579-cp310-cp310-manylinux_2_31_aarch64.whl", hash = "sha256:61f68366017262603be0d876e2e9b7015789ee6b319da8f1792da28b733193f8"},
-    {file = "openvino-2024.4.0-16579-cp310-cp310-win_amd64.whl", hash = "sha256:a5499d6daa91c358803441561b8792231dd964c5432e838df653c1e5df8de945"},
-    {file = "openvino-2024.4.0-16579-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:e333a5f8155ae357f74e54b664d52d85fa4036a5ccea5da49a7df7f78826c1ce"},
-    {file = "openvino-2024.4.0-16579-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b59fb073da74c7ab6d89f2559e3024044f340750b3e519e25975426beb154942"},
-    {file = "openvino-2024.4.0-16579-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:4bd3e21a70eff12166bae3b5ea824787b8c766f975f39e3f2d93729b47b74cb6"},
-    {file = "openvino-2024.4.0-16579-cp311-cp311-manylinux_2_31_aarch64.whl", hash = "sha256:49b578c4d7325e4a519eb66ee5655871a2b7cd5be9d2de0d5109df23301d10a9"},
-    {file = "openvino-2024.4.0-16579-cp311-cp311-win_amd64.whl", hash = "sha256:ab42204c185a4f0df5600a0adb4a4a0c97cebdf630696f94f9d06732714385bc"},
-    {file = "openvino-2024.4.0-16579-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:d67d30f830fa3147eb37f31f53c9eaee424a4e93f33eed00d8288f304ef0250a"},
-    {file = "openvino-2024.4.0-16579-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50e05d59a90f7950c205d95bb1559e9a8a7d655fe843449d3d426c579fe665f1"},
-    {file = "openvino-2024.4.0-16579-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:d4cf229fb240b8af44d14686b5bdd94f9eabc70120b9808fc804252fac9ef615"},
-    {file = "openvino-2024.4.0-16579-cp312-cp312-manylinux_2_31_aarch64.whl", hash = "sha256:5c8ceeb537019280f69dbe86049c3136e648e94fa9f3da9ef0433975e479ad09"},
-    {file = "openvino-2024.4.0-16579-cp312-cp312-win_amd64.whl", hash = "sha256:83af7df6f9b7e2a96dfc5d63a774e6ca3f87d64c7372d14f7ae339387474fc5c"},
-    {file = "openvino-2024.4.0-16579-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b23fd5114bf42f04773f293b16965a541d58e46e6847053f1417cd6e47acddf5"},
-    {file = "openvino-2024.4.0-16579-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:fa6e9fffd31c095e052f6cecb9ac3ff95e0c122418b81b9926b7687465475742"},
-    {file = "openvino-2024.4.0-16579-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:24b28379bd8d43963829b0b4df957d659269fa647f4f842bd0b3d2f8db76782b"},
-    {file = "openvino-2024.4.0-16579-cp38-cp38-manylinux_2_31_aarch64.whl", hash = "sha256:4ed049ab7a2ffb624690e6cf38366383630cd58736320953cc62c78e8b31eae5"},
-    {file = "openvino-2024.4.0-16579-cp38-cp38-win_amd64.whl", hash = "sha256:0cccaa53a61629b44408fe0c7537db637be913697b0f3c54c78756e95dfc4498"},
-    {file = "openvino-2024.4.0-16579-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:3b0834492ff5bc129debb506a705d26b640bca99a10e641af8f710bd081c9af0"},
-    {file = "openvino-2024.4.0-16579-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e01c22a9989470ebcbb7b05fd07e4297bf6c5ecdca202b05f5dc9d2b3186f39"},
-    {file = "openvino-2024.4.0-16579-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:fb4781dd9691dc4cfbc6b69f56f724286699e309d4ddc7894fed3ee77b683e2f"},
-    {file = "openvino-2024.4.0-16579-cp39-cp39-manylinux_2_31_aarch64.whl", hash = "sha256:74094f8ad81c2ae8500d3361ac087455316a6583016f693d7e1dd1500217ceec"},
-    {file = "openvino-2024.4.0-16579-cp39-cp39-win_amd64.whl", hash = "sha256:be834d95405fe3724f104b54f3836e7053646d952c4f8a5dad2267665f55d88f"},
-]
-
-[package.dependencies]
-numpy = ">=1.16.6,<2.1.0"
-openvino-telemetry = ">=2023.2.1"
-packaging = "*"
-
-[[package]]
-name = "openvino-telemetry"
-version = "2024.1.0"
-description = "OpenVINO™ Telemetry package for sending statistics with user's consent, used in combination with other OpenVINO™ packages."
-optional = true
-python-versions = "*"
-files = [
-    {file = "openvino-telemetry-2024.1.0.tar.gz", hash = "sha256:6df9a8f499e75d893d0bece3c272e798109f0bd40d1eb2488adca6a0da1d9b9f"},
-    {file = "openvino_telemetry-2024.1.0-py3-none-any.whl", hash = "sha256:73ec3402a1bfb9050ee8885c7618dcb91456d6489dfa84cda7eed6cfa16a6c88"},
-]
-
-[[package]]
-name = "openvino-tokenizers"
-version = "2024.4.0.0"
-description = "Convert tokenizers into OpenVINO models"
-optional = true
-python-versions = ">=3.8"
-files = [
-    {file = "openvino_tokenizers-2024.4.0.0-py3-none-macosx_10_15_x86_64.whl", hash = "sha256:0b9fe78b01e796e9124a04d21cf3074c98cf1a719136f7b14fbab933ac5b85dd"},
-    {file = "openvino_tokenizers-2024.4.0.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a9649be7949be9c72fea02577ea61f320173f43d6ba47d735d1dc9c1901e97c8"},
-    {file = "openvino_tokenizers-2024.4.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:0ec426329d951c2e13c843da42a0d01926b2f40f7f2844e51207dbb5073f1662"},
-    {file = "openvino_tokenizers-2024.4.0.0-py3-none-manylinux_2_31_aarch64.whl", hash = "sha256:e43c9ef643ebccf713d551186efb68edc394834a19f894822e0366413079bfc2"},
-    {file = "openvino_tokenizers-2024.4.0.0-py3-none-win_amd64.whl", hash = "sha256:ee9f99f6e230a364f0708d0625273295e4f60dee54d915f8dc6d015bc0921015"},
-]
-
-[package.dependencies]
-openvino = "==2024.4.*"
-
-[package.extras]
-all = ["openvino_tokenizers[dev,transformers]"]
-benchmark = ["openvino_tokenizers[transformers]", "pandas", "seaborn", "tqdm"]
-dev = ["bandit", "openvino_tokenizers[torch,transformers]", "pandas", "pytest", "pytest_harvest", "ruff"]
-fuzzing = ["atheris", "openvino_tokenizers[transformers]"]
-torch = ["torch"]
-transformers = ["tiktoken", "transformers[sentencepiece] (>=4.36.0)"]
-
 [[package]]
 name = "optimum"
 version = "1.23.3"
@@ -4527,11 +4430,6 @@ files = [
     {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
     {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
     {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
-    {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
-    {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
-    {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
-    {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
-    {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
 ]

 [package.dependencies]
@@ -5300,7 +5198,6 @@ ct2 = ["ctranslate2", "sentence-transformers", "torch", "transformers"]
 einops = ["einops"]
 logging = ["rich"]
 onnxruntime-gpu = ["onnxruntime-gpu"]
-openvino = ["onnxruntime-openvino", "openvino", "openvino-tokenizers"]
 optimum = ["optimum"]
 server = ["fastapi", "orjson", "posthog", "prometheus-fastapi-instrumentator", "pydantic", "rich", "typer", "uvicorn"]
 tensorrt = ["tensorrt"]
@@ -5310,4 +5207,4 @@ vision = ["colpali-engine", "pillow", "timm", "torchvision"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4"
-content-hash = "23feae6cd9a95ff4a6ed50da692d28ba9b514d3067adc9bcc8e4860a70a13942"
+content-hash = "7d3ee9f0dbde4965c672639f86264c05689b8753c266e23400e5fd72b78dfed2"