Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion libs/infinity_emb/Docker.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ cpu:
# RUN sed -i 's|torch = "2.4.1"|torch = "2.5.0"|' pyproject.toml
# RUN sed -i 's|"pypi"|"pytorch_cpu"|' pyproject.toml
# RUN poetry lock --no-update
poetry_extras: "all openvino"
poetry_extras: "all"
main_install: |
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run python -m pip install onnxruntime-openvino optimum-intel
extra_env_variables: |
# Sets default to onnx
ENV INFINITY_ENGINE="optimum"
Expand Down
5 changes: 4 additions & 1 deletion libs/infinity_emb/Dockerfile.cpu_auto
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ ENV PYTHONUNBUFFERED=1 \
POETRY_VIRTUALENVS_IN_PROJECT="true" \
# do not ask any interactive question
POETRY_NO_INTERACTION=1 \
EXTRAS="all openvino" \
EXTRAS="all" \
PYTHON="python3.11"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
# Sets default to onnx
Expand All @@ -43,12 +43,14 @@ COPY poetry.lock poetry.toml pyproject.toml README.md /app/
# "RUN poetry install --no-interaction --no-ansi --no-root --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --no-root --without lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run python -m pip install onnxruntime-openvino optimum-intel

COPY infinity_emb infinity_emb
# Install dependency with infinity_emb package
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --without lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --without lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run python -m pip install onnxruntime-openvino optimum-intel

#

Expand All @@ -58,6 +60,7 @@ FROM builder as testing
# "RUN poetry install --no-interaction --no-ansi --extras \"${EXTRAS}\" --with lint,test && poetry cache clear pypi --all"
COPY requirements_install_from_poetry.sh requirements_install_from_poetry.sh
RUN ./requirements_install_from_poetry.sh --with lint,test "https://download.pytorch.org/whl/cpu"
RUN poetry run python -m pip install onnxruntime-openvino optimum-intel

# lint
RUN poetry run ruff check .
Expand Down
3 changes: 3 additions & 0 deletions libs/infinity_emb/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ build-amd:
build-trt:
docker buildx build -t michaelf34/infinity:$(VERSION)-trt-onnx -f Dockerfile.trt_onnx_auto --push .

build-cpu:
docker buildx build -t michaelf34/infinity:$(VERSION)-cpu -f Dockerfile.cpu_auto --push .

# Combined target to build both
build-all-docker:
docker buildx build -t michaelf34/infinity:$(VERSION)-amd -f Dockerfile.amd_auto --push . & \
Expand Down
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/_optional_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def _raise_error(self) -> None:
"optimum.neuron",
"<neuronx not available as extra, only runs on AMI image, no pip install possible.>",
)
CHECK_OPTIMUM_INTEL = OptionalImports("optimum.intel", "optimum")
CHECK_PIL = OptionalImports("PIL", "vision")
CHECK_POSTHOG = OptionalImports("posthog", "server")
CHECK_PYDANTIC = OptionalImports("pydantic", "server")
Expand Down
1 change: 1 addition & 0 deletions libs/infinity_emb/infinity_emb/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def default_value():

class Device(EnumType):
cpu = "cpu"
openvino = "openvino"
cuda = "cuda"
mps = "mps"
tensorrt = "tensorrt"
Expand Down
83 changes: 61 additions & 22 deletions libs/infinity_emb/infinity_emb/transformer/embedder/optimum.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,18 @@

import numpy as np

from infinity_emb._optional_imports import CHECK_ONNXRUNTIME, CHECK_TRANSFORMERS
from infinity_emb._optional_imports import (
CHECK_ONNXRUNTIME,
CHECK_TRANSFORMERS,
CHECK_OPTIMUM_INTEL,
)
from infinity_emb.args import EngineArgs
from infinity_emb.primitives import EmbeddingReturnType, PoolingMethod
from infinity_emb.transformer.abstract import BaseEmbedder
from infinity_emb.transformer.quantization.interface import quant_embedding_decorator
from infinity_emb.transformer.utils_optimum import (
cls_token_pooling,
device_to_onnx,
get_onnx_files,
mean_pooling,
normalize,
optimize_model,
Expand All @@ -25,43 +28,79 @@
from optimum.onnxruntime import ( # type: ignore[import-untyped]
ORTModelForFeatureExtraction,
)
from infinity_emb.transformer.utils_optimum import get_onnx_files

except (ImportError, RuntimeError, Exception) as ex:
CHECK_ONNXRUNTIME.mark_dirty(ex)


if CHECK_OPTIMUM_INTEL.is_available:
try:
from optimum.intel import OVModelForFeatureExtraction # type: ignore[import-untyped]
from infinity_emb.transformer.utils_optimum import get_openvino_files

except (ImportError, RuntimeError, Exception) as ex:
CHECK_OPTIMUM_INTEL.mark_dirty(ex)


if CHECK_TRANSFORMERS.is_available:
from transformers import AutoConfig, AutoTokenizer # type: ignore[import-untyped]


class OptimumEmbedder(BaseEmbedder):
def __init__(self, *, engine_args: EngineArgs):
CHECK_ONNXRUNTIME.mark_required()
provider = device_to_onnx(engine_args.device)

onnx_file = get_onnx_files(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
use_auth_token=True,
prefer_quantized=("cpu" in provider.lower() or "openvino" in provider.lower()),
)
if provider == "OpenVINOExecutionProvider":
CHECK_OPTIMUM_INTEL.mark_required()
filename = ""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: An empty filename could cause issues if an exception occurs. Initialize it with None instead to make the failure case more explicit.

try:
openvino_file = get_openvino_files(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
use_auth_token=True,
)
filename = openvino_file.as_posix()
except Exception as e: # show error then let the optimum intel compress on the fly
print(str(e))
Comment on lines +64 to +65
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: Printing the error and then silently continuing is dangerous. Consider logging the error and/or raising a more specific exception if OpenVINO file loading fails.


self.model = optimize_model(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
execution_provider=provider,
file_name=filename,
optimize_model=not os.environ.get(
"INFINITY_ONNX_DISABLE_OPTIMIZE", False
), # TODO: make this env variable public
model_class=OVModelForFeatureExtraction,
)

else:
CHECK_ONNXRUNTIME.mark_required()
onnx_file = get_onnx_files(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
use_auth_token=True,
prefer_quantized="cpu" in provider.lower(),
)
self.model = optimize_model(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
execution_provider=provider,
file_name=onnx_file.as_posix(),
optimize_model=not os.environ.get(
"INFINITY_ONNX_DISABLE_OPTIMIZE", False
), # TODO: make this env variable public
model_class=ORTModelForFeatureExtraction,
)
self.model.use_io_binding = False

self.pooling = (
mean_pooling if engine_args.pooling_method == PoolingMethod.mean else cls_token_pooling
)

self.model = optimize_model(
model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
trust_remote_code=engine_args.trust_remote_code,
execution_provider=provider,
file_name=onnx_file.as_posix(),
optimize_model=not os.environ.get(
"INFINITY_ONNX_DISABLE_OPTIMIZE", False
), # TODO: make this env variable public
model_class=ORTModelForFeatureExtraction,
)
self.model.use_io_binding = False

self.tokenizer = AutoTokenizer.from_pretrained(
engine_args.model_name_or_path,
revision=engine_args.revision,
Expand Down
Loading
Loading