Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions libs/infinity_emb/infinity_emb/transformer/acceleration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,21 @@
from infinity_emb.primitives import Device

if CHECK_OPTIMUM.is_available:
from optimum.bettertransformer import ( # type: ignore[import-untyped]
BetterTransformer,
BetterTransformerManager,
)
try:
from optimum.bettertransformer import ( # type: ignore[import-untyped]
BetterTransformer,
BetterTransformerManager,
)
BETTERTRANSFORMER_AVAILABLE = True
except (ImportError, RuntimeError):
# BetterTransformer is deprecated in optimum; newer releases can fail at import
# time (e.g. with an unsupported transformers version), so fall back to disabled.
BETTERTRANSFORMER_AVAILABLE = False
BetterTransformer = None
BetterTransformerManager = None
else:
BETTERTRANSFORMER_AVAILABLE = False
BetterTransformer = None
BetterTransformerManager = None

if CHECK_TORCH.is_available:
import torch
Expand All @@ -37,6 +48,9 @@ def check_if_bettertransformer_possible(engine_args: "EngineArgs") -> bool:
if not engine_args.bettertransformer:
return False

if not BETTERTRANSFORMER_AVAILABLE:
return False

config = AutoConfig.from_pretrained(
pretrained_model_name_or_path=engine_args.model_name_or_path,
revision=engine_args.revision,
Expand All @@ -50,6 +64,10 @@ def to_bettertransformer(model: "PreTrainedModel", engine_args: "EngineArgs", lo
if not engine_args.bettertransformer:
return model

if not BETTERTRANSFORMER_AVAILABLE:
logger.info("BetterTransformer is not available due to version incompatibility. Continuing without optimization.")
return model

if engine_args.device == Device.mps or (
hasattr(model, "device") and model.device.type == "mps"
):
Expand Down
2 changes: 1 addition & 1 deletion libs/infinity_emb/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ jinja2-cli = "*"
torch = "2.8.0"
prometheus-fastapi-instrumentator = "7.0.0"
# sentence-transformers = "3.3.1"
transformers = "4.47.0"
transformers = "4.53.3"
fastapi = "0.115.2"

[tool.poetry.group.codespell.dependencies]
Expand Down