From f5af619b45ba157f865f4163d88765119d0c3513 Mon Sep 17 00:00:00 2001 From: Zhol Internet Date: Fri, 6 Feb 2026 12:12:31 -0800 Subject: [PATCH 1/2] add OpenVINO support Allows for accelerated inferencing on Intel hardware. To allow for this, a few small changes are made: * addition of requirements/openvino.txt, which just lets pip install the OpenVINO runtime * util.provider, to prevent code-reuse with * get_available_providers, which filters out supported ONNXRuntime providers * tasks.analysis.get_provider_options, which prevents code-reuse * modifications to Dockerfile, to allow for that optional package installation described earlier --- Dockerfile | 28 +++++++- Dockerfile-noavx2 | 3 + deployment/.env.example | 4 ++ requirements/openvino.txt | 2 + student_clap/data/clap_embedder.py | 14 ++-- student_clap/data/clap_text_embedder.py | 14 ++-- tasks/analysis.py | 90 +++++++++++-------------- tasks/clap_analyzer.py | 61 ++--------------- tasks/memory_utils.py | 14 ++-- tasks/mulan_analyzer.py | 14 ++-- tests/unit/test_analysis.py | 10 +-- util/__init__.py | 0 util/provider.py | 22 ++++++ 13 files changed, 127 insertions(+), 149 deletions(-) create mode 100644 requirements/openvino.txt create mode 100644 util/__init__.py create mode 100644 util/provider.py diff --git a/Dockerfile b/Dockerfile index 217fa941..0d3358dd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,9 @@ # Supports both CPU (ubuntu:24.04) and GPU (nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04) builds # # Build examples: -# CPU: docker build -t audiomuse-ai . -# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu . +# CPU: docker build -t audiomuse-ai . +# OPENVINO: docker build --build-arg OPENVINO=true -t audiomuse-ai-openvino +# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu . 
ARG BASE_IMAGE=ubuntu:24.04 @@ -73,6 +74,7 @@ RUN set -eux; \ FROM ${BASE_IMAGE} AS base ARG BASE_IMAGE +ARG OPENVINO=false SHELL ["/bin/bash", "-c"] @@ -111,12 +113,31 @@ RUN set -ux; \ apt-get autoremove -y || true && \ rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED +# Install Intel GPU drivers for OpenVINO GPU support (when OPENVINO=true) +RUN if [ "$OPENVINO" = "true" ]; then \ + echo "Installing Intel GPU drivers for OpenVINO..." && \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble client" | \ + tee /etc/apt/sources.list.d/intel-gpu-noble.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + ocl-icd-libopencl1 \ + intel-opencl-icd \ + intel-level-zero-gpu \ + level-zero && \ + rm -rf /var/lib/apt/lists/*; \ + fi + # ============================================================================ # Stage 3: Libraries - Python packages installation # ============================================================================ FROM base AS libraries ARG BASE_IMAGE +ARG OPENVINO=false WORKDIR /app @@ -130,6 +151,9 @@ COPY requirements/ /app/requirements/ RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \ echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \ + elif [[ "$OPENVINO" == true ]]; then\ + echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)"; \ + uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \ else \ 
echo "CPU base image: installing all packages together for dependency resolution"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \ diff --git a/Dockerfile-noavx2 b/Dockerfile-noavx2 index bf2faf73..42c7fc40 100644 --- a/Dockerfile-noavx2 +++ b/Dockerfile-noavx2 @@ -128,6 +128,9 @@ COPY requirements/ /app/requirements/ RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \ echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common-noavx2.txt || exit 1; \ + elif [[ "$OPENVINO" == true ]]; then \ + echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)"; \ + uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common-noavx2.txt || exit 1; \ else \ echo "CPU base image: installing all packages together for dependency resolution"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu-noavx2.txt -r /app/requirements/common-noavx2.txt || exit 1; \ diff --git a/deployment/.env.example b/deployment/.env.example index 0ec06cb6..15fcccdc 100644 --- a/deployment/.env.example +++ b/deployment/.env.example @@ -93,3 +93,7 @@ USE_GPU_CLUSTERING=false # Models: Audio model (~268MB) for analysis, Text model (~478MB) for search # Default: true CLAP_ENABLED=true + +# --- OpenVINO Acceleration --- +RENDER_GID= # render group ID (use `stat -c "%g" /dev/dri/renderD128` on host to verify) +OPENVINO_CONFIG_JSON_PATH= # path to have openvino load config https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#load_config \ No newline at end of file diff --git a/requirements/openvino.txt b/requirements/openvino.txt new file mode 100644 index 
00000000..b330c2f1 --- /dev/null +++ b/requirements/openvino.txt @@ -0,0 +1,2 @@ +onnxruntime==1.19.2 +onnxruntime-openvino \ No newline at end of file diff --git a/student_clap/data/clap_embedder.py b/student_clap/data/clap_embedder.py index 697e17d2..8882452d 100644 --- a/student_clap/data/clap_embedder.py +++ b/student_clap/data/clap_embedder.py @@ -10,6 +10,7 @@ import librosa import onnxruntime as ort from typing import Tuple, Optional +from util import provider logger = logging.getLogger(__name__) @@ -53,17 +54,15 @@ def __init__(self, model_path: str): #sess_options.inter_op_num_threads = 2 # Parallel layers # Use CUDA if available, otherwise CPU - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info(f"CLAP model loaded: {model_path}") - logger.info(f"✅ Using CUDA for ONNX teacher model") - else: - providers = ['CPUExecutionProvider'] - logger.info(f"CLAP model loaded: {model_path}") + available_providers = provider.get_available_providers() + if len(available_providers) > 1: + logger.info("✅ Using %s for ONNX teacher model", + [p.split('ExecutionProvider')[0] for p in available_providers]) + elif len(available_providers) == 1: # only CPUExecutionProvider logger.info(f"✅ Using optimized CPU inference (8 threads)") logger.info(f" Performance: ~325ms/segment vs 713ms with CoreML") logger.info(f" Reason: Only 24% of ops supported by CoreML GPU, context switching overhead too high") + logger.info(f"CLAP model loaded: {model_path}") self.session = ort.InferenceSession( model_path, diff --git a/student_clap/data/clap_text_embedder.py b/student_clap/data/clap_text_embedder.py index 07b11dac..12520fa1 100644 --- a/student_clap/data/clap_text_embedder.py +++ b/student_clap/data/clap_text_embedder.py @@ -3,6 +3,7 @@ import numpy as np import onnxruntime as ort from typing import List +from util import provider logger = logging.getLogger(__name__) @@ 
-13,15 +14,10 @@ def __init__(self, model_path: str): sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL sess_options.log_severity_level = 3 - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info(f"CLAP text model loaded: {model_path}") - logger.info(f"✅ Using CUDA for ONNX teacher text model") - else: - providers = ['CPUExecutionProvider'] - logger.info(f"CLAP text model loaded: {model_path}") - logger.info(f"✅ Using CPU for ONNX teacher text model") + available_providers = provider.get_available_providers() + logger.info(f"CLAP text model loaded: {model_path}") + logger.info(f"✅ Using %s for ONNX teacher text model", + [provider.split('ExecutionProvider')[0] for provider in available_providers]) self.session = ort.InferenceSession( model_path, sess_options=sess_options, diff --git a/tasks/analysis.py b/tasks/analysis.py index 9c3901d7..9809feb1 100644 --- a/tasks/analysis.py +++ b/tasks/analysis.py @@ -11,6 +11,7 @@ import uuid import traceback import gc +from typing import Any from pydub import AudioSegment from tempfile import NamedTemporaryFile @@ -64,6 +65,7 @@ SessionRecycler, comprehensive_memory_cleanup ) +from util import provider from psycopg2 import OperationalError @@ -414,25 +416,7 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None): should_cleanup_sessions = False # Configure provider options for GPU memory management (used for main and secondary models) - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - # Get GPU device ID from environment or default to 0 - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', - 'do_copy_in_default_stream': True, - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = get_provider_options(cuda_do_copy_in_default_stream=True) try: # Use pre-loaded sessions if provided, otherwise load per-song @@ -811,22 +795,8 @@ def get_missing_mulan_track_ids(track_ids): if onnx_sessions is None: logger.info(f"Lazy-loading Essentia models for album: {album_name}") onnx_sessions = {} - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', # Find memory-efficient algorithms - 'do_copy_in_default_stream': True, # Better memory sync - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - else: - provider_options = [('CPUExecutionProvider', {})] + provider_options = get_provider_options( + cuda_do_copy_in_default_stream=True) try: for model_name, model_path in model_paths.items(): @@ -859,22 +829,9 @@ def get_missing_mulan_track_ids(track_ids): # Recreate sessions onnx_sessions = {} - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - cuda_options = { - 
'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', - 'do_copy_in_default_stream': True, - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - else: - provider_options = [('CPUExecutionProvider', {})] + provider_options = get_provider_options( + cuda_do_copy_in_default_stream=True + ) try: for model_name, model_path in model_paths.items(): @@ -1367,3 +1324,34 @@ def monitor_and_clear_jobs(): logger.critical(f"FATAL ERROR: Analysis failed: {e}", exc_info=True) log_and_update_main(f"❌ Main analysis failed: {e}", current_progress, task_state=TASK_STATUS_FAILURE, error_message=str(e), traceback=traceback.format_exc()) raise + + +def get_provider_options(cuda_do_copy_in_default_stream: bool = False, + cuda_conv_algo_search_mode: str = 'EXHAUSTIVE') -> list[tuple[str, dict[str, Any]]]: + provider_options = [('CPUExecutionProvider', {})] + available_providers = provider.get_available_providers() + if 'OpenVINOExecutionProvider' in available_providers: + vino_options = { + 'device_type': 'AUTO', + } + if os.path.exists(os.environ.get('OPENVINO_CONFIG_JSON_PATH', '')): + vino_options['load_config'] = os.environ.get('OPENVINO_CONFIG_JSON_PATH') + provider_options.insert(0, ('OpenVINOExecutionProvider', vino_options)) + logger.info("OpenVINO provider available - Attempting to use OpenVINO for analysis...") + if 'CUDAExecutionProvider' in available_providers: + # Get GPU device ID from environment or default to 0 + gpu_device_id = 0 + cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') + if cuda_visible and cuda_visible != '-1': + gpu_device_id = 0 + + cuda_options = { + 'device_id': gpu_device_id, + 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation + 'cudnn_conv_algo_search': cuda_conv_algo_search_mode, + } + if cuda_do_copy_in_default_stream: + cuda_options['do_copy_in_default_stream'] = True + 
provider_options.insert(0,('CUDAExecutionProvider', cuda_options)) + logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})") + return provider_options \ No newline at end of file diff --git a/tasks/clap_analyzer.py b/tasks/clap_analyzer.py index d64501ff..32d183f6 100644 --- a/tasks/clap_analyzer.py +++ b/tasks/clap_analyzer.py @@ -30,6 +30,7 @@ from config import AUDIO_LOAD_TIMEOUT except Exception: AUDIO_LOAD_TIMEOUT = None +from tasks import analysis from tasks.memory_utils import cleanup_cuda_memory, handle_onnx_memory_error, comprehensive_memory_cleanup logger = logging.getLogger(__name__) @@ -70,23 +71,7 @@ def _load_audio_model(): session = None # Configure provider options with GPU memory management - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') # Create session try: @@ -143,25 +128,8 @@ def _load_text_model(): # Text model typically runs on CPU in Flask containers session = None - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, 
- 'arena_extend_strategy': 'kSameAsRequested', - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") - + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') + # Create session try: session = ort.InferenceSession( @@ -231,26 +199,7 @@ def _load_onnx_model(): session = None # Configure provider options with GPU memory management - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - # Get GPU device ID from environment or default to 0 - # Docker sets NVIDIA_VISIBLE_DEVICES, CUDA runtime uses CUDA_VISIBLE_DEVICES - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - # If CUDA_VISIBLE_DEVICES is set, use first device (already mapped to 0) - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') # Create session with determined providers try: diff --git a/tasks/memory_utils.py b/tasks/memory_utils.py index 1754a407..9148ad0a 100644 --- a/tasks/memory_utils.py +++ b/tasks/memory_utils.py @@ -208,20 +208,16 @@ def reset_onnx_memory_pool() -> bool: """ try: import onnxruntime as ort + from util 
import provider # Force garbage collection first gc.collect() # Determine available providers - providers = ort.get_available_providers() - preferred_provider = None - - if 'CUDAExecutionProvider' in providers: - preferred_provider = 'CUDAExecutionProvider' - logger.debug("Using CUDA provider for ONNX memory pool reset") - elif 'CPUExecutionProvider' in providers: - preferred_provider = 'CPUExecutionProvider' - logger.debug("Using CPU provider for ONNX memory pool reset") + providers = provider.get_available_providers() + preferred_provider = providers[0] + if preferred_provider: + logger.debug("Using %s for ONNX memory pool reset", preferred_provider.split('ExecutionProvider')[0]) else: logger.debug("No suitable ONNX provider found for memory pool reset") return False diff --git a/tasks/mulan_analyzer.py b/tasks/mulan_analyzer.py index f6171607..9778004f 100644 --- a/tasks/mulan_analyzer.py +++ b/tasks/mulan_analyzer.py @@ -21,6 +21,7 @@ from typing import Tuple, Optional from transformers import AutoTokenizer from tasks.memory_utils import cleanup_cuda_memory, cleanup_onnx_session, handle_onnx_memory_error +from util import provider logger = logging.getLogger(__name__) @@ -66,12 +67,9 @@ def _load_mulan_models(load_text_models=False): logger.info("MuLan: Using ONNX Runtime automatic thread management") # Select execution provider (CPU or CUDA) - providers = ['CPUExecutionProvider'] - if ort.get_available_providers() and 'CUDAExecutionProvider' in ort.get_available_providers(): - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info("CUDA available - using GPU acceleration") - else: - logger.info("Using CPU execution") + providers = provider.get_available_providers() + logger.info("Using %s acceleration", + [provider.split('ExecutionProvider')[0] for provider in providers]) # Load audio encoder (with external data file) logger.info(f"Loading audio encoder: {config.AUDIO_MODEL_PATH}") @@ -172,9 +170,7 @@ def initialize_mulan_text_models(): # 
sess_options.intra_op_num_threads = num_threads # sess_options.inter_op_num_threads = num_threads - providers = ['CPUExecutionProvider'] - if ort.get_available_providers() and 'CUDAExecutionProvider' in ort.get_available_providers(): - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + providers = provider.get_available_providers() # Load text encoder _text_session = ort.InferenceSession( diff --git a/tests/unit/test_analysis.py b/tests/unit/test_analysis.py index 262b1587..1e596bd1 100644 --- a/tests/unit/test_analysis.py +++ b/tests/unit/test_analysis.py @@ -1035,7 +1035,7 @@ class TestOOMFallback: @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_embedding_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during embedding inference triggers CPU fallback @@ -1123,7 +1123,7 @@ def create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_prediction_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during prediction inference triggers CPU fallback @@ -1204,7 +1204,7 @@ def create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + 
@patch('tasks.analysis.provider.get_available_providers') def test_secondary_model_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during secondary model inference triggers CPU fallback @@ -1286,7 +1286,7 @@ def create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_non_oom_exception_is_reraised(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test non-OOM exceptions are re-raised (not caught by OOM handler) @@ -1345,7 +1345,7 @@ def gpu_run(output_names, feed_dict): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_successful_gpu_inference_no_fallback(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test successful GPU inference doesn't trigger CPU fallback diff --git a/util/__init__.py b/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/util/provider.py b/util/provider.py new file mode 100644 index 00000000..244a14c9 --- /dev/null +++ b/util/provider.py @@ -0,0 +1,22 @@ +from functools import cache +import logging + +import onnxruntime as ort + +logger = logging.getLogger(__name__) + + +@cache +def get_available_providers() -> list[str]: + """ + Filters out ONNXRuntime providers to ones supported by Audiomuse-AI + """ + available_providers = ort.get_available_providers() + providers = ['CPUExecutionProvider'] + 
if 'OpenVINOExecutionProvider' in available_providers: + providers.insert(0, 'OpenVINOExecutionProvider') + if 'CUDAExecutionProvider' in available_providers: + providers.insert(0, 'CUDAExecutionProvider') + logger.info("Providers made available: %s", + [provider.split('ExecutionProvider')[0] for provider in providers]) + return providers \ No newline at end of file From fec4eed3fa08ecf3d9dfabe985ef3accd3770a8c Mon Sep 17 00:00:00 2001 From: Zhol Internet Date: Wed, 11 Mar 2026 19:59:18 -0700 Subject: [PATCH 2/2] have mode be set as env variable --- tasks/analysis.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tasks/analysis.py b/tasks/analysis.py index 9809feb1..9858fb9f 100644 --- a/tasks/analysis.py +++ b/tasks/analysis.py @@ -1331,8 +1331,11 @@ def get_provider_options(cuda_do_copy_in_default_stream: bool = False, provider_options = [('CPUExecutionProvider', {})] available_providers = provider.get_available_providers() if 'OpenVINOExecutionProvider' in available_providers: + device_type = os.environ.get('OPENVINO_DEVICE_TYPE', 'GPU') vino_options = { - 'device_type': 'AUTO', + 'device_type': device_type, + 'num_of_threads': int(os.environ.get('OPENVINO_NUM_OF_THREADS', '2')), + 'num_streams': int(os.environ.get('OPENVINO_NUM_STREAMS', '1')) } if os.path.exists(os.environ.get('OPENVINO_CONFIG_JSON_PATH', '')): vino_options['load_config'] = os.environ.get('OPENVINO_CONFIG_JSON_PATH')