Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
# Supports both CPU (ubuntu:24.04) and GPU (nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04) builds
#
# Build examples:
# CPU: docker build -t audiomuse-ai .
# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu .
# CPU: docker build -t audiomuse-ai .
# OPENVINO: docker build --build-arg OPENVINO=true -t audiomuse-ai-openvino .
# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu .

ARG BASE_IMAGE=ubuntu:24.04

Expand Down Expand Up @@ -69,6 +70,7 @@ RUN set -eux; \
FROM ${BASE_IMAGE} AS base

ARG BASE_IMAGE
ARG OPENVINO=false

SHELL ["/bin/bash", "-c"]

Expand Down Expand Up @@ -107,12 +109,31 @@ RUN set -ux; \
apt-get autoremove -y || true && \
rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED

# Install Intel GPU drivers for OpenVINO GPU support (when OPENVINO=true)
# Adds Intel's client GPU APT repository for Ubuntu 24.04 ("noble"), with the
# signing key dearmored into /usr/share/keyrings, then installs the OpenCL ICD
# loader, Intel's OpenCL implementation (intel-opencl-icd) and the Level Zero
# runtime used by OpenVINO's GPU plugin.
# NOTE(review): wget and gpg remain installed in the image after this step —
# confirm whether a later stage needs them or whether they should be removed
# once the repository key has been added.
RUN if [ "$OPENVINO" = "true" ]; then \
echo "Installing Intel GPU drivers for OpenVINO..." && \
apt-get update && \
apt-get install -y --no-install-recommends wget gpg && \
wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble client" | \
tee /etc/apt/sources.list.d/intel-gpu-noble.list && \
apt-get update && \
apt-get install -y --no-install-recommends \
ocl-icd-libopencl1 \
intel-opencl-icd \
intel-level-zero-gpu \
level-zero && \
rm -rf /var/lib/apt/lists/*; \
fi

# ============================================================================
# Stage 3: Libraries - Python packages installation
# ============================================================================
FROM base AS libraries

ARG BASE_IMAGE
ARG OPENVINO=false

WORKDIR /app

Expand All @@ -126,6 +147,9 @@ COPY requirements/ /app/requirements/
RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \
echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \
elif [[ "$OPENVINO" == true ]]; then\
echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \
else \
echo "CPU base image: installing all packages together for dependency resolution"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \
Expand Down
3 changes: 3 additions & 0 deletions Dockerfile-noavx2
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ COPY requirements/ /app/requirements/
RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \
echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common-noavx2.txt || exit 1; \
elif [[ "$BASEIMAGE" =~ ^openvino: ]]; then\
echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)" \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \
else \
echo "CPU base image: installing all packages together for dependency resolution"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu-noavx2.txt -r /app/requirements/common-noavx2.txt || exit 1; \
Expand Down
5 changes: 5 additions & 0 deletions deployment/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ USE_GPU_CLUSTERING=false
# Default: true
CLAP_ENABLED=true

# --- OpenVINO Acceleration ---
RENDER_GID= # render group ID (use `stat -c "%g" /dev/dri/renderD128` on host to verify)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gotta add to config.py still, as well as docker-compose reference files

OPENVINO_CONFIG_JSON_PATH= # path to an OpenVINO load_config JSON file, see https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#load_config
OPENVINO_DEVICE_TYPE=AUTO # device selection, e.g. CPU, GPU, AUTO (note: the code defaults to GPU when unset), see https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#device_type

# ── Authentication (optional) ─────────────────────────────────────────────────
# Leave ALL of these blank/unset to disable authentication entirely (default).
# Set ALL THREE (AUDIOMUSE_USER, AUDIOMUSE_PASSWORD, API_TOKEN) to enable auth.
Expand Down
2 changes: 2 additions & 0 deletions requirements/openvino.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# NOTE(review): onnxruntime-openvino ships its own onnxruntime build — confirm
# that pinning plain onnxruntime alongside it does not conflict, and pin
# onnxruntime-openvino to a version matching 1.19.x.
onnxruntime==1.19.2
onnxruntime-openvino
14 changes: 6 additions & 8 deletions student_clap/data/clap_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import librosa
import onnxruntime as ort
from typing import Tuple, Optional
from util import provider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,17 +54,14 @@ def __init__(self, model_path: str):
#sess_options.inter_op_num_threads = 2 # Parallel layers

# Use CUDA if available, otherwise CPU
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
logger.info(f"CLAP model loaded: {model_path}")
logger.info(f"✅ Using CUDA for ONNX teacher model")
else:
providers = ['CPUExecutionProvider']
logger.info(f"CLAP model loaded: {model_path}")
available_providers = provider.get_available_providers()
logger.info(f"✅ Using %s for ONNX teacher text model",
[provider.split('ExecutionProvider')[0] for provider in available_providers])
elif len(available_providers) == 1: # only CPUExecutionProvider
logger.info(f"✅ Using optimized CPU inference (8 threads)")
logger.info(f" Performance: ~325ms/segment vs 713ms with CoreML")
logger.info(f" Reason: Only 24% of ops supported by CoreML GPU, context switching overhead too high")
logger.info(f"CLAP model loaded: {model_path}")

self.session = ort.InferenceSession(
model_path,
Expand Down
14 changes: 5 additions & 9 deletions student_clap/data/clap_text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import onnxruntime as ort
from typing import List
from util import provider

logger = logging.getLogger(__name__)

Expand All @@ -13,15 +14,10 @@ def __init__(self, model_path: str):
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
sess_options.log_severity_level = 3
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
logger.info(f"CLAP text model loaded: {model_path}")
logger.info(f"✅ Using CUDA for ONNX teacher text model")
else:
providers = ['CPUExecutionProvider']
logger.info(f"CLAP text model loaded: {model_path}")
logger.info(f"✅ Using CPU for ONNX teacher text model")
available_providers = provider.get_available_providers()
logger.info(f"CLAP text model loaded: {model_path}")
logger.info(f"✅ Using %s for ONNX teacher text model",
[provider.split('ExecutionProvider')[0] for provider in available_providers])
self.session = ort.InferenceSession(
model_path,
sess_options=sess_options,
Expand Down
93 changes: 42 additions & 51 deletions tasks/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import uuid
import traceback
import gc
from typing import Any
from pydub import AudioSegment
from tempfile import NamedTemporaryFile

Expand Down Expand Up @@ -63,6 +64,7 @@
SessionRecycler,
comprehensive_memory_cleanup
)
from util import provider


from psycopg2 import OperationalError
Expand Down Expand Up @@ -380,25 +382,7 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None):
should_cleanup_sessions = False

# Configure provider options for GPU memory management (used for main and secondary models)
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
# Get GPU device ID from environment or default to 0
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE',
'do_copy_in_default_stream': True,
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options = get_provider_options(cuda_do_copy_in_default_stream=True)

try:
# Use pre-loaded sessions if provided, otherwise load per-song
Expand Down Expand Up @@ -715,22 +699,8 @@ def get_missing_mulan_track_ids(track_ids):
if onnx_sessions is None:
logger.info(f"Lazy-loading MusiCNN models for album: {album_name}")
onnx_sessions = {}
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0
cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE', # Find memory-efficient algorithms
'do_copy_in_default_stream': True, # Better memory sync
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
else:
provider_options = [('CPUExecutionProvider', {})]
provider_options = get_provider_options(
cuda_do_copy_in_default_stream=True)

try:
for model_name, model_path in model_paths.items():
Expand Down Expand Up @@ -763,22 +733,9 @@ def get_missing_mulan_track_ids(track_ids):

# Recreate sessions
onnx_sessions = {}
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0
cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE',
'do_copy_in_default_stream': True,
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
else:
provider_options = [('CPUExecutionProvider', {})]
provider_options = get_provider_options(
cuda_do_copy_in_default_stream=True
)

try:
for model_name, model_path in model_paths.items():
Expand Down Expand Up @@ -1309,3 +1266,37 @@ def monitor_and_clear_jobs():
logger.critical(f"FATAL ERROR: Analysis failed: {e}", exc_info=True)
log_and_update_main(f"❌ Main analysis failed: {e}", current_progress, task_state=TASK_STATUS_FAILURE, error_message=str(e), traceback=traceback.format_exc())
raise


def get_provider_options(cuda_do_copy_in_default_stream: bool = False,
                         cuda_conv_algo_search_mode: str = 'EXHAUSTIVE') -> list[tuple[str, dict[str, Any]]]:
    """Build an ONNX Runtime provider-options list ordered by preference.

    Always ends with CPUExecutionProvider as the fallback. When the OpenVINO
    and/or CUDA execution providers are reported available, they are inserted
    ahead of the CPU provider (CUDA ends up first when both are present).

    Args:
        cuda_do_copy_in_default_stream: When True, set the CUDA provider's
            'do_copy_in_default_stream' option (better memory synchronization).
        cuda_conv_algo_search_mode: Value for the CUDA provider's
            'cudnn_conv_algo_search' option, e.g. 'EXHAUSTIVE' or 'DEFAULT'.

    Returns:
        A list of (provider_name, options_dict) tuples suitable for the
        ``provider_options`` argument of ``onnxruntime.InferenceSession``.
    """
    provider_options: list[tuple[str, dict[str, Any]]] = [('CPUExecutionProvider', {})]
    available_providers = provider.get_available_providers()

    if 'OpenVINOExecutionProvider' in available_providers:
        vino_options: dict[str, Any] = {
            # Device selection, e.g. CPU / GPU / AUTO (OpenVINO EP docs).
            'device_type': os.environ.get('OPENVINO_DEVICE_TYPE', 'GPU'),
            'num_of_threads': int(os.environ.get('OPENVINO_NUM_OF_THREADS', '2')),
            'num_streams': int(os.environ.get('OPENVINO_NUM_STREAMS', '1')),
        }
        # Optional load_config JSON; only pass it when the configured path
        # is non-empty and actually exists on disk.
        config_path = os.environ.get('OPENVINO_CONFIG_JSON_PATH', '')
        if config_path and os.path.exists(config_path):
            vino_options['load_config'] = config_path
        provider_options.insert(0, ('OpenVINOExecutionProvider', vino_options))
        logger.info("OpenVINO provider available - Attempting to use OpenVINO for analysis...")

    if 'CUDAExecutionProvider' in available_providers:
        # CUDA_VISIBLE_DEVICES remaps the visible GPUs, so the first visible
        # device is always ordinal 0 from this process's point of view.
        gpu_device_id = 0
        cuda_options: dict[str, Any] = {
            'device_id': gpu_device_id,
            'arena_extend_strategy': 'kSameAsRequested',  # Prevent memory fragmentation
            'cudnn_conv_algo_search': cuda_conv_algo_search_mode,
        }
        if cuda_do_copy_in_default_stream:
            cuda_options['do_copy_in_default_stream'] = True
        provider_options.insert(0, ('CUDAExecutionProvider', cuda_options))
        logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})")
    return provider_options
61 changes: 5 additions & 56 deletions tasks/clap_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from config import AUDIO_LOAD_TIMEOUT
except Exception:
AUDIO_LOAD_TIMEOUT = None
from tasks import analysis
from tasks.memory_utils import cleanup_cuda_memory, handle_onnx_memory_error, comprehensive_memory_cleanup

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -70,23 +71,7 @@ def _load_audio_model():
session = None

# Configure provider options with GPU memory management
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested',
'cudnn_conv_algo_search': 'DEFAULT',
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT')

# Create session
try:
Expand Down Expand Up @@ -143,25 +128,8 @@ def _load_text_model():

# Text model typically runs on CPU in Flask containers
session = None
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested',
'cudnn_conv_algo_search': 'DEFAULT',
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")

provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT')

# Create session
try:
session = ort.InferenceSession(
Expand Down Expand Up @@ -231,26 +199,7 @@ def _load_onnx_model():
session = None

# Configure provider options with GPU memory management
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
# Get GPU device ID from environment or default to 0
# Docker sets NVIDIA_VISIBLE_DEVICES, CUDA runtime uses CUDA_VISIBLE_DEVICES
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
# If CUDA_VISIBLE_DEVICES is set, use first device (already mapped to 0)
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'DEFAULT',
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT')

# Create session with determined providers
try:
Expand Down
14 changes: 5 additions & 9 deletions tasks/memory_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,20 +208,16 @@ def reset_onnx_memory_pool() -> bool:
"""
try:
import onnxruntime as ort
from util import provider

# Force garbage collection first
gc.collect()

# Determine available providers
providers = ort.get_available_providers()
preferred_provider = None

if 'CUDAExecutionProvider' in providers:
preferred_provider = 'CUDAExecutionProvider'
logger.debug("Using CUDA provider for ONNX memory pool reset")
elif 'CPUExecutionProvider' in providers:
preferred_provider = 'CPUExecutionProvider'
logger.debug("Using CPU provider for ONNX memory pool reset")
providers = provider.get_available_providers()
preferred_provider = providers[0]
if preferred_provider:
logger.debug("Using %s for ONNX memory pool reset", preferred_provider.split('ExecutionProvider')[0])
else:
logger.debug("No suitable ONNX provider found for memory pool reset")
return False
Expand Down
Loading