diff --git a/Dockerfile b/Dockerfile index b91429fd..7f5df4b8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,9 @@ # Supports both CPU (ubuntu:24.04) and GPU (nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04) builds # # Build examples: -# CPU: docker build -t audiomuse-ai . -# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu . +# CPU: docker build -t audiomuse-ai . +# OPENVINO: docker build --build-arg OPENVINO=true -t audiomuse-ai-openvino . +# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu . ARG BASE_IMAGE=ubuntu:24.04 @@ -69,6 +70,7 @@ RUN set -eux; \ FROM ${BASE_IMAGE} AS base ARG BASE_IMAGE +ARG OPENVINO=false SHELL ["/bin/bash", "-c"] @@ -107,12 +109,31 @@ RUN set -ux; \ apt-get autoremove -y || true && \ rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED +# Install Intel GPU drivers for OpenVINO GPU support (when OPENVINO=true) +RUN if [ "$OPENVINO" = "true" ]; then \ + echo "Installing Intel GPU drivers for OpenVINO..." 
&& \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble client" | \ + tee /etc/apt/sources.list.d/intel-gpu-noble.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + ocl-icd-libopencl1 \ + intel-opencl-icd \ + intel-level-zero-gpu \ + level-zero && \ + rm -rf /var/lib/apt/lists/*; \ + fi + # ============================================================================ # Stage 3: Libraries - Python packages installation # ============================================================================ FROM base AS libraries ARG BASE_IMAGE +ARG OPENVINO=false WORKDIR /app @@ -126,6 +147,9 @@ COPY requirements/ /app/requirements/ RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \ echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \ + elif [[ "$OPENVINO" == true ]]; then\ + echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)"; \ + uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \ else \ echo "CPU base image: installing all packages together for dependency resolution"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \ diff --git a/Dockerfile-noavx2 b/Dockerfile-noavx2 index 8fa53849..0c3a5eed 100644 --- a/Dockerfile-noavx2 +++ b/Dockerfile-noavx2 @@ -124,6 +124,9 @@ COPY requirements/ /app/requirements/ RUN if [[ 
"$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \ echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common-noavx2.txt || exit 1; \ + elif [[ "$BASEIMAGE" =~ ^openvino: ]]; then\ + echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)" \ + uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \ else \ echo "CPU base image: installing all packages together for dependency resolution"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu-noavx2.txt -r /app/requirements/common-noavx2.txt || exit 1; \ diff --git a/deployment/.env.example b/deployment/.env.example index ed0a59d6..ffde4383 100644 --- a/deployment/.env.example +++ b/deployment/.env.example @@ -94,6 +94,11 @@ USE_GPU_CLUSTERING=false # Default: true CLAP_ENABLED=true +# --- OpenVINO Acceleration --- +RENDER_GID= # render group ID (use `stat -c "%g" /dev/dri/renderD128` on host to verify) +OPENVINO_CONFIG_JSON_PATH= # path to have openvino load config https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#load_config +OPENVINO_DEVICE_TYPE=auto # device selection https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#device_type + # ── Authentication (optional) ───────────────────────────────────────────────── # Leave ALL of these blank/unset to disable authentication entirely (default). # Set ALL THREE (AUDIOMUSE_USER, AUDIOMUSE_PASSWORD, API_TOKEN) to enable auth. 
diff --git a/requirements/openvino.txt b/requirements/openvino.txt new file mode 100644 index 00000000..b330c2f1 --- /dev/null +++ b/requirements/openvino.txt @@ -0,0 +1,2 @@ +onnxruntime==1.19.2 +onnxruntime-openvino \ No newline at end of file diff --git a/student_clap/data/clap_embedder.py b/student_clap/data/clap_embedder.py index 697e17d2..8882452d 100644 --- a/student_clap/data/clap_embedder.py +++ b/student_clap/data/clap_embedder.py @@ -10,6 +10,7 @@ import librosa import onnxruntime as ort from typing import Tuple, Optional +from util import provider logger = logging.getLogger(__name__) @@ -53,17 +54,15 @@ def __init__(self, model_path: str): #sess_options.inter_op_num_threads = 2 # Parallel layers # Use CUDA if available, otherwise CPU - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info(f"CLAP model loaded: {model_path}") - logger.info(f"✅ Using CUDA for ONNX teacher model") - else: - providers = ['CPUExecutionProvider'] - logger.info(f"CLAP model loaded: {model_path}") + available_providers = provider.get_available_providers() + if len(available_providers) > 1: + logger.info(f"✅ Using %s for ONNX teacher model", + [provider.split('ExecutionProvider')[0] for provider in available_providers]) + elif len(available_providers) == 1: # only CPUExecutionProvider logger.info(f"✅ Using optimized CPU inference (8 threads)") logger.info(f" Performance: ~325ms/segment vs 713ms with CoreML") logger.info(f" Reason: Only 24% of ops supported by CoreML GPU, context switching overhead too high") + logger.info(f"CLAP model loaded: {model_path}") self.session = ort.InferenceSession( model_path, diff --git a/student_clap/data/clap_text_embedder.py b/student_clap/data/clap_text_embedder.py index 07b11dac..12520fa1 100644 --- a/student_clap/data/clap_text_embedder.py +++ b/student_clap/data/clap_text_embedder.py @@ -3,6 +3,7 @@ import numpy as np import onnxruntime as ort 
from typing import List +from util import provider logger = logging.getLogger(__name__) @@ -13,15 +14,10 @@ def __init__(self, model_path: str): sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL sess_options.log_severity_level = 3 - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info(f"CLAP text model loaded: {model_path}") - logger.info(f"✅ Using CUDA for ONNX teacher text model") - else: - providers = ['CPUExecutionProvider'] - logger.info(f"CLAP text model loaded: {model_path}") - logger.info(f"✅ Using CPU for ONNX teacher text model") + available_providers = provider.get_available_providers() + logger.info(f"CLAP text model loaded: {model_path}") + logger.info(f"✅ Using %s for ONNX teacher text model", + [provider.split('ExecutionProvider')[0] for provider in available_providers]) self.session = ort.InferenceSession( model_path, sess_options=sess_options, diff --git a/tasks/analysis.py b/tasks/analysis.py index 404c54a9..ea5bf2a5 100644 --- a/tasks/analysis.py +++ b/tasks/analysis.py @@ -11,6 +11,7 @@ import uuid import traceback import gc +from typing import Any from pydub import AudioSegment from tempfile import NamedTemporaryFile @@ -63,6 +64,7 @@ SessionRecycler, comprehensive_memory_cleanup ) +from util import provider from psycopg2 import OperationalError @@ -380,25 +382,7 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None): should_cleanup_sessions = False # Configure provider options for GPU memory management (used for main and secondary models) - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - # Get GPU device ID from environment or default to 0 - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - 
gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', - 'do_copy_in_default_stream': True, - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = get_provider_options(cuda_do_copy_in_default_stream=True) try: # Use pre-loaded sessions if provided, otherwise load per-song @@ -715,22 +699,8 @@ def get_missing_mulan_track_ids(track_ids): if onnx_sessions is None: logger.info(f"Lazy-loading MusiCNN models for album: {album_name}") onnx_sessions = {} - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', # Find memory-efficient algorithms - 'do_copy_in_default_stream': True, # Better memory sync - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - else: - provider_options = [('CPUExecutionProvider', {})] + provider_options = get_provider_options( + cuda_do_copy_in_default_stream=True) try: for model_name, model_path in model_paths.items(): @@ -763,22 +733,9 @@ def get_missing_mulan_track_ids(track_ids): # Recreate sessions onnx_sessions = {} - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible 
and cuda_visible != '-1': - gpu_device_id = 0 - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', - 'do_copy_in_default_stream': True, - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - else: - provider_options = [('CPUExecutionProvider', {})] + provider_options = get_provider_options( + cuda_do_copy_in_default_stream=True + ) try: for model_name, model_path in model_paths.items(): @@ -1309,3 +1266,37 @@ def monitor_and_clear_jobs(): logger.critical(f"FATAL ERROR: Analysis failed: {e}", exc_info=True) log_and_update_main(f"❌ Main analysis failed: {e}", current_progress, task_state=TASK_STATUS_FAILURE, error_message=str(e), traceback=traceback.format_exc()) raise + + +def get_provider_options(cuda_do_copy_in_default_stream: bool = False, + cuda_conv_algo_search_mode: str = 'EXHAUSTIVE') -> list[tuple[str, dict[str, Any]]]: + provider_options = [('CPUExecutionProvider', {})] + available_providers = provider.get_available_providers() + if 'OpenVINOExecutionProvider' in available_providers: + device_type = os.environ.get('OPENVINO_DEVICE_TYPE', 'GPU') + vino_options = { + 'device_type': device_type, + 'num_of_threads': int(os.environ.get('OPENVINO_NUM_OF_THREADS', '2')), + 'num_streams': int(os.environ.get('OPENVINO_NUM_STREAMS', '1')) + } + if os.path.exists(os.environ.get('OPENVINO_CONFIG_JSON_PATH', '')): + vino_options['load_config'] = os.environ.get('OPENVINO_CONFIG_JSON_PATH') + provider_options.insert(0, ('OpenVINOExecutionProvider', vino_options)) + logger.info("OpenVINO provider available - Attempting to use OpenVINO for analysis...") + if 'CUDAExecutionProvider' in available_providers: + # Get GPU device ID from environment or default to 0 + gpu_device_id = 0 + cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') + if cuda_visible and cuda_visible != '-1': + gpu_device_id = 0 + + cuda_options 
= { + 'device_id': gpu_device_id, + 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation + 'cudnn_conv_algo_search': cuda_conv_algo_search_mode, + } + if cuda_do_copy_in_default_stream: + cuda_options['do_copy_in_default_stream'] = True + provider_options.insert(0,('CUDAExecutionProvider', cuda_options)) + logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})") + return provider_options \ No newline at end of file diff --git a/tasks/clap_analyzer.py b/tasks/clap_analyzer.py index f993f599..45f39292 100644 --- a/tasks/clap_analyzer.py +++ b/tasks/clap_analyzer.py @@ -30,6 +30,7 @@ from config import AUDIO_LOAD_TIMEOUT except Exception: AUDIO_LOAD_TIMEOUT = None +from tasks import analysis from tasks.memory_utils import cleanup_cuda_memory, handle_onnx_memory_error, comprehensive_memory_cleanup logger = logging.getLogger(__name__) @@ -70,23 +71,7 @@ def _load_audio_model(): session = None # Configure provider options with GPU memory management - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') # Create session try: @@ -143,25 +128,8 @@ def _load_text_model(): # Text model typically runs on CPU in Flask containers session = None - available_providers = 
ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") - + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') + # Create session try: session = ort.InferenceSession( @@ -231,26 +199,7 @@ def _load_onnx_model(): session = None # Configure provider options with GPU memory management - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - # Get GPU device ID from environment or default to 0 - # Docker sets NVIDIA_VISIBLE_DEVICES, CUDA runtime uses CUDA_VISIBLE_DEVICES - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - # If CUDA_VISIBLE_DEVICES is set, use first device (already mapped to 0) - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') # Create session with 
determined providers try: diff --git a/tasks/memory_utils.py b/tasks/memory_utils.py index 1754a407..9148ad0a 100644 --- a/tasks/memory_utils.py +++ b/tasks/memory_utils.py @@ -208,20 +208,16 @@ def reset_onnx_memory_pool() -> bool: """ try: import onnxruntime as ort + from util import provider # Force garbage collection first gc.collect() # Determine available providers - providers = ort.get_available_providers() - preferred_provider = None - - if 'CUDAExecutionProvider' in providers: - preferred_provider = 'CUDAExecutionProvider' - logger.debug("Using CUDA provider for ONNX memory pool reset") - elif 'CPUExecutionProvider' in providers: - preferred_provider = 'CPUExecutionProvider' - logger.debug("Using CPU provider for ONNX memory pool reset") + providers = provider.get_available_providers() + preferred_provider = providers[0] + if preferred_provider: + logger.debug("Using %s for ONNX memory pool reset", preferred_provider.split('ExecutionProvider')[0]) else: logger.debug("No suitable ONNX provider found for memory pool reset") return False diff --git a/tasks/mulan_analyzer.py b/tasks/mulan_analyzer.py index f6171607..9778004f 100644 --- a/tasks/mulan_analyzer.py +++ b/tasks/mulan_analyzer.py @@ -21,6 +21,7 @@ from typing import Tuple, Optional from transformers import AutoTokenizer from tasks.memory_utils import cleanup_cuda_memory, cleanup_onnx_session, handle_onnx_memory_error +from util import provider logger = logging.getLogger(__name__) @@ -66,12 +67,9 @@ def _load_mulan_models(load_text_models=False): logger.info("MuLan: Using ONNX Runtime automatic thread management") # Select execution provider (CPU or CUDA) - providers = ['CPUExecutionProvider'] - if ort.get_available_providers() and 'CUDAExecutionProvider' in ort.get_available_providers(): - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info("CUDA available - using GPU acceleration") - else: - logger.info("Using CPU execution") + providers = provider.get_available_providers() 
+ logger.info("Using %s acceleration", + [provider.split('ExecutionProvider')[0] for provider in providers]) # Load audio encoder (with external data file) logger.info(f"Loading audio encoder: {config.AUDIO_MODEL_PATH}") @@ -172,9 +170,7 @@ def initialize_mulan_text_models(): # sess_options.intra_op_num_threads = num_threads # sess_options.inter_op_num_threads = num_threads - providers = ['CPUExecutionProvider'] - if ort.get_available_providers() and 'CUDAExecutionProvider' in ort.get_available_providers(): - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + providers = provider.get_available_providers() # Load text encoder _text_session = ort.InferenceSession( diff --git a/tests/unit/test_analysis.py b/tests/unit/test_analysis.py index 52e72137..d4737bfb 100644 --- a/tests/unit/test_analysis.py +++ b/tests/unit/test_analysis.py @@ -1035,7 +1035,7 @@ class TestOOMFallback: @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_embedding_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during embedding inference triggers CPU fallback @@ -1123,7 +1123,7 @@ def create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_prediction_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during prediction inference triggers CPU fallback @@ -1208,7 +1208,89 @@ def 
create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') + def test_secondary_model_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, + mock_beat, mock_rms, mock_chroma, mock_onnx_session): + """Test GPU OOM during secondary model inference triggers CPU fallback + + TESTS: OOM detection and automatic CPU fallback for secondary models + """ + mock_providers.return_value = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + + mock_audio = np.random.rand(16000) + mock_audio_load.return_value = (mock_audio, 16000) + + mock_beat.return_value = (120.0, np.array([0, 100])) + mock_rms.return_value = np.array([[0.5]]) + mock_chroma.return_value = np.random.rand(12, 100) + mock_mel.return_value = np.random.rand(96, 1000) + + gpu_session_call_count = [0] + cpu_session_call_count = [0] + + def gpu_run(output_names, feed_dict): + gpu_session_call_count[0] += 1 + # Make secondary models OOM (after embedding and prediction) + if gpu_session_call_count[0] > 2: + import onnxruntime as ort + raise ort.capi.onnxruntime_pybind11_state.RuntimeException( + "Failed to allocate memory" + ) + return [np.random.rand(5, 200) if gpu_session_call_count[0] <= 2 else np.random.rand(5, 2)] + + def cpu_run(output_names, feed_dict): + cpu_session_call_count[0] += 1 + return [np.random.rand(5, 2)] + + sessions_created = [] + + def create_session(model_path, providers=None, provider_options=None): + mock_session = Mock() + mock_input = Mock() + mock_input.name = 'input' + mock_output = Mock() + mock_output.name = 'output' + mock_session.get_inputs.return_value = [mock_input] + mock_session.get_outputs.return_value = [mock_output] + + if isinstance(providers, list) and 'CPUExecutionProvider' in 
providers and len(providers) == 1: + mock_session.run.side_effect = cpu_run + sessions_created.append('CPU') + else: + mock_session.run.side_effect = gpu_run + sessions_created.append('GPU') + + return mock_session + + mock_onnx_session.side_effect = create_session + + mood_labels = ['happy'] + model_paths = { + 'embedding': '/path/to/embedding.onnx', + 'prediction': '/path/to/prediction.onnx', + 'danceable': '/path/to/danceable.onnx', + 'aggressive': '/path/to/aggressive.onnx', + 'happy': '/path/to/happy.onnx', + 'party': '/path/to/party.onnx', + 'relaxed': '/path/to/relaxed.onnx', + 'sad': '/path/to/sad.onnx' + } + + result, embeddings = analyze_track('test.mp3', mood_labels, model_paths) + + assert result is not None + assert embeddings is not None + # Verify CPU fallback sessions were created for secondary models + assert 'CPU' in sessions_created + assert cpu_session_call_count[0] > 0 + + @patch('tasks.analysis.ort.InferenceSession') + @patch('tasks.analysis.librosa.feature.chroma_stft') + @patch('tasks.analysis.librosa.feature.rms') + @patch('tasks.analysis.librosa.beat.beat_track') + @patch('tasks.analysis.librosa.feature.melspectrogram') + @patch('tasks.analysis.robust_load_audio_with_fallback') + @patch('tasks.analysis.provider.get_available_providers') def test_non_oom_exception_is_reraised(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test non-OOM exceptions are re-raised (not caught by OOM handler) @@ -1267,7 +1349,7 @@ def gpu_run(output_names, feed_dict): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_successful_gpu_inference_no_fallback(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test successful 
GPU inference doesn't trigger CPU fallback diff --git a/util/__init__.py b/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/util/provider.py b/util/provider.py new file mode 100644 index 00000000..244a14c9 --- /dev/null +++ b/util/provider.py @@ -0,0 +1,22 @@ +from functools import cache +import logging + +import onnxruntime as ort + +logger = logging.getLogger(__name__) + + +@cache +def get_available_providers() -> list[str]: + """ + Filters out ONNXRuntime providers to ones supported by Audiomuse-AI + """ + available_providers = ort.get_available_providers() + providers = ['CPUExecutionProvider'] + if 'OpenVINOExecutionProvider' in available_providers: + providers.insert(0, 'OpenVINOExecutionProvider') + if 'CUDAExecutionProvider' in available_providers: + providers.insert(0, 'CUDAExecutionProvider') + logger.info("Providers made available: %s", + [provider.split('ExecutionProvider')[0] for provider in providers]) + return providers \ No newline at end of file