diff --git a/Dockerfile b/Dockerfile index b91429fd..7f5df4b8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,9 @@ # Supports both CPU (ubuntu:24.04) and GPU (nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04) builds # # Build examples: -# CPU: docker build -t audiomuse-ai . -# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu . +# CPU: docker build -t audiomuse-ai . +# OPENVINO: docker build --build-arg OPENVINO=true -t audiomuse-ai-openvino . +# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu . ARG BASE_IMAGE=ubuntu:24.04 @@ -69,6 +70,7 @@ RUN set -eux; \ FROM ${BASE_IMAGE} AS base ARG BASE_IMAGE +ARG OPENVINO=false SHELL ["/bin/bash", "-c"] @@ -107,12 +109,31 @@ RUN set -ux; \ apt-get autoremove -y || true && \ rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED +# Install Intel GPU drivers for OpenVINO GPU support (when OPENVINO=true) +RUN if [ "$OPENVINO" = "true" ]; then \ + echo "Installing Intel GPU drivers for OpenVINO..." 
&& \ + apt-get update && \ + apt-get install -y --no-install-recommends wget gpg && \ + wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \ + gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \ + echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble client" | \ + tee /etc/apt/sources.list.d/intel-gpu-noble.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + ocl-icd-libopencl1 \ + intel-opencl-icd \ + intel-level-zero-gpu \ + level-zero && \ + rm -rf /var/lib/apt/lists/*; \ + fi + # ============================================================================ # Stage 3: Libraries - Python packages installation # ============================================================================ FROM base AS libraries ARG BASE_IMAGE +ARG OPENVINO=false WORKDIR /app @@ -126,6 +147,9 @@ COPY requirements/ /app/requirements/ RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \ echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \ + elif [[ "$OPENVINO" == true ]]; then\ + echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)"; \ + uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \ else \ echo "CPU base image: installing all packages together for dependency resolution"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \ diff --git a/Dockerfile-noavx2 b/Dockerfile-noavx2 index 8fa53849..0c3a5eed 100644 --- a/Dockerfile-noavx2 +++ b/Dockerfile-noavx2 @@ -124,6 +124,9 @@ COPY requirements/ /app/requirements/ RUN if [[ 
"$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \ echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common-noavx2.txt || exit 1; \ + elif [[ "$BASEIMAGE" =~ ^openvino: ]]; then\ + echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)" \ + uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \ else \ echo "CPU base image: installing all packages together for dependency resolution"; \ uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu-noavx2.txt -r /app/requirements/common-noavx2.txt || exit 1; \ diff --git a/deployment/.env.example b/deployment/.env.example index ed0a59d6..ffde4383 100644 --- a/deployment/.env.example +++ b/deployment/.env.example @@ -94,6 +94,11 @@ USE_GPU_CLUSTERING=false # Default: true CLAP_ENABLED=true +# --- OpenVINO Acceleration --- +RENDER_GID= # render group ID (use `stat -c "%g" /dev/dri/renderD128` on host to verify) +OPENVINO_CONFIG_JSON_PATH= # path to have openvino load config https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#load_config +OPENVINO_DEVICE_TYPE=auto # device selection https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#device_type + # ── Authentication (optional) ───────────────────────────────────────────────── # Leave ALL of these blank/unset to disable authentication entirely (default). # Set ALL THREE (AUDIOMUSE_USER, AUDIOMUSE_PASSWORD, API_TOKEN) to enable auth. 
diff --git a/requirements/openvino.txt b/requirements/openvino.txt new file mode 100644 index 00000000..b330c2f1 --- /dev/null +++ b/requirements/openvino.txt @@ -0,0 +1,2 @@ +onnxruntime==1.19.2 +onnxruntime-openvino \ No newline at end of file diff --git a/student_clap/data/clap_embedder.py b/student_clap/data/clap_embedder.py index 697e17d2..8882452d 100644 --- a/student_clap/data/clap_embedder.py +++ b/student_clap/data/clap_embedder.py @@ -10,6 +10,7 @@ import librosa import onnxruntime as ort from typing import Tuple, Optional +from util import provider logger = logging.getLogger(__name__) @@ -53,17 +54,15 @@ def __init__(self, model_path: str): #sess_options.inter_op_num_threads = 2 # Parallel layers # Use CUDA if available, otherwise CPU - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info(f"CLAP model loaded: {model_path}") - logger.info(f"✅ Using CUDA for ONNX teacher model") - else: - providers = ['CPUExecutionProvider'] - logger.info(f"CLAP model loaded: {model_path}") + available_providers = provider.get_available_providers() + if len(available_providers) > 1: + logger.info(f"✅ Using %s for ONNX teacher model", + [provider.split('ExecutionProvider')[0] for provider in available_providers]) + elif len(available_providers) == 1: # only CPUExecutionProvider logger.info(f"✅ Using optimized CPU inference (8 threads)") logger.info(f" Performance: ~325ms/segment vs 713ms with CoreML") logger.info(f" Reason: Only 24% of ops supported by CoreML GPU, context switching overhead too high") + logger.info(f"CLAP model loaded: {model_path}") self.session = ort.InferenceSession( model_path, diff --git a/student_clap/data/clap_text_embedder.py b/student_clap/data/clap_text_embedder.py index 07b11dac..12520fa1 100644 --- a/student_clap/data/clap_text_embedder.py +++ b/student_clap/data/clap_text_embedder.py @@ -3,6 +3,7 @@ import numpy as np import onnxruntime as ort 
from typing import List +from util import provider logger = logging.getLogger(__name__) @@ -13,15 +14,10 @@ def __init__(self, model_path: str): sess_options = ort.SessionOptions() sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL sess_options.log_severity_level = 3 - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info(f"CLAP text model loaded: {model_path}") - logger.info(f"✅ Using CUDA for ONNX teacher text model") - else: - providers = ['CPUExecutionProvider'] - logger.info(f"CLAP text model loaded: {model_path}") - logger.info(f"✅ Using CPU for ONNX teacher text model") + available_providers = provider.get_available_providers() + logger.info(f"CLAP text model loaded: {model_path}") + logger.info(f"✅ Using %s for ONNX teacher text model", + [provider.split('ExecutionProvider')[0] for provider in available_providers]) self.session = ort.InferenceSession( model_path, sess_options=sess_options, diff --git a/tasks/analysis.py b/tasks/analysis.py index 404c54a9..ea5bf2a5 100644 --- a/tasks/analysis.py +++ b/tasks/analysis.py @@ -11,6 +11,7 @@ import uuid import traceback import gc +from typing import Any from pydub import AudioSegment from tempfile import NamedTemporaryFile @@ -63,6 +64,7 @@ SessionRecycler, comprehensive_memory_cleanup ) +from util import provider from psycopg2 import OperationalError @@ -380,25 +382,7 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None): should_cleanup_sessions = False # Configure provider options for GPU memory management (used for main and secondary models) - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - # Get GPU device ID from environment or default to 0 - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - 
gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', - 'do_copy_in_default_stream': True, - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = get_provider_options(cuda_do_copy_in_default_stream=True) try: # Use pre-loaded sessions if provided, otherwise load per-song @@ -715,22 +699,8 @@ def get_missing_mulan_track_ids(track_ids): if onnx_sessions is None: logger.info(f"Lazy-loading MusiCNN models for album: {album_name}") onnx_sessions = {} - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', # Find memory-efficient algorithms - 'do_copy_in_default_stream': True, # Better memory sync - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - else: - provider_options = [('CPUExecutionProvider', {})] + provider_options = get_provider_options( + cuda_do_copy_in_default_stream=True) try: for model_name, model_path in model_paths.items(): @@ -763,22 +733,9 @@ def get_missing_mulan_track_ids(track_ids): # Recreate sessions onnx_sessions = {} - available_providers = ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible 
and cuda_visible != '-1': - gpu_device_id = 0 - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'EXHAUSTIVE', - 'do_copy_in_default_stream': True, - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - else: - provider_options = [('CPUExecutionProvider', {})] + provider_options = get_provider_options( + cuda_do_copy_in_default_stream=True + ) try: for model_name, model_path in model_paths.items(): @@ -1309,3 +1266,37 @@ def monitor_and_clear_jobs(): logger.critical(f"FATAL ERROR: Analysis failed: {e}", exc_info=True) log_and_update_main(f"❌ Main analysis failed: {e}", current_progress, task_state=TASK_STATUS_FAILURE, error_message=str(e), traceback=traceback.format_exc()) raise + + +def get_provider_options(cuda_do_copy_in_default_stream: bool = False, + cuda_conv_algo_search_mode: str = 'EXHAUSTIVE') -> list[tuple[str, dict[str, Any]]]: + provider_options = [('CPUExecutionProvider', {})] + available_providers = provider.get_available_providers() + if 'OpenVINOExecutionProvider' in available_providers: + device_type = os.environ.get('OPENVINO_DEVICE_TYPE', 'GPU') + vino_options = { + 'device_type': device_type, + 'num_of_threads': int(os.environ.get('OPENVINO_NUM_OF_THREADS', '2')), + 'num_streams': int(os.environ.get('OPENVINO_NUM_STREAMS', '1')) + } + if os.path.exists(os.environ.get('OPENVINO_CONFIG_JSON_PATH', '')): + vino_options['load_config'] = os.environ.get('OPENVINO_CONFIG_JSON_PATH') + provider_options.insert(0, ('OpenVINOExecutionProvider', vino_options)) + logger.info("OpenVINO provider available - Attempting to use OpenVINO for analysis...") + if 'CUDAExecutionProvider' in available_providers: + # Get GPU device ID from environment or default to 0 + gpu_device_id = 0 + cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') + if cuda_visible and cuda_visible != '-1': + gpu_device_id = 0 + + cuda_options 
= { + 'device_id': gpu_device_id, + 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation + 'cudnn_conv_algo_search': cuda_conv_algo_search_mode, + } + if cuda_do_copy_in_default_stream: + cuda_options['do_copy_in_default_stream'] = True + provider_options.insert(0,('CUDAExecutionProvider', cuda_options)) + logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})") + return provider_options \ No newline at end of file diff --git a/tasks/clap_analyzer.py b/tasks/clap_analyzer.py index f993f599..45f39292 100644 --- a/tasks/clap_analyzer.py +++ b/tasks/clap_analyzer.py @@ -30,6 +30,7 @@ from config import AUDIO_LOAD_TIMEOUT except Exception: AUDIO_LOAD_TIMEOUT = None +from tasks import analysis from tasks.memory_utils import cleanup_cuda_memory, handle_onnx_memory_error, comprehensive_memory_cleanup logger = logging.getLogger(__name__) @@ -70,23 +71,7 @@ def _load_audio_model(): session = None # Configure provider options with GPU memory management - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') # Create session try: @@ -143,25 +128,8 @@ def _load_text_model(): # Text model typically runs on CPU in Flask containers session = None - available_providers = 
ort.get_available_providers() - - if 'CUDAExecutionProvider' in available_providers: - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") - + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') + # Create session try: session = ort.InferenceSession( @@ -231,26 +199,7 @@ def _load_onnx_model(): session = None # Configure provider options with GPU memory management - available_providers = ort.get_available_providers() - if 'CUDAExecutionProvider' in available_providers: - # Get GPU device ID from environment or default to 0 - # Docker sets NVIDIA_VISIBLE_DEVICES, CUDA runtime uses CUDA_VISIBLE_DEVICES - gpu_device_id = 0 - cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '') - if cuda_visible and cuda_visible != '-1': - # If CUDA_VISIBLE_DEVICES is set, use first device (already mapped to 0) - gpu_device_id = 0 - - cuda_options = { - 'device_id': gpu_device_id, - 'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation - 'cudnn_conv_algo_search': 'DEFAULT', - } - provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})] - logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})") - else: - provider_options = [('CPUExecutionProvider', {})] - logger.info("CUDA provider not available - using CPU only") + provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT') # Create session with 
determined providers try: diff --git a/tasks/memory_utils.py b/tasks/memory_utils.py index 1754a407..9148ad0a 100644 --- a/tasks/memory_utils.py +++ b/tasks/memory_utils.py @@ -208,20 +208,16 @@ def reset_onnx_memory_pool() -> bool: """ try: import onnxruntime as ort + from util import provider # Force garbage collection first gc.collect() # Determine available providers - providers = ort.get_available_providers() - preferred_provider = None - - if 'CUDAExecutionProvider' in providers: - preferred_provider = 'CUDAExecutionProvider' - logger.debug("Using CUDA provider for ONNX memory pool reset") - elif 'CPUExecutionProvider' in providers: - preferred_provider = 'CPUExecutionProvider' - logger.debug("Using CPU provider for ONNX memory pool reset") + providers = provider.get_available_providers() + preferred_provider = providers[0] + if preferred_provider: + logger.debug("Using %s for ONNX memory pool reset", preferred_provider.split('ExecutionProvider')[0]) else: logger.debug("No suitable ONNX provider found for memory pool reset") return False diff --git a/tasks/mulan_analyzer.py b/tasks/mulan_analyzer.py index f6171607..9778004f 100644 --- a/tasks/mulan_analyzer.py +++ b/tasks/mulan_analyzer.py @@ -21,6 +21,7 @@ from typing import Tuple, Optional from transformers import AutoTokenizer from tasks.memory_utils import cleanup_cuda_memory, cleanup_onnx_session, handle_onnx_memory_error +from util import provider logger = logging.getLogger(__name__) @@ -66,12 +67,9 @@ def _load_mulan_models(load_text_models=False): logger.info("MuLan: Using ONNX Runtime automatic thread management") # Select execution provider (CPU or CUDA) - providers = ['CPUExecutionProvider'] - if ort.get_available_providers() and 'CUDAExecutionProvider' in ort.get_available_providers(): - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] - logger.info("CUDA available - using GPU acceleration") - else: - logger.info("Using CPU execution") + providers = provider.get_available_providers() 
+ logger.info("Using %s acceleration", + [provider.split('ExecutionProvider')[0] for provider in providers]) # Load audio encoder (with external data file) logger.info(f"Loading audio encoder: {config.AUDIO_MODEL_PATH}") @@ -172,9 +170,7 @@ def initialize_mulan_text_models(): # sess_options.intra_op_num_threads = num_threads # sess_options.inter_op_num_threads = num_threads - providers = ['CPUExecutionProvider'] - if ort.get_available_providers() and 'CUDAExecutionProvider' in ort.get_available_providers(): - providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + providers = provider.get_available_providers() # Load text encoder _text_session = ort.InferenceSession( diff --git a/tests/unit/test_analysis.py b/tests/unit/test_analysis.py index 52e72137..d4737bfb 100644 --- a/tests/unit/test_analysis.py +++ b/tests/unit/test_analysis.py @@ -1035,7 +1035,7 @@ class TestOOMFallback: @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_embedding_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during embedding inference triggers CPU fallback @@ -1123,7 +1123,7 @@ def create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_prediction_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test GPU OOM during prediction inference triggers CPU fallback @@ -1208,7 +1208,89 @@ def 
create_session(model_path, providers=None, provider_options=None): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') + def test_secondary_model_oom_fallback_to_cpu(self, mock_providers, mock_audio_load, mock_mel, + mock_beat, mock_rms, mock_chroma, mock_onnx_session): + """Test GPU OOM during secondary model inference triggers CPU fallback + + TESTS: OOM detection and automatic CPU fallback for secondary models + """ + mock_providers.return_value = ['CUDAExecutionProvider', 'CPUExecutionProvider'] + + mock_audio = np.random.rand(16000) + mock_audio_load.return_value = (mock_audio, 16000) + + mock_beat.return_value = (120.0, np.array([0, 100])) + mock_rms.return_value = np.array([[0.5]]) + mock_chroma.return_value = np.random.rand(12, 100) + mock_mel.return_value = np.random.rand(96, 1000) + + gpu_session_call_count = [0] + cpu_session_call_count = [0] + + def gpu_run(output_names, feed_dict): + gpu_session_call_count[0] += 1 + # Make secondary models OOM (after embedding and prediction) + if gpu_session_call_count[0] > 2: + import onnxruntime as ort + raise ort.capi.onnxruntime_pybind11_state.RuntimeException( + "Failed to allocate memory" + ) + return [np.random.rand(5, 200) if gpu_session_call_count[0] <= 2 else np.random.rand(5, 2)] + + def cpu_run(output_names, feed_dict): + cpu_session_call_count[0] += 1 + return [np.random.rand(5, 2)] + + sessions_created = [] + + def create_session(model_path, providers=None, provider_options=None): + mock_session = Mock() + mock_input = Mock() + mock_input.name = 'input' + mock_output = Mock() + mock_output.name = 'output' + mock_session.get_inputs.return_value = [mock_input] + mock_session.get_outputs.return_value = [mock_output] + + if isinstance(providers, list) and 'CPUExecutionProvider' in 
providers and len(providers) == 1: + mock_session.run.side_effect = cpu_run + sessions_created.append('CPU') + else: + mock_session.run.side_effect = gpu_run + sessions_created.append('GPU') + + return mock_session + + mock_onnx_session.side_effect = create_session + + mood_labels = ['happy'] + model_paths = { + 'embedding': '/path/to/embedding.onnx', + 'prediction': '/path/to/prediction.onnx', + 'danceable': '/path/to/danceable.onnx', + 'aggressive': '/path/to/aggressive.onnx', + 'happy': '/path/to/happy.onnx', + 'party': '/path/to/party.onnx', + 'relaxed': '/path/to/relaxed.onnx', + 'sad': '/path/to/sad.onnx' + } + + result, embeddings = analyze_track('test.mp3', mood_labels, model_paths) + + assert result is not None + assert embeddings is not None + # Verify CPU fallback sessions were created for secondary models + assert 'CPU' in sessions_created + assert cpu_session_call_count[0] > 0 + + @patch('tasks.analysis.ort.InferenceSession') + @patch('tasks.analysis.librosa.feature.chroma_stft') + @patch('tasks.analysis.librosa.feature.rms') + @patch('tasks.analysis.librosa.beat.beat_track') + @patch('tasks.analysis.librosa.feature.melspectrogram') + @patch('tasks.analysis.robust_load_audio_with_fallback') + @patch('tasks.analysis.provider.get_available_providers') def test_non_oom_exception_is_reraised(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test non-OOM exceptions are re-raised (not caught by OOM handler) @@ -1267,7 +1349,7 @@ def gpu_run(output_names, feed_dict): @patch('tasks.analysis.librosa.beat.beat_track') @patch('tasks.analysis.librosa.feature.melspectrogram') @patch('tasks.analysis.robust_load_audio_with_fallback') - @patch('tasks.analysis.ort.get_available_providers') + @patch('tasks.analysis.provider.get_available_providers') def test_successful_gpu_inference_no_fallback(self, mock_providers, mock_audio_load, mock_mel, mock_beat, mock_rms, mock_chroma, mock_onnx_session): """Test successful 
GPU inference doesn't trigger CPU fallback diff --git a/util/__init__.py b/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/util/provider.py b/util/provider.py new file mode 100644 index 00000000..244a14c9 --- /dev/null +++ b/util/provider.py @@ -0,0 +1,22 @@ +from functools import cache +import logging + +import onnxruntime as ort + +logger = logging.getLogger(__name__) + + +@cache +def get_available_providers() -> list[str]: + """ + Filters out ONNXRuntime providers to ones supported by Audiomuse-AI + """ + available_providers = ort.get_available_providers() + providers = ['CPUExecutionProvider'] + if 'OpenVINOExecutionProvider' in available_providers: + providers.insert(0, 'OpenVINOExecutionProvider') + if 'CUDAExecutionProvider' in available_providers: + providers.insert(0, 'CUDAExecutionProvider') + logger.info("Providers made available: %s", + [provider.split('ExecutionProvider')[0] for provider in providers]) + return providers \ No newline at end of file