Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
# Supports both CPU (ubuntu:24.04) and GPU (nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04) builds
#
# Build examples:
# CPU: docker build -t audiomuse-ai .
# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu .
# CPU: docker build -t audiomuse-ai .
# OPENVINO: docker build --build-arg OPENVINO=true -t audiomuse-ai-openvino .
# GPU: docker build --build-arg BASE_IMAGE=nvidia/cuda:12.8.1-cudnn-runtime-ubuntu24.04 -t audiomuse-ai-gpu .

ARG BASE_IMAGE=ubuntu:24.04

Expand Down Expand Up @@ -69,6 +70,7 @@ RUN set -eux; \
FROM ${BASE_IMAGE} AS base

ARG BASE_IMAGE
ARG OPENVINO=false

SHELL ["/bin/bash", "-c"]

Expand Down Expand Up @@ -107,12 +109,31 @@ RUN set -ux; \
apt-get autoremove -y || true && \
rm -f /usr/lib/python3.*/EXTERNALLY-MANAGED

# Install Intel GPU drivers for OpenVINO GPU support (when OPENVINO=true)
# Adds Intel's client GPU APT repository for Ubuntu 24.04 ("noble"), with the
# signing key dearmored into /usr/share/keyrings, then installs the OpenCL ICD
# loader, Intel's OpenCL implementation (intel-opencl-icd) and the Level Zero
# runtime used by OpenVINO's GPU plugin.
# NOTE(review): wget and gpg remain installed in the image after this step —
# confirm whether a later stage needs them or whether they should be removed
# once the repository key has been added.
RUN if [ "$OPENVINO" = "true" ]; then \
echo "Installing Intel GPU drivers for OpenVINO..." && \
apt-get update && \
apt-get install -y --no-install-recommends wget gpg && \
wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg && \
echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu noble client" | \
tee /etc/apt/sources.list.d/intel-gpu-noble.list && \
apt-get update && \
apt-get install -y --no-install-recommends \
ocl-icd-libopencl1 \
intel-opencl-icd \
intel-level-zero-gpu \
level-zero && \
rm -rf /var/lib/apt/lists/*; \
fi

# ============================================================================
# Stage 3: Libraries - Python packages installation
# ============================================================================
FROM base AS libraries

ARG BASE_IMAGE
ARG OPENVINO=false

WORKDIR /app

Expand All @@ -126,6 +147,9 @@ COPY requirements/ /app/requirements/
RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \
echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \
elif [[ "$OPENVINO" == true ]]; then\
echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \
else \
echo "CPU base image: installing all packages together for dependency resolution"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \
Expand Down
3 changes: 3 additions & 0 deletions Dockerfile-noavx2
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ COPY requirements/ /app/requirements/
RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \
echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common-noavx2.txt || exit 1; \
elif [[ "$BASEIMAGE" =~ ^openvino: ]]; then\
echo "OpenVINO base image detected: installing OpenVINO runtime packages (onnxruntime-openvino)" \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/openvino.txt -r /app/requirements/common.txt || exit 1; \
else \
echo "CPU base image: installing all packages together for dependency resolution"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu-noavx2.txt -r /app/requirements/common-noavx2.txt || exit 1; \
Expand Down
5 changes: 5 additions & 0 deletions deployment/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ USE_GPU_CLUSTERING=false
# Default: true
CLAP_ENABLED=true

# --- OpenVINO Acceleration ---
RENDER_GID= # render group ID (use `stat -c "%g" /dev/dri/renderD128` on host to verify)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gotta add to config.py still, as well as docker-compose reference files

OPENVINO_CONFIG_JSON_PATH= # path to an OpenVINO load_config JSON file, see https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#load_config
OPENVINO_DEVICE_TYPE=AUTO # device selection, e.g. CPU, GPU, AUTO (note: the code defaults to GPU when unset), see https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#device_type

# ── Authentication (optional) ─────────────────────────────────────────────────
# Leave ALL of these blank/unset to disable authentication entirely (default).
# Set ALL THREE (AUDIOMUSE_USER, AUDIOMUSE_PASSWORD, API_TOKEN) to enable auth.
Expand Down
2 changes: 2 additions & 0 deletions requirements/openvino.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# NOTE(review): onnxruntime-openvino ships its own onnxruntime build — confirm
# that pinning plain onnxruntime alongside it does not conflict, and pin
# onnxruntime-openvino to a version matching 1.19.x.
onnxruntime==1.19.2
onnxruntime-openvino
14 changes: 6 additions & 8 deletions student_clap/data/clap_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import librosa
import onnxruntime as ort
from typing import Tuple, Optional
from util import provider

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -53,17 +54,14 @@ def __init__(self, model_path: str):
#sess_options.inter_op_num_threads = 2 # Parallel layers

# Use CUDA if available, otherwise CPU
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
logger.info(f"CLAP model loaded: {model_path}")
logger.info(f"✅ Using CUDA for ONNX teacher model")
else:
providers = ['CPUExecutionProvider']
logger.info(f"CLAP model loaded: {model_path}")
available_providers = provider.get_available_providers()
logger.info(f"✅ Using %s for ONNX teacher text model",
[provider.split('ExecutionProvider')[0] for provider in available_providers])
elif len(available_providers) == 1: # only CPUExecutionProvider
logger.info(f"✅ Using optimized CPU inference (8 threads)")
logger.info(f" Performance: ~325ms/segment vs 713ms with CoreML")
logger.info(f" Reason: Only 24% of ops supported by CoreML GPU, context switching overhead too high")
logger.info(f"CLAP model loaded: {model_path}")

self.session = ort.InferenceSession(
model_path,
Expand Down
14 changes: 5 additions & 9 deletions student_clap/data/clap_text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import onnxruntime as ort
from typing import List
from util import provider

logger = logging.getLogger(__name__)

Expand All @@ -13,15 +14,10 @@ def __init__(self, model_path: str):
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
sess_options.log_severity_level = 3
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']
logger.info(f"CLAP text model loaded: {model_path}")
logger.info(f"✅ Using CUDA for ONNX teacher text model")
else:
providers = ['CPUExecutionProvider']
logger.info(f"CLAP text model loaded: {model_path}")
logger.info(f"✅ Using CPU for ONNX teacher text model")
available_providers = provider.get_available_providers()
logger.info(f"CLAP text model loaded: {model_path}")
logger.info(f"✅ Using %s for ONNX teacher text model",
[provider.split('ExecutionProvider')[0] for provider in available_providers])
self.session = ort.InferenceSession(
model_path,
sess_options=sess_options,
Expand Down
93 changes: 42 additions & 51 deletions tasks/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import uuid
import traceback
import gc
from typing import Any
from pydub import AudioSegment
from tempfile import NamedTemporaryFile

Expand Down Expand Up @@ -63,6 +64,7 @@
SessionRecycler,
comprehensive_memory_cleanup
)
from util import provider


from psycopg2 import OperationalError
Expand Down Expand Up @@ -380,25 +382,7 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None):
should_cleanup_sessions = False

# Configure provider options for GPU memory management (used for main and secondary models)
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
# Get GPU device ID from environment or default to 0
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE',
'do_copy_in_default_stream': True,
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options = get_provider_options(cuda_do_copy_in_default_stream=True)

try:
# Use pre-loaded sessions if provided, otherwise load per-song
Expand Down Expand Up @@ -715,22 +699,8 @@ def get_missing_mulan_track_ids(track_ids):
if onnx_sessions is None:
logger.info(f"Lazy-loading MusiCNN models for album: {album_name}")
onnx_sessions = {}
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0
cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE', # Find memory-efficient algorithms
'do_copy_in_default_stream': True, # Better memory sync
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
else:
provider_options = [('CPUExecutionProvider', {})]
provider_options = get_provider_options(
cuda_do_copy_in_default_stream=True)

try:
for model_name, model_path in model_paths.items():
Expand Down Expand Up @@ -763,22 +733,9 @@ def get_missing_mulan_track_ids(track_ids):

# Recreate sessions
onnx_sessions = {}
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0
cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE',
'do_copy_in_default_stream': True,
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
else:
provider_options = [('CPUExecutionProvider', {})]
provider_options = get_provider_options(
cuda_do_copy_in_default_stream=True
)

try:
for model_name, model_path in model_paths.items():
Expand Down Expand Up @@ -1309,3 +1266,37 @@ def monitor_and_clear_jobs():
logger.critical(f"FATAL ERROR: Analysis failed: {e}", exc_info=True)
log_and_update_main(f"❌ Main analysis failed: {e}", current_progress, task_state=TASK_STATUS_FAILURE, error_message=str(e), traceback=traceback.format_exc())
raise


def get_provider_options(cuda_do_copy_in_default_stream: bool = False,
                         cuda_conv_algo_search_mode: str = 'EXHAUSTIVE') -> list[tuple[str, dict[str, Any]]]:
    """Build an ONNX Runtime provider-options list ordered by preference.

    Always ends with CPUExecutionProvider as the fallback. When the OpenVINO
    and/or CUDA execution providers are reported available, they are inserted
    ahead of the CPU provider (CUDA ends up first when both are present).

    Args:
        cuda_do_copy_in_default_stream: When True, set the CUDA provider's
            'do_copy_in_default_stream' option (better memory synchronization).
        cuda_conv_algo_search_mode: Value for the CUDA provider's
            'cudnn_conv_algo_search' option, e.g. 'EXHAUSTIVE' or 'DEFAULT'.

    Returns:
        A list of (provider_name, options_dict) tuples suitable for the
        ``provider_options`` argument of ``onnxruntime.InferenceSession``.
    """
    provider_options: list[tuple[str, dict[str, Any]]] = [('CPUExecutionProvider', {})]
    available_providers = provider.get_available_providers()

    if 'OpenVINOExecutionProvider' in available_providers:
        vino_options: dict[str, Any] = {
            # Device selection, e.g. CPU / GPU / AUTO (OpenVINO EP docs).
            'device_type': os.environ.get('OPENVINO_DEVICE_TYPE', 'GPU'),
            'num_of_threads': int(os.environ.get('OPENVINO_NUM_OF_THREADS', '2')),
            'num_streams': int(os.environ.get('OPENVINO_NUM_STREAMS', '1')),
        }
        # Optional load_config JSON; only pass it when the configured path
        # is non-empty and actually exists on disk.
        config_path = os.environ.get('OPENVINO_CONFIG_JSON_PATH', '')
        if config_path and os.path.exists(config_path):
            vino_options['load_config'] = config_path
        provider_options.insert(0, ('OpenVINOExecutionProvider', vino_options))
        logger.info("OpenVINO provider available - Attempting to use OpenVINO for analysis...")

    if 'CUDAExecutionProvider' in available_providers:
        # CUDA_VISIBLE_DEVICES remaps the visible GPUs, so the first visible
        # device is always ordinal 0 from this process's point of view.
        gpu_device_id = 0
        cuda_options: dict[str, Any] = {
            'device_id': gpu_device_id,
            'arena_extend_strategy': 'kSameAsRequested',  # Prevent memory fragmentation
            'cudnn_conv_algo_search': cuda_conv_algo_search_mode,
        }
        if cuda_do_copy_in_default_stream:
            cuda_options['do_copy_in_default_stream'] = True
        provider_options.insert(0, ('CUDAExecutionProvider', cuda_options))
        logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})")
    return provider_options
61 changes: 5 additions & 56 deletions tasks/clap_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from config import AUDIO_LOAD_TIMEOUT
except Exception:
AUDIO_LOAD_TIMEOUT = None
from tasks import analysis
from tasks.memory_utils import cleanup_cuda_memory, handle_onnx_memory_error, comprehensive_memory_cleanup

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -70,23 +71,7 @@ def _load_audio_model():
session = None

# Configure provider options with GPU memory management
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested',
'cudnn_conv_algo_search': 'DEFAULT',
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT')

# Create session
try:
Expand Down Expand Up @@ -143,25 +128,8 @@ def _load_text_model():

# Text model typically runs on CPU in Flask containers
session = None
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested',
'cudnn_conv_algo_search': 'DEFAULT',
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")

provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT')

# Create session
try:
session = ort.InferenceSession(
Expand Down Expand Up @@ -231,26 +199,7 @@ def _load_onnx_model():
session = None

# Configure provider options with GPU memory management
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
# Get GPU device ID from environment or default to 0
# Docker sets NVIDIA_VISIBLE_DEVICES, CUDA runtime uses CUDA_VISIBLE_DEVICES
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
# If CUDA_VISIBLE_DEVICES is set, use first device (already mapped to 0)
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'DEFAULT',
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - will attempt to use GPU (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options = analysis.get_provider_options(cuda_conv_algo_search_mode='DEFAULT')

# Create session with determined providers
try:
Expand Down
14 changes: 5 additions & 9 deletions tasks/memory_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,20 +208,16 @@ def reset_onnx_memory_pool() -> bool:
"""
try:
import onnxruntime as ort
from util import provider

# Force garbage collection first
gc.collect()

# Determine available providers
providers = ort.get_available_providers()
preferred_provider = None

if 'CUDAExecutionProvider' in providers:
preferred_provider = 'CUDAExecutionProvider'
logger.debug("Using CUDA provider for ONNX memory pool reset")
elif 'CPUExecutionProvider' in providers:
preferred_provider = 'CPUExecutionProvider'
logger.debug("Using CPU provider for ONNX memory pool reset")
providers = provider.get_available_providers()
preferred_provider = providers[0]
if preferred_provider:
logger.debug("Using %s for ONNX memory pool reset", preferred_provider.split('ExecutionProvider')[0])
else:
logger.debug("No suitable ONNX provider found for memory pool reset")
return False
Expand Down
Loading