Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ RUN set -ux; \
supervisor procps \
gcc g++ \
git vim redis-tools strace iputils-ping \
$(if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then echo libnvinfer10; fi) \
$(if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then echo libnvinfer-plugin10; fi) \
"$(if [[ "$BASE_IMAGE" =~ ^nvidia/cuda:([0-9]+)\.([0-9]+).+$ ]]; then echo "cuda-compiler-${BASH_REMATCH[1]}-${BASH_REMATCH[2]}"; fi)"; then \
break; \
fi; \
Expand Down Expand Up @@ -125,10 +127,10 @@ COPY requirements/ /app/requirements/
# Note: --index-strategy unsafe-best-match resolves conflicts between pypi.nvidia.com and pypi.org
RUN if [[ "$BASE_IMAGE" =~ ^nvidia/cuda: ]]; then \
echo "NVIDIA base image detected: installing GPU packages (cupy, cuml, onnxruntime-gpu, voyager, torch+cuda)"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \
UV_HTTP_TIMEOUT=600 uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/gpu.txt -r /app/requirements/common.txt || exit 1; \
else \
echo "CPU base image: installing all packages together for dependency resolution"; \
uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \
UV_HTTP_TIMEOUT=600 uv pip install --system --no-cache --index-strategy unsafe-best-match -r /app/requirements/cpu.txt -r /app/requirements/common.txt || exit 1; \
fi \
&& echo "Verifying psycopg2 installation..." \
&& python3 -c "import psycopg2; print('psycopg2 OK')" \
Expand Down
5 changes: 5 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@
# --- GPU Acceleration for Clustering (Optional, requires NVIDIA GPU and RAPIDS cuML) ---
# Enabled only when the env var is literally "true" (case-insensitive); any
# other value, including unset, leaves GPU clustering off.
USE_GPU_CLUSTERING = os.getenv("USE_GPU_CLUSTERING", "False").lower() == "true"

# --- TensorRT Execution Provider for ONNX Runtime (Optional, NVIDIA builds only) ---
# When true and TensorRT libraries are available, ONNX Runtime will prefer
# TensorrtExecutionProvider before CUDAExecutionProvider.
USE_TENSORRT = os.getenv("USE_TENSORRT", "False").lower() == "true"

# --- DBSCAN Only Constants (Ranges for Evolutionary Approach) ---
# Default ranges for DBSCAN parameters
DBSCAN_EPS_MIN = float(os.getenv("DBSCAN_EPS_MIN", "0.1"))
Expand Down
7 changes: 7 additions & 0 deletions deployment/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ MISTRAL_API_KEY=
# Default: false (CPU only)
USE_GPU_CLUSTERING=false

# --- TensorRT Execution Provider (NVIDIA images only) ---
# Enable TensorRT for ONNX Runtime inference (MusiCNN, CLAP, MuLan) when available.
# Requires NVIDIA images with TensorRT runtime libs included.
# If disabled or unavailable, ONNX Runtime falls back to CUDA, then CPU.
# Default: false
USE_TENSORRT=false

# --- CLAP Text Search Configuration ---
# Enable CLAP (Contrastive Language-Audio Pretraining) for natural language music search
# CLAP allows searching your music collection using text queries like "upbeat summer songs" or "relaxing piano music"
Expand Down
2 changes: 2 additions & 0 deletions deployment/docker-compose-navidrome_local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
TEMP_DIR: "/app/temp_audio"
# Authentication (optional) – leave blank to disable
API_TOKEN: "${API_TOKEN:-}"
Expand Down Expand Up @@ -99,6 +100,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
TEMP_DIR: "/app/temp_audio"
# Authentication (optional) – leave blank to disable
API_TOKEN: "${API_TOKEN:-}"
Expand Down
2 changes: 2 additions & 0 deletions deployment/docker-compose-nvidia-local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ services:
OLLAMA_SERVER_URL: "${OLLAMA_SERVER_URL:-http://192.168.1.71:11434/api/generate}"
OLLAMA_MODEL_NAME: "${OLLAMA_MODEL_NAME:-qwen3:1.7b}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}"
USE_TENSORRT: "${USE_TENSORRT:-false}"
TEMP_DIR: "/app/temp_audio"
# Authentication (optional) – leave blank to disable
API_TOKEN: "${API_TOKEN:-}"
Expand Down Expand Up @@ -106,6 +107,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}"
USE_TENSORRT: "${USE_TENSORRT:-false}"
NVIDIA_VISIBLE_DEVICES: "0"
NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
USE_GPU_CLUSTERING: "${USE_GPU_CLUSTERING:-true}"
Expand Down
2 changes: 2 additions & 0 deletions deployment/docker-compose-nvidia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
TEMP_DIR: "/app/temp_audio"
volumes:
- temp-audio-flask:/app/temp_audio # Volume for temporary audio files
Expand Down Expand Up @@ -91,6 +92,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
NVIDIA_VISIBLE_DEVICES: "0"
NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
USE_GPU_CLUSTERING: "${USE_GPU_CLUSTERING:-true}"
Expand Down
1 change: 1 addition & 0 deletions deployment/docker-compose-server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
TEMP_DIR: "/app/temp_audio"
API_TOKEN: "${API_TOKEN:-}"
AUDIOMUSE_USER: "${AUDIOMUSE_USER:-}"
Expand Down
1 change: 1 addition & 0 deletions deployment/docker-compose-worker-nvidia.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
TEMP_DIR: "/app/temp_audio"
NVIDIA_VISIBLE_DEVICES: "0"
NVIDIA_DRIVER_CAPABILITIES: "compute,utility"
Expand Down
1 change: 1 addition & 0 deletions deployment/docker-compose-worker_local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ services:
GEMINI_API_KEY: "${GEMINI_API_KEY}"
MISTRAL_API_KEY: "${MISTRAL_API_KEY}"
CLAP_ENABLED: "${CLAP_ENABLED:-true}" # Enable CLAP text search (set to false for slower systems)
USE_TENSORRT: "${USE_TENSORRT:-false}" # Prefer TensorRT EP for ONNX Runtime when available
TEMP_DIR: "/app/temp_audio"
# Authentication (optional) – leave blank to disable
API_TOKEN: "${API_TOKEN:-}"
Expand Down
15 changes: 15 additions & 0 deletions docs/GPU.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@ We suggest **8GB VRAM** on GPU, with less you can experience the NON BLOCKING Ou
3. Ensure NVIDIA Container Toolkit is installed on your host
4. Use docker-compose files with GPU support (e.g., `docker-compose-nvidia.yaml` or `docker-compose-worker-nvidia.yaml`)

**TensorRT (optional ONNX acceleration):**

NVIDIA images now include TensorRT runtime libraries required by ONNX Runtime.
TensorRT remains **opt-in** to avoid changing existing behavior.

Set in your `.env` file:

```
USE_TENSORRT=true
```

When enabled and available, ONNX Runtime provider order becomes:
`TensorrtExecutionProvider -> CUDAExecutionProvider -> CPUExecutionProvider`.
If TensorRT cannot be used, inference falls back automatically.

**Performance Impact:**
- **KMeans**: 10-50x faster than CPU
- **DBSCAN**: 5-100x faster than CPU
Expand Down
1 change: 1 addition & 0 deletions docs/PARAMETERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ These are the default parameters used when launching analysis or clustering task
| `CLUSTERING_RUNS` | Iterations for Monte Carlo evolutionary search. | `1000` |
| `TOP_N_PLAYLISTS` | Post clustering, it keeps only the top N diverse playlists. | `8` |
| `USE_GPU_CLUSTERING` | When true, enables the use of GPU for K-Means, DBSCAN and PCA | `false` |
| `USE_TENSORRT` | When true and TensorRT is available (NVIDIA images), ONNX Runtime prefers TensorRT EP before CUDA for MusiCNN/CLAP/MuLan | `false` |
| **Similarity General** | | |
| `INDEX_NAME` | Name of the index, no need to change. | `music_library` |
| `VOYAGER_EF_CONSTRUCTION` | Number of element analyzed to create the neighbor list in the index. | `1024` |
Expand Down
93 changes: 34 additions & 59 deletions tasks/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@
SessionRecycler,
comprehensive_memory_cleanup
)
from .onnx_providers import (
build_ort_provider_options,
split_provider_options,
log_provider_selection,
)


from psycopg2 import OperationalError
Expand Down Expand Up @@ -380,25 +385,13 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None):
should_cleanup_sessions = False

# Configure provider options for GPU memory management (used for main and secondary models)
available_providers = ort.get_available_providers()
if 'CUDAExecutionProvider' in available_providers:
# Get GPU device ID from environment or default to 0
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0

cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE',
'do_copy_in_default_stream': True,
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
logger.info(f"CUDA provider available - attempting to use GPU for analysis (device_id={gpu_device_id})")
else:
provider_options = [('CPUExecutionProvider', {})]
logger.info("CUDA provider not available - using CPU only")
provider_options, available_providers = build_ort_provider_options(
ort,
cuda_algo_search='EXHAUSTIVE',
include_copy_stream=True,
)
providers, provider_opts = split_provider_options(provider_options)
log_provider_selection(logger, "MusiCNN", provider_options, available_providers)

try:
# Use pre-loaded sessions if provided, otherwise load per-song
Expand All @@ -411,8 +404,8 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None):
try:
embedding_sess = ort.InferenceSession(
model_paths['embedding'],
providers=[p[0] for p in provider_options],
provider_options=[p[1] for p in provider_options]
providers=providers,
provider_options=provider_opts
)
except Exception:
# Fallback to CPU if preferred providers fail
Expand All @@ -425,8 +418,8 @@ def analyze_track(file_path, mood_labels_list, model_paths, onnx_sessions=None):
try:
prediction_sess = ort.InferenceSession(
model_paths['prediction'],
providers=[p[0] for p in provider_options],
provider_options=[p[1] for p in provider_options]
providers=providers,
provider_options=provider_opts
)
except Exception:
# Fallback to CPU if preferred providers fail
Expand Down Expand Up @@ -715,30 +708,21 @@ def get_missing_mulan_track_ids(track_ids):
if onnx_sessions is None:
logger.info(f"Lazy-loading MusiCNN models for album: {album_name}")
onnx_sessions = {}
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0
cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE', # Find memory-efficient algorithms
'do_copy_in_default_stream': True, # Better memory sync
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
else:
provider_options = [('CPUExecutionProvider', {})]
provider_options, available_providers = build_ort_provider_options(
ort,
cuda_algo_search='EXHAUSTIVE',
include_copy_stream=True,
)
providers, provider_opts = split_provider_options(provider_options)
log_provider_selection(logger, "MusiCNN lazy-load", provider_options, available_providers)

try:
for model_name, model_path in model_paths.items():
try:
onnx_sessions[model_name] = ort.InferenceSession(
model_path,
providers=[p[0] for p in provider_options],
provider_options=[p[1] for p in provider_options]
providers=providers,
provider_options=provider_opts
)
except Exception:
onnx_sessions[model_name] = ort.InferenceSession(
Expand All @@ -763,30 +747,21 @@ def get_missing_mulan_track_ids(track_ids):

# Recreate sessions
onnx_sessions = {}
available_providers = ort.get_available_providers()

if 'CUDAExecutionProvider' in available_providers:
gpu_device_id = 0
cuda_visible = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cuda_visible and cuda_visible != '-1':
gpu_device_id = 0
cuda_options = {
'device_id': gpu_device_id,
'arena_extend_strategy': 'kSameAsRequested', # Prevent memory fragmentation
'cudnn_conv_algo_search': 'EXHAUSTIVE',
'do_copy_in_default_stream': True,
}
provider_options = [('CUDAExecutionProvider', cuda_options), ('CPUExecutionProvider', {})]
else:
provider_options = [('CPUExecutionProvider', {})]
provider_options, available_providers = build_ort_provider_options(
ort,
cuda_algo_search='EXHAUSTIVE',
include_copy_stream=True,
)
providers, provider_opts = split_provider_options(provider_options)
log_provider_selection(logger, "MusiCNN recycle", provider_options, available_providers)

try:
for model_name, model_path in model_paths.items():
try:
onnx_sessions[model_name] = ort.InferenceSession(
model_path,
providers=[p[0] for p in provider_options],
provider_options=[p[1] for p in provider_options]
providers=providers,
provider_options=provider_opts
)
except Exception:
onnx_sessions[model_name] = ort.InferenceSession(
Expand Down
Loading
Loading