bleugreen · bleugreen · Feb 22, 2026 · Dec 7, 2024 · Dec 7, 2024 · Feb 22, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -15,12 +15,20 @@ jobs:
       - uses: actions/setup-python@v5
         with:
           python-version: "3.10"
+          cache: pip
 
       - name: Install dependencies
-        run: pip install -e ".[dev]" soundfile
+        run: |
+          pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install -e ".[dev]" soundfile
 
       - name: Lint
         run: ruff check src/
 
+      - name: Install model weights
+        run: |
+          mkdir -p ~/.local/share/deeprhythm
+          cp weights/deeprhythm-0.7.pth ~/.local/share/deeprhythm/
+
       - name: Test
         run: pytest
diff --git a/.gitignore b/.gitignore
@@ -17,4 +17,5 @@ dist/
 *.csv
 *.pb
 .workspace
-.venv
+.venv
+.DS_Store
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,3 +41,6 @@ select = ["E", "F", "I"]
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
+markers = [
+    "slow: tests that require model weights (deselect with '-m \"not slow\"')",
+]
diff --git a/src/deeprhythm/audio_proc/bandfilter.py b/src/deeprhythm/audio_proc/bandfilter.py
@@ -4,25 +4,21 @@
 
 def create_log_filter(num_bins, num_bands, device='cuda'):
     """
-    Create a logarithmically spaced filter matrix for audio processing.
-
-    This function generates a filter matrix with logarithmically spaced bands. The filters have
-    unity gain, meaning that the sum of the filter coefficients in each band is equal to one.
+    Create a logarithmically spaced filter matrix.
 
     Parameters
     ----------
     num_bins : int
-        The number of bins in the spectrogram (e.g., the number of frequency bins).
+        Number of frequency bins in the spectrogram
     num_bands : int
-        The number of bands for the filter matrix. These bands are spaced logarithmically.
+        Number of logarithmically spaced bands
     device : str, optional
-        The device on which the filter matrix will be created.
+        Target device for the filter matrix
 
     Returns
     -------
     torch.Tensor
-        A tensor representing the filter matrix with shape (num_bands, num_bins). Each row
-        corresponds to a filter for a specific band.
+        Filter matrix of shape (num_bands, num_bins)
     """
     log_bins = np.logspace(np.log10(1), np.log10(num_bins), num=num_bands+1, base=10.0) - 1
     log_bins = np.unique(np.round(log_bins).astype(int))
@@ -38,25 +34,19 @@ def create_log_filter(num_bins, num_bands, device='cuda'):
 
 def apply_log_filter(stft_output, filter_matrix):
     """
-    Apply the logarithmic filter matrix to the Short-Time Fourier Transform (STFT) output.
-
-    This function applies a precomputed logarithmic filter matrix to the STFT output of an audio signal
-    to reduce its dimensionality and to capture the energy in logarithmically spaced frequency bands.
+    Apply logarithmic filter matrix to STFT output.
 
     Parameters
     ----------
     stft_output : torch.Tensor
-        A tensor representing the STFT output with shape (batch_size, num_bins, num_frames), where
-        num_bins is the number of frequency bins and num_frames is the number of time frames.
+        STFT output of shape (batch_size, num_bins, num_frames)
     filter_matrix : torch.Tensor
-        A tensor representing the logarithmic filter matrix with shape (num_bands, num_bins), where
-        num_bands is the number of logarithmically spaced frequency bands.
+        Filter matrix of shape (num_bands, num_bins)
 
     Returns
     -------
     torch.Tensor
-        A tensor representing the filtered STFT output with shape (batch_size, num_bands, num_frames).
-        Each band contains the aggregated energy from the corresponding set of frequency bins.
+        Filtered output of shape (batch_size, num_bands, num_frames)
     """
     stft_output_transposed = stft_output.transpose(1, 2)
     filtered_output_transposed = torch.matmul(stft_output_transposed, filter_matrix.T)

diff --git a/src/deeprhythm/audio_proc/onset.py b/src/deeprhythm/audio_proc/onset.py
@@ -5,7 +5,7 @@
 
 def onset_strength(
     y=None, n_fft=2048, hop_length=512, lag=1, ref=None,
-    detrend=False, center=True,  aggregate=None
+    detrend=False, center=True, aggregate=None
 ):
     """
     Compute the onset strength of an audio signal or a spectrogram.
@@ -50,8 +50,8 @@ def onset_strength(
 
     # Compute difference to reference, spaced by lag
     onset_env = S[..., lag:] - ref[..., :-lag]
-    onset_env = torch.clamp(onset_env, min=0.0)  # Discard negatives
-
+    onset_env = torch.clamp(onset_env, min=0.0)
+
     if aggregate is None:
         aggregate = torch.mean
     if callable(aggregate):

diff --git a/src/deeprhythm/batch_infer.py b/src/deeprhythm/batch_infer.py
@@ -8,8 +8,15 @@
 import torch.multiprocessing as multiprocessing
 
 from deeprhythm.audio_proc.hcqm import compute_hcqm, make_kernels
-from deeprhythm.model.predictor import load_cnn_model
-from deeprhythm.utils import AudioLoadError, AudioTooShortError, class_to_bpm, get_device, load_and_split_audio
+from deeprhythm.model.frame_cnn import DeepRhythmModel
+from deeprhythm.utils import (
+    AudioLoadError,
+    AudioTooShortError,
+    class_to_bpm,
+    get_device,
+    get_weights,
+    load_and_split_audio,
+)
 
 NUM_WORKERS = 8
 NUM_BATCH = 128
@@ -112,7 +119,9 @@ def consume_and_process(
     specs = make_kernels(len_audio, sr, device=device)
     if not quiet:
         print('made kernels')
-    model = load_cnn_model(device=device, quiet=quiet)
+    model = DeepRhythmModel()
+    model.load_state_dict(torch.load(get_weights(quiet=quiet), map_location=torch.device(device), weights_only=False))
+    model = model.to(device=device)
     model.eval()
     if not quiet:
         print('loaded model')

diff --git a/src/deeprhythm/bench/__init__.py b/src/deeprhythm/bench/__init__.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -17,4 +17,5 @@ dist/ @@
     *.csv
     *.pb
     .workspace
-    .venv
+    .venv
+    .DS_Store