diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..e0f47db --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2025-02-12 - [Fake Incremental Computation] +**Learning:** The `IncrementalMelSpectrogram` claimed to reuse cached frames but was actually recomputing the entire audio window (O(N)) and then overwriting the cached part. +**Action:** When working on "incremental" or "caching" features, always benchmark the processing time vs data size. True incremental processing should scale with `new_data`, not `total_data`. diff --git a/src/mel.js b/src/mel.js index c9bf174..3d09032 100644 --- a/src/mel.js +++ b/src/mel.js @@ -290,7 +290,7 @@ export class MelSpectrogram { * @param {Float32Array} audio - Mono PCM audio * @returns {{rawMel: Float32Array, nFrames: number, featuresLen: number}} */ - computeRawMel(audio) { + computeRawMel(audio, startFrame = 0) { const N = audio.length; if (N === 0) return { rawMel: new Float32Array(0), nFrames: 0, featuresLen: 0 }; @@ -317,7 +317,7 @@ export class MelSpectrogram { const { _fftRe: fftRe, _fftIm: fftIm, _powerBuf: powerBuf } = this; const { hannWindow: window, melFilterbank: fb, nMels, twiddles: tw, nFft, nFreqBins, hopLength, logZeroGuard } = this; - for (let t = 0; t < nFrames; t++) { + for (let t = startFrame; t < nFrames; t++) { const offset = t * hopLength; for (let k = 0; k < nFft; k++) { fftRe[k] = padded[offset + k] * window[k]; fftIm[k] = 0; } fft(fftRe, fftIm, nFft, tw); @@ -452,7 +452,7 @@ export class IncrementalMelSpectrogram { const prefixFrames = Math.floor(prefixSamples / this.processor.hopLength); const safeFrames = Math.max(0, Math.min(prefixFrames - this.boundaryFrames, this._cachedFeaturesLen)); - const { rawMel, nFrames, featuresLen } = this.processor.computeRawMel(audio); + const { rawMel, nFrames, featuresLen } = this.processor.computeRawMel(audio, safeFrames); if (safeFrames > 0 && this._cachedRawMel) { for (let m = 0; m < this.nMels; m++) {