From 5b70efd981381b0c59695f96bf70354ef8f0da5e Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Mon, 19 Jan 2026 23:36:38 -0800
Subject: [PATCH 01/11] FiLM class

---
 NAM/film.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 NAM/film.h

diff --git a/NAM/film.h b/NAM/film.h
new file mode 100644
index 0000000..9df849b
--- /dev/null
+++ b/NAM/film.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <Eigen/Dense>
+#include <cassert>
+#include <vector>
+
+#include "dsp.h"
+
+namespace nam
+{
+// Feature-wise Linear Modulation (FiLM)
+//
+// Given an `input` (out_channels x num_frames) and a `condition`
+// (in_channels x num_frames), compute:
+//   scale, shift = Conv1x1(condition) split across channels
+//   output = input * scale + shift (elementwise)
+class FiLM
+{
+public:
+  FiLM(const int in_channels, const int out_channels)
+  : _cond_to_scale_shift(in_channels, 2 * out_channels, /*bias=*/true)
+  {
+  }
+
+  // Get the entire internal output buffer. This is intended for internal wiring
+  // between layers; callers should treat the buffer as pre-allocated storage
+  // and only consider the first `num_frames` columns valid for a given
+  // processing call. Slice with .leftCols(num_frames) as needed.
+  Eigen::MatrixXf& GetOutput() { return _output; }
+  const Eigen::MatrixXf& GetOutput() const { return _output; }
+
+  void SetMaxBufferSize(const int maxBufferSize)
+  {
+    _cond_to_scale_shift.SetMaxBufferSize(maxBufferSize);
+    _output.resize(get_out_channels(), maxBufferSize);
+  }
+
+  void set_weights_(std::vector<float>::iterator& weights) { _cond_to_scale_shift.set_weights_(weights); }
+
+  long get_in_channels() const { return _cond_to_scale_shift.get_in_channels(); }
+  long get_out_channels() const { return _cond_to_scale_shift.get_out_channels() / 2; }
+
+  // :param input: (out_channels x num_frames)
+  // :param condition: (in_channels x num_frames)
+  // Writes (out_channels x num_frames) into internal output buffer; access via GetOutput().
+  void Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames)
+  {
+    assert(get_out_channels() == input.rows());
+    assert(get_in_channels() == condition.rows());
+    assert(num_frames <= input.cols());
+    assert(num_frames <= condition.cols());
+    assert(num_frames <= _output.cols());
+
+    _cond_to_scale_shift.process_(condition, num_frames);
+    const auto& scale_shift = _cond_to_scale_shift.GetOutput();
+
+    // scale = top out_channels, shift = bottom out_channels
+    const auto scale = scale_shift.topRows(get_out_channels()).leftCols(num_frames);
+    const auto shift = scale_shift.bottomRows(get_out_channels()).leftCols(num_frames);
+
+    _output.leftCols(num_frames).array() = input.leftCols(num_frames).array() * scale.array() + shift.array();
+  }
+
+private:
+  Conv1x1 _cond_to_scale_shift; // in_channels -> 2*out_channels
+  Eigen::MatrixXf _output; // out_channels x maxBufferSize
+};
+} // namespace nam

From 6d5cb622025d3fbdb6da71acb5cbf0194d2b49af Mon Sep 17 00:00:00 2001
From: Steven Atkinson
Date: Mon, 19 Jan 2026 23:38:39 -0800
Subject: [PATCH 02/11] Rename constructor arguments

---
 NAM/film.h | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/NAM/film.h b/NAM/film.h
index 9df849b..3f14095 100644
--- a/NAM/film.h
+++ b/NAM/film.h
@@ -10,15 +10,15 @@ namespace nam
 {
 // Feature-wise Linear Modulation (FiLM)
 //
-// Given an `input` (out_channels x num_frames) and a `condition`
-// (in_channels x num_frames), compute:
+// Given an `input` (input_dim x num_frames) and a `condition`
+// (condition_dim x num_frames), compute:
 //   scale, shift = Conv1x1(condition) split across channels
 //   output = input * scale + shift (elementwise)
 class FiLM
 {
 public:
-  FiLM(const int in_channels, const int out_channels)
-  : _cond_to_scale_shift(in_channels, 2 * out_channels, /*bias=*/true)
+  FiLM(const int condition_dim, const int input_dim)
+  : _cond_to_scale_shift(condition_dim, 2 * input_dim, /*bias=*/true)
   {
   }
 
@@ -32,21 +32,21 @@ class FiLM
   void SetMaxBufferSize(const int maxBufferSize)
   {
     _cond_to_scale_shift.SetMaxBufferSize(maxBufferSize);
-    _output.resize(get_out_channels(), maxBufferSize);
+    _output.resize(get_input_dim(), maxBufferSize);
   }
 
   void set_weights_(std::vector<float>::iterator& weights) { _cond_to_scale_shift.set_weights_(weights); }
 
-  long get_in_channels() const { return _cond_to_scale_shift.get_in_channels(); }
-  long get_out_channels() const { return _cond_to_scale_shift.get_out_channels() / 2; }
+  long get_condition_dim() const { return _cond_to_scale_shift.get_in_channels(); }
+  long get_input_dim() const { return _cond_to_scale_shift.get_out_channels() / 2; }
 
-  // :param input: (out_channels x num_frames)
-  // :param condition: (in_channels x num_frames)
-  // Writes (out_channels x num_frames) into internal output buffer; access via GetOutput().
+  // :param input: (input_dim x num_frames)
+  // :param condition: (condition_dim x num_frames)
+  // Writes (input_dim x num_frames) into internal output buffer; access via GetOutput().
void Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames) { - assert(get_out_channels() == input.rows()); - assert(get_in_channels() == condition.rows()); + assert(get_input_dim() == input.rows()); + assert(get_condition_dim() == condition.rows()); assert(num_frames <= input.cols()); assert(num_frames <= condition.cols()); assert(num_frames <= _output.cols()); @@ -54,15 +54,15 @@ class FiLM _cond_to_scale_shift.process_(condition, num_frames); const auto& scale_shift = _cond_to_scale_shift.GetOutput(); - // scale = top out_channels, shift = bottom out_channels - const auto scale = scale_shift.topRows(get_out_channels()).leftCols(num_frames); - const auto shift = scale_shift.bottomRows(get_out_channels()).leftCols(num_frames); + // scale = top input_dim, shift = bottom input_dim + const auto scale = scale_shift.topRows(get_input_dim()).leftCols(num_frames); + const auto shift = scale_shift.bottomRows(get_input_dim()).leftCols(num_frames); _output.leftCols(num_frames).array() = input.leftCols(num_frames).array() * scale.array() + shift.array(); } private: - Conv1x1 _cond_to_scale_shift; // in_channels -> 2*out_channels - Eigen::MatrixXf _output; // out_channels x maxBufferSize + Conv1x1 _cond_to_scale_shift; // condition_dim -> 2*input_dim + Eigen::MatrixXf _output; // input_dim x maxBufferSize }; } // namespace nam From e34a09abf61da810a7b5ed15f1f4159abe3884a3 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Mon, 19 Jan 2026 23:38:54 -0800 Subject: [PATCH 03/11] Formatting --- tools/test/test_wavenet/test_head1x1.cpp | 43 ++++++++++++------------ 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tools/test/test_wavenet/test_head1x1.cpp b/tools/test/test_wavenet/test_head1x1.cpp index 18ff70b..805a447 100644 --- a/tools/test/test_wavenet/test_head1x1.cpp +++ b/tools/test/test_wavenet/test_head1x1.cpp @@ -178,27 +178,28 @@ void test_head1x1_gated() // Input mixin: (conditionSize, 2*bottleneck) = (1, 4) = 4 weights // 1x1: (bottleneck, channels) + bias = (2, 2) + 2 = 4 + 2 = 6 weights // head1x1: (bottleneck, head1x1_out_channels) + bias = (2, 2) + 2 = 4 + 2 = 6 weights - std::vector weights{// Conv: (channels, 2*bottleneck, kernelSize=1) weights + (2*bottleneck,) bias - // Weight layout: for each kernel position, for each output channel, for each input channel - // For kernel position 0: - // Output channel 0: connects to input channels 0 and 1 - 1.0f, 0.0f, // output channel 0 - // Output channel 1: connects to input channels 0 and 1 - 0.0f, 1.0f, // output channel 1 - // Output channel 2: connects to input channels 0 and 1 - 1.0f, 0.0f, // output channel 2 - // Output channel 3: connects to input channels 0 and 1 - 0.0f, 1.0f, // output channel 3 - // Bias: 2*bottleneck values - 0.0f, 0.0f, 0.0f, 0.0f, - // Input mixin: (conditionSize, 2*bottleneck) weights (all 1.0 for simplicity) - 1.0f, 1.0f, 1.0f, 1.0f, - // 1x1: (bottleneck, channels) weights + (channels,) bias (identity) - 1.0f, 0.0f, 0.0f, 1.0f, // weights (identity) - 0.0f, 0.0f, // bias - // head1x1: (bottleneck, head1x1_out_channels) weights + (head1x1_out_channels,) bias - 0.5f, 0.0f, 0.0f, 0.5f, // weights - 0.1f, 0.1f}; + std::vector weights{ + // Conv: (channels, 2*bottleneck, kernelSize=1) weights + (2*bottleneck,) bias + // Weight layout: for each kernel position, for each output channel, for each input channel + // For kernel position 0: + // Output channel 0: connects to input channels 0 and 1 + 1.0f, 0.0f, // output channel 0 + // Output channel 1: connects to 
input channels 0 and 1 + 0.0f, 1.0f, // output channel 1 + // Output channel 2: connects to input channels 0 and 1 + 1.0f, 0.0f, // output channel 2 + // Output channel 3: connects to input channels 0 and 1 + 0.0f, 1.0f, // output channel 3 + // Bias: 2*bottleneck values + 0.0f, 0.0f, 0.0f, 0.0f, + // Input mixin: (conditionSize, 2*bottleneck) weights (all 1.0 for simplicity) + 1.0f, 1.0f, 1.0f, 1.0f, + // 1x1: (bottleneck, channels) weights + (channels,) bias (identity) + 1.0f, 0.0f, 0.0f, 1.0f, // weights (identity) + 0.0f, 0.0f, // bias + // head1x1: (bottleneck, head1x1_out_channels) weights + (head1x1_out_channels,) bias + 0.5f, 0.0f, 0.0f, 0.5f, // weights + 0.1f, 0.1f}; auto it = weights.begin(); layer.set_weights_(it); From bfb58c67d40445594d23c29aa84361dfacc793f1 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Mon, 19 Jan 2026 23:48:14 -0800 Subject: [PATCH 04/11] Enhance FiLM class to support optional shift in processing - Updated FiLM constructor to accept a boolean shift parameter, modifying the internal scaling logic accordingly. - Adjusted get_input_dim method to return the correct dimension based on the shift parameter. - Refactored the process method to conditionally apply shift during output calculation. - Added comprehensive tests for FiLM functionality, covering both shift and scale-only scenarios. This update improves the flexibility of the FiLM class for various use cases. --- NAM/film.h | 27 +++++--- tools/run_tests.cpp | 5 ++ tools/test/test_film.cpp | 140 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 164 insertions(+), 8 deletions(-) create mode 100644 tools/test/test_film.cpp diff --git a/NAM/film.h b/NAM/film.h index 3f14095..2bf5dc3 100644 --- a/NAM/film.h +++ b/NAM/film.h @@ -17,8 +17,9 @@ namespace nam class FiLM { public: - FiLM(const int condition_dim, const int input_dim) - : _cond_to_scale_shift(condition_dim, 2 * input_dim, /*bias=*/true) + FiLM(const int condition_dim, const int input_dim, const bool shift) + : _cond_to_scale_shift(condition_dim, (shift ? 2 : 1) * input_dim, /*bias=*/true) + , _do_shift(shift) { } @@ -38,7 +39,10 @@ class FiLM void set_weights_(std::vector::iterator& weights) { _cond_to_scale_shift.set_weights_(weights); } long get_condition_dim() const { return _cond_to_scale_shift.get_in_channels(); } - long get_input_dim() const { return _cond_to_scale_shift.get_out_channels() / 2; } + long get_input_dim() const + { + return _do_shift ? 
(_cond_to_scale_shift.get_out_channels() / 2) : _cond_to_scale_shift.get_out_channels(); + } // :param input: (input_dim x num_frames) // :param condition: (condition_dim x num_frames) @@ -54,15 +58,22 @@ class FiLM _cond_to_scale_shift.process_(condition, num_frames); const auto& scale_shift = _cond_to_scale_shift.GetOutput(); - // scale = top input_dim, shift = bottom input_dim const auto scale = scale_shift.topRows(get_input_dim()).leftCols(num_frames); - const auto shift = scale_shift.bottomRows(get_input_dim()).leftCols(num_frames); - - _output.leftCols(num_frames).array() = input.leftCols(num_frames).array() * scale.array() + shift.array(); + if (_do_shift) + { + // scale = top input_dim, shift = bottom input_dim + const auto shift = scale_shift.bottomRows(get_input_dim()).leftCols(num_frames); + _output.leftCols(num_frames).array() = input.leftCols(num_frames).array() * scale.array() + shift.array(); + } + else + { + _output.leftCols(num_frames).array() = input.leftCols(num_frames).array() * scale.array(); + } } private: - Conv1x1 _cond_to_scale_shift; // condition_dim -> 2*input_dim + Conv1x1 _cond_to_scale_shift; // condition_dim -> (shift ? 2 : 1) * input_dim Eigen::MatrixXf _output; // input_dim x maxBufferSize + bool _do_shift; }; } // namespace nam diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index de3a2e2..35d170d 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -7,6 +7,7 @@ #include "test/test_conv_1x1.cpp" #include "test/test_convnet.cpp" #include "test/test_dsp.cpp" +#include "test/test_film.cpp" #include "test/test_fast_lut.cpp" #include "test/test_get_dsp.cpp" #include "test/test_ring_buffer.cpp" @@ -98,6 +99,10 @@ int main() test_conv_1x1::test_set_max_buffer_size(); test_conv_1x1::test_process_multiple_calls(); + test_film::test_set_max_buffer_size(); + test_film::test_process_bias_only(); + test_film::test_process_scale_only(); + test_wavenet::test_layer::test_gated(); test_wavenet::test_layer::test_layer_getters(); test_wavenet::test_layer::test_non_gated_layer(); diff --git a/tools/test/test_film.cpp b/tools/test/test_film.cpp new file mode 100644 index 0000000..81aa5a6 --- /dev/null +++ b/tools/test/test_film.cpp @@ -0,0 +1,140 @@ +// Tests for FiLM + +#include +#include +#include +#include + +#include "NAM/film.h" + +namespace test_film +{ +void test_set_max_buffer_size() +{ + const int condition_dim = 2; + const int input_dim = 3; + nam::FiLM film(condition_dim, input_dim, /*shift=*/true); + + const int maxBufferSize = 128; + film.SetMaxBufferSize(maxBufferSize); + + const auto out = film.GetOutput(); + assert(out.rows() == input_dim); + assert(out.cols() == maxBufferSize); +} + +void test_process_bias_only() +{ + const int condition_dim = 2; + const int input_dim = 3; + nam::FiLM film(condition_dim, input_dim, /*shift=*/true); + + const int maxBufferSize = 64; + film.SetMaxBufferSize(maxBufferSize); + + // Configure the internal Conv1x1 (condition_dim -> 2*input_dim) to have: + // - all-zero weights + // - fixed biases so that scale/shift are constants + // + // Layout for Conv1x1 weights when groups=1: + // - matrix weights: (2*input_dim * condition_dim) + // - bias: (2*input_dim) + std::vector weights; + weights.resize((2 * input_dim) * condition_dim + (2 * input_dim), 0.0f); + + // biases: [scale(0..input_dim-1), shift(0..input_dim-1)] + const float scale0 = 2.0f; + const float scale1 = -1.0f; + const float scale2 = 0.5f; + const float shift0 = 10.0f; + const float shift1 = -20.0f; + const float shift2 = 3.0f; + + const int bias_offset 
= (2 * input_dim) * condition_dim; + weights[bias_offset + 0] = scale0; + weights[bias_offset + 1] = scale1; + weights[bias_offset + 2] = scale2; + weights[bias_offset + 3] = shift0; + weights[bias_offset + 4] = shift1; + weights[bias_offset + 5] = shift2; + + auto it = weights.begin(); + film.set_weights_(it); + assert(it == weights.end()); + + const int num_frames = 4; + Eigen::MatrixXf input(input_dim, num_frames); + // Make each channel distinct, and vary over frames. + input << 1.0f, 2.0f, 3.0f, 4.0f, // + -1.0f, -2.0f, -3.0f, -4.0f, // + 0.25f, 0.5f, 0.75f, 1.0f; + + Eigen::MatrixXf condition(condition_dim, num_frames); + condition.setRandom(); // doesn't matter because weights are zero + + film.Process(input, condition, num_frames); + const auto out = film.GetOutput().leftCols(num_frames); + + // Expected: output = input * scale + shift (elementwise) + const float scales[3] = {scale0, scale1, scale2}; + const float shifts[3] = {shift0, shift1, shift2}; + for (int c = 0; c < input_dim; c++) + { + for (int t = 0; t < num_frames; t++) + { + const float expected = input(c, t) * scales[c] + shifts[c]; + assert(std::abs(out(c, t) - expected) < 1e-6f); + } + } +} + +void test_process_scale_only() +{ + const int condition_dim = 2; + const int input_dim = 3; + nam::FiLM film(condition_dim, input_dim, /*shift=*/false); + + const int maxBufferSize = 64; + film.SetMaxBufferSize(maxBufferSize); + + // Internal Conv1x1 is (condition_dim -> input_dim) in scale-only mode. + // We'll use all-zero weights and biases that define the scale. + std::vector weights; + weights.resize(input_dim * condition_dim + input_dim, 0.0f); + + const float scale0 = 2.0f; + const float scale1 = -1.0f; + const float scale2 = 0.5f; + + const int bias_offset = input_dim * condition_dim; + weights[bias_offset + 0] = scale0; + weights[bias_offset + 1] = scale1; + weights[bias_offset + 2] = scale2; + + auto it = weights.begin(); + film.set_weights_(it); + assert(it == weights.end()); + + const int num_frames = 4; + Eigen::MatrixXf input(input_dim, num_frames); + input << 1.0f, 2.0f, 3.0f, 4.0f, // + -1.0f, -2.0f, -3.0f, -4.0f, // + 0.25f, 0.5f, 0.75f, 1.0f; + + Eigen::MatrixXf condition(condition_dim, num_frames); + condition.setRandom(); // doesn't matter because weights are zero + + film.Process(input, condition, num_frames); + const auto out = film.GetOutput().leftCols(num_frames); + + const float scales[3] = {scale0, scale1, scale2}; + for (int c = 0; c < input_dim; c++) + { + for (int t = 0; t < num_frames; t++) + { + const float expected = input(c, t) * scales[c]; + assert(std::abs(out(c, t) - expected) < 1e-6f); + } + } +} +} // namespace test_film From b5ab66ee7122bf67ddf5d2e5447059d13b1f91fa Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Mon, 19 Jan 2026 23:56:02 -0800 Subject: [PATCH 05/11] Start sketching in FiLMs to Layer::Process() --- NAM/wavenet.cpp | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 2cb749e..2393963 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -52,10 +52,27 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels // Step 1: input convolutions - this->_conv.Process(input, num_frames); + if (this->_conv_pre_film != nullptr) + { + this->_conv_pre_film->Process(input, condition, num_frames); + this->_conv.Process(this->_conv_pre_film->GetOutput(), num_frames); + } + 
else + { + this->_conv.Process(input, num_frames); + } + this->_input_mixin.process_(condition, num_frames); - this->_z.leftCols(num_frames).noalias() = - this->_conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames); + if (this->_conv_post_film != nullptr) + { + this->_conv_post_film->Process(this->_conv.GetOutput(), condition, num_frames); + this->_z.leftCols(num_frames).noalias() = this->_conv_post_film->GetOutput().leftCols(num_frames); + } + else + { + this->_z.leftCols(num_frames).noalias() = this->_conv.GetOutput().leftCols(num_frames); + } + this->_z.leftCols(num_frames).noalias() += _input_mixin.GetOutput().leftCols(num_frames); // Step 2 & 3: activation and 1x1 // From 1e298932ac7d6740355401dc9a8a89435c5a1f4e Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 20 Jan 2026 15:25:40 -0800 Subject: [PATCH 06/11] Add in-place Process_ method to FiLM and comprehensive tests - Add Process_ method to FiLM for in-place processing - Add tests for Process_ with shift enabled - Add tests for Process_ with scale-only mode - Add tests for partial frame processing to ensure only first num_frames are modified --- NAM/film.h | 7 ++ tools/run_tests.cpp | 3 + tools/test/test_film.cpp | 209 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 219 insertions(+) diff --git a/NAM/film.h b/NAM/film.h index 2bf5dc3..6b273e6 100644 --- a/NAM/film.h +++ b/NAM/film.h @@ -71,6 +71,13 @@ class FiLM } } + // in-place + void Process_(Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames) + { + Process(input, condition, num_frames); + input.leftCols(num_frames).noalias() = _output.leftCols(num_frames); + } + private: Conv1x1 _cond_to_scale_shift; // condition_dim -> (shift ? 2 : 1) * input_dim Eigen::MatrixXf _output; // input_dim x maxBufferSize diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 35d170d..c72f594 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -102,6 +102,9 @@ int main() test_film::test_set_max_buffer_size(); test_film::test_process_bias_only(); test_film::test_process_scale_only(); + test_film::test_process_inplace_with_shift(); + test_film::test_process_inplace_scale_only(); + test_film::test_process_inplace_partial_frames(); test_wavenet::test_layer::test_gated(); test_wavenet::test_layer::test_layer_getters(); diff --git a/tools/test/test_film.cpp b/tools/test/test_film.cpp index 81aa5a6..ec8bb95 100644 --- a/tools/test/test_film.cpp +++ b/tools/test/test_film.cpp @@ -137,4 +137,213 @@ void test_process_scale_only() } } } + +void test_process_inplace_with_shift() +{ + const int condition_dim = 2; + const int input_dim = 3; + nam::FiLM film(condition_dim, input_dim, /*shift=*/true); + + const int maxBufferSize = 64; + film.SetMaxBufferSize(maxBufferSize); + + // Configure the internal Conv1x1 with zero weights and fixed biases + std::vector weights; + weights.resize((2 * input_dim) * condition_dim + (2 * input_dim), 0.0f); + + const float scale0 = 2.0f; + const float scale1 = -1.0f; + const float scale2 = 0.5f; + const float shift0 = 10.0f; + const float shift1 = -20.0f; + const float shift2 = 3.0f; + + const int bias_offset = (2 * input_dim) * condition_dim; + weights[bias_offset + 0] = scale0; + weights[bias_offset + 1] = scale1; + weights[bias_offset + 2] = scale2; + weights[bias_offset + 3] = shift0; + weights[bias_offset + 4] = shift1; + weights[bias_offset + 5] = shift2; + + auto it = weights.begin(); + film.set_weights_(it); + assert(it == weights.end()); + + const int num_frames = 4; + 
Eigen::MatrixXf input(input_dim, num_frames); + input << 1.0f, 2.0f, 3.0f, 4.0f, // + -1.0f, -2.0f, -3.0f, -4.0f, // + 0.25f, 0.5f, 0.75f, 1.0f; + + // Keep a copy of the original input for comparison + const Eigen::MatrixXf input_original = input; + + Eigen::MatrixXf condition(condition_dim, num_frames); + condition.setRandom(); // doesn't matter because weights are zero + + // Test in-place processing + film.Process_(input, condition, num_frames); + + // Verify that input was modified in-place + // Expected: output = input * scale + shift (elementwise) + const float scales[3] = {scale0, scale1, scale2}; + const float shifts[3] = {shift0, shift1, shift2}; + for (int c = 0; c < input_dim; c++) + { + for (int t = 0; t < num_frames; t++) + { + const float expected = input_original(c, t) * scales[c] + shifts[c]; + assert(std::abs(input(c, t) - expected) < 1e-6f); + } + } + + // Verify that Process_ produces the same result as Process + Eigen::MatrixXf input_for_process = input_original; + film.Process(input_for_process, condition, num_frames); + const auto out = film.GetOutput().leftCols(num_frames); + + for (int c = 0; c < input_dim; c++) + { + for (int t = 0; t < num_frames; t++) + { + assert(std::abs(input(c, t) - out(c, t)) < 1e-6f); + } + } +} + +void test_process_inplace_scale_only() +{ + const int condition_dim = 2; + const int input_dim = 3; + nam::FiLM film(condition_dim, input_dim, /*shift=*/false); + + const int maxBufferSize = 64; + film.SetMaxBufferSize(maxBufferSize); + + // Internal Conv1x1 is (condition_dim -> input_dim) in scale-only mode. + std::vector weights; + weights.resize(input_dim * condition_dim + input_dim, 0.0f); + + const float scale0 = 2.0f; + const float scale1 = -1.0f; + const float scale2 = 0.5f; + + const int bias_offset = input_dim * condition_dim; + weights[bias_offset + 0] = scale0; + weights[bias_offset + 1] = scale1; + weights[bias_offset + 2] = scale2; + + auto it = weights.begin(); + film.set_weights_(it); + assert(it == weights.end()); + + const int num_frames = 4; + Eigen::MatrixXf input(input_dim, num_frames); + input << 1.0f, 2.0f, 3.0f, 4.0f, // + -1.0f, -2.0f, -3.0f, -4.0f, // + 0.25f, 0.5f, 0.75f, 1.0f; + + // Keep a copy of the original input for comparison + const Eigen::MatrixXf input_original = input; + + Eigen::MatrixXf condition(condition_dim, num_frames); + condition.setRandom(); // doesn't matter because weights are zero + + // Test in-place processing + film.Process_(input, condition, num_frames); + + // Verify that input was modified in-place + const float scales[3] = {scale0, scale1, scale2}; + for (int c = 0; c < input_dim; c++) + { + for (int t = 0; t < num_frames; t++) + { + const float expected = input_original(c, t) * scales[c]; + assert(std::abs(input(c, t) - expected) < 1e-6f); + } + } + + // Verify that Process_ produces the same result as Process + Eigen::MatrixXf input_for_process = input_original; + film.Process(input_for_process, condition, num_frames); + const auto out = film.GetOutput().leftCols(num_frames); + + for (int c = 0; c < input_dim; c++) + { + for (int t = 0; t < num_frames; t++) + { + assert(std::abs(input(c, t) - out(c, t)) < 1e-6f); + } + } +} + +void test_process_inplace_partial_frames() +{ + // Test that only the first num_frames columns are modified when input has more columns + const int condition_dim = 2; + const int input_dim = 3; + nam::FiLM film(condition_dim, input_dim, /*shift=*/true); + + const int maxBufferSize = 64; + film.SetMaxBufferSize(maxBufferSize); + + // Configure the internal Conv1x1 
with zero weights and fixed biases + std::vector weights; + weights.resize((2 * input_dim) * condition_dim + (2 * input_dim), 0.0f); + + const float scale0 = 2.0f; + const float scale1 = -1.0f; + const float scale2 = 0.5f; + const float shift0 = 10.0f; + const float shift1 = -20.0f; + const float shift2 = 3.0f; + + const int bias_offset = (2 * input_dim) * condition_dim; + weights[bias_offset + 0] = scale0; + weights[bias_offset + 1] = scale1; + weights[bias_offset + 2] = scale2; + weights[bias_offset + 3] = shift0; + weights[bias_offset + 4] = shift1; + weights[bias_offset + 5] = shift2; + + auto it = weights.begin(); + film.set_weights_(it); + assert(it == weights.end()); + + // Create input with more columns than num_frames + const int total_cols = 8; + const int num_frames = 4; + Eigen::MatrixXf input(input_dim, total_cols); + input << 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, // + -1.0f, -2.0f, -3.0f, -4.0f, -5.0f, -6.0f, -7.0f, -8.0f, // + 0.25f, 0.5f, 0.75f, 1.0f, 1.25f, 1.5f, 1.75f, 2.0f; + + // Keep a copy of the original input + const Eigen::MatrixXf input_original = input; + + Eigen::MatrixXf condition(condition_dim, num_frames); + condition.setRandom(); + + // Test in-place processing with only num_frames frames + film.Process_(input, condition, num_frames); + + // Verify that only the first num_frames columns were modified + const float scales[3] = {scale0, scale1, scale2}; + const float shifts[3] = {shift0, shift1, shift2}; + for (int c = 0; c < input_dim; c++) + { + // First num_frames should be modified + for (int t = 0; t < num_frames; t++) + { + const float expected = input_original(c, t) * scales[c] + shifts[c]; + assert(std::abs(input(c, t) - expected) < 1e-6f); + } + // Remaining columns should be unchanged + for (int t = num_frames; t < total_cols; t++) + { + assert(std::abs(input(c, t) - input_original(c, t)) < 1e-6f); + } + } +} } // namespace test_film From 7e3c071e04518205133441ae3709272042655c34 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 20 Jan 2026 15:37:10 -0800 Subject: [PATCH 07/11] Finish wavenet::Layer::Process() FiLM chanegs --- NAM/wavenet.cpp | 56 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 2393963..673e14e 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -52,27 +52,31 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma const long bottleneck = this->_bottleneck; // Use the actual bottleneck value, not the doubled output channels // Step 1: input convolutions - if (this->_conv_pre_film != nullptr) + if (this->_conv_pre_film) { - this->_conv_pre_film->Process(input, condition, num_frames); - this->_conv.Process(this->_conv_pre_film->GetOutput(), num_frames); + this->_conv_pre_film->Process_(input, condition, num_frames); } - else + this->_conv.Process(input, num_frames); + if (this->_conv_post_film) { - this->_conv.Process(input, num_frames); + this->_conv_post_film->Process_(this->_conv.GetOutput(), condition, num_frames); } + if (this->_input_mixin_pre_film) + { + this->_input_mixin_pre_film->Process_(condition, num_frames); + } this->_input_mixin.process_(condition, num_frames); - if (this->_conv_post_film != nullptr) + if (this->_input_mixin_post_film) { - this->_conv_post_film->Process(this->_conv.GetOutput(), condition, num_frames); - this->_z.leftCols(num_frames).noalias() = this->_conv_post_film->GetOutput().leftCols(num_frames); + 
this->_input_mixin_post_film->Process_(this->_input_mixin.GetOutput(), condition, num_frames); } - else + this->_z.leftCols(num_frames).noalias() = + _conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames); + if (this->_activation_pre_film) { - this->_z.leftCols(num_frames).noalias() = this->_conv.GetOutput().leftCols(num_frames); + this->_activation_pre_film->Process_(this->_z, num_frames); } - this->_z.leftCols(num_frames).noalias() += _input_mixin.GetOutput().leftCols(num_frames); // Step 2 & 3: activation and 1x1 // @@ -83,6 +87,10 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma if (this->_gating_mode == GatingMode::NONE) { this->_activation->apply(this->_z.leftCols(num_frames)); + if (this->_activation_post_film) + { + this->_activation_post_film->Process_(this->_z, num_frames); + } _1x1.process_(_z, num_frames); } else if (this->_gating_mode == GatingMode::GATED) @@ -92,6 +100,10 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma auto input_block = this->_z.leftCols(num_frames); auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames); this->_gating_activation->apply(input_block, output_block); + if (this->_gating_activation_post_film) + { + this->_activation_post_film->Process_(this->_z.topRows(bottleneck), num_frames); + } _1x1.process_(this->_z.topRows(bottleneck), num_frames); } else if (this->_gating_mode == GatingMode::BLENDED) @@ -101,19 +113,37 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma auto input_block = this->_z.leftCols(num_frames); auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames); this->_blending_activation->apply(input_block, output_block); + if (this->_activation_post_film) + { + this->_activation_post_film->Process_(this->_z.topRows(bottleneck), num_frames); + } _1x1.process_(this->_z.topRows(bottleneck), num_frames); + if (this->_1x1_post_film) + { + this->_1x1_post_film->Process_(this->_1x1.GetOutput(), num_frames); + } } if (this->_head1x1) { if (this->_gating_mode == GatingMode::NONE) + { this->_head1x1->process_(this->_z.leftCols(num_frames), num_frames); + } else - this->_head1x1->process(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); + { + this->_head1x1->process_(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); + } + this->_head1x1->process(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); + if (this->_head1x1_post_film) + { + this->_head1x1_post_film->Process_(this->_head1x1->GetOutput(), num_frames); + } this->_output_head.leftCols(num_frames).noalias() = this->_head1x1->GetOutput().leftCols(num_frames); } - else + else // No head 1x1 { + // (No FiLM) // Store output to head (skip connection: activated conv output) if (this->_gating_mode == GatingMode::NONE) this->_output_head.leftCols(num_frames).noalias() = this->_z.leftCols(num_frames); From 2367323f71efff26a13d1fb27dfa9284118c7f1b Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 20 Jan 2026 15:57:02 -0800 Subject: [PATCH 08/11] Add FiLM initialization to Layer constructor - Fixed _FiLMParams struct syntax - Added FiLM parameter members to LayerArrayParams (9 FiLM layers) - Updated _Layer constructor to accept and initialize FiLM objects - Updated _LayerArray constructor to pass FiLM parameters to layers - Added factory parsing for FiLM parameters from JSON config - FiLM objects are initialized when active=true in parameters - Added SetMaxBufferSize() and set_weights_() support 
for all FiLM objects --- NAM/wavenet.cpp | 138 ++++++++++++++++++++++++++++++++++++++++-------- NAM/wavenet.h | 106 +++++++++++++++++++++++++++++++++++-- 2 files changed, 220 insertions(+), 24 deletions(-) diff --git a/NAM/wavenet.cpp b/NAM/wavenet.cpp index 673e14e..b0caadb 100644 --- a/NAM/wavenet.cpp +++ b/NAM/wavenet.cpp @@ -34,6 +34,25 @@ void nam::wavenet::_Layer::SetMaxBufferSize(const int maxBufferSize) this->_output_head.resize(this->_bottleneck, maxBufferSize); this->_output_head.setZero(); // Ensure consistent initialization across platforms } + // Set max buffer size for FiLM objects + if (this->_conv_pre_film) + this->_conv_pre_film->SetMaxBufferSize(maxBufferSize); + if (this->_conv_post_film) + this->_conv_post_film->SetMaxBufferSize(maxBufferSize); + if (this->_input_mixin_pre_film) + this->_input_mixin_pre_film->SetMaxBufferSize(maxBufferSize); + if (this->_input_mixin_post_film) + this->_input_mixin_post_film->SetMaxBufferSize(maxBufferSize); + if (this->_activation_pre_film) + this->_activation_pre_film->SetMaxBufferSize(maxBufferSize); + if (this->_activation_post_film) + this->_activation_post_film->SetMaxBufferSize(maxBufferSize); + if (this->_gating_activation_post_film) + this->_gating_activation_post_film->SetMaxBufferSize(maxBufferSize); + if (this->_1x1_post_film) + this->_1x1_post_film->SetMaxBufferSize(maxBufferSize); + if (this->_head1x1_post_film) + this->_head1x1_post_film->SetMaxBufferSize(maxBufferSize); } void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) @@ -45,6 +64,25 @@ void nam::wavenet::_Layer::set_weights_(std::vector::iterator& weights) { this->_head1x1->set_weights_(weights); } + // Set weights for FiLM objects + if (this->_conv_pre_film) + this->_conv_pre_film->set_weights_(weights); + if (this->_conv_post_film) + this->_conv_post_film->set_weights_(weights); + if (this->_input_mixin_pre_film) + this->_input_mixin_pre_film->set_weights_(weights); + if (this->_input_mixin_post_film) + this->_input_mixin_post_film->set_weights_(weights); + if (this->_activation_pre_film) + this->_activation_pre_film->set_weights_(weights); + if (this->_activation_post_film) + this->_activation_post_film->set_weights_(weights); + if (this->_gating_activation_post_film) + this->_gating_activation_post_film->set_weights_(weights); + if (this->_1x1_post_film) + this->_1x1_post_film->set_weights_(weights); + if (this->_head1x1_post_film) + this->_head1x1_post_film->set_weights_(weights); } void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames) @@ -54,28 +92,40 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma // Step 1: input convolutions if (this->_conv_pre_film) { - this->_conv_pre_film->Process_(input, condition, num_frames); + // Use Process() instead of Process_() since input is const + this->_conv_pre_film->Process(input, condition, num_frames); + this->_conv.Process(this->_conv_pre_film->GetOutput(), num_frames); + } + else + { + this->_conv.Process(input, num_frames); } - this->_conv.Process(input, num_frames); if (this->_conv_post_film) { - this->_conv_post_film->Process_(this->_conv.GetOutput(), condition, num_frames); + Eigen::MatrixXf& conv_output = this->_conv.GetOutput(); + this->_conv_post_film->Process_(conv_output, condition, num_frames); } if (this->_input_mixin_pre_film) { - this->_input_mixin_pre_film->Process_(condition, num_frames); + // Use Process() instead of Process_() since condition is const + 
this->_input_mixin_pre_film->Process(condition, condition, num_frames); + this->_input_mixin.process_(this->_input_mixin_pre_film->GetOutput(), num_frames); + } + else + { + this->_input_mixin.process_(condition, num_frames); } - this->_input_mixin.process_(condition, num_frames); if (this->_input_mixin_post_film) { - this->_input_mixin_post_film->Process_(this->_input_mixin.GetOutput(), condition, num_frames); + Eigen::MatrixXf& input_mixin_output = this->_input_mixin.GetOutput(); + this->_input_mixin_post_film->Process_(input_mixin_output, condition, num_frames); } this->_z.leftCols(num_frames).noalias() = _conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames); if (this->_activation_pre_film) { - this->_activation_pre_film->Process_(this->_z, num_frames); + this->_activation_pre_film->Process_(this->_z, condition, num_frames); } // Step 2 & 3: activation and 1x1 @@ -89,7 +139,7 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_activation->apply(this->_z.leftCols(num_frames)); if (this->_activation_post_film) { - this->_activation_post_film->Process_(this->_z, num_frames); + this->_activation_post_film->Process_(this->_z, condition, num_frames); } _1x1.process_(_z, num_frames); } @@ -102,7 +152,10 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_gating_activation->apply(input_block, output_block); if (this->_gating_activation_post_film) { - this->_activation_post_film->Process_(this->_z.topRows(bottleneck), num_frames); + // Use Process() for blocks and copy result back + this->_gating_activation_post_film->Process(this->_z.topRows(bottleneck), condition, num_frames); + this->_z.topRows(bottleneck).leftCols(num_frames).noalias() = + this->_gating_activation_post_film->GetOutput().leftCols(num_frames); } _1x1.process_(this->_z.topRows(bottleneck), num_frames); } @@ -115,12 +168,16 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_blending_activation->apply(input_block, output_block); if (this->_activation_post_film) { - this->_activation_post_film->Process_(this->_z.topRows(bottleneck), num_frames); + // Use Process() for blocks and copy result back + this->_activation_post_film->Process(this->_z.topRows(bottleneck), condition, num_frames); + this->_z.topRows(bottleneck).leftCols(num_frames).noalias() = + this->_activation_post_film->GetOutput().leftCols(num_frames); } _1x1.process_(this->_z.topRows(bottleneck), num_frames); if (this->_1x1_post_film) { - this->_1x1_post_film->Process_(this->_1x1.GetOutput(), num_frames); + Eigen::MatrixXf& _1x1_output = this->_1x1.GetOutput(); + this->_1x1_post_film->Process_(_1x1_output, condition, num_frames); } } @@ -137,7 +194,8 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma this->_head1x1->process(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); if (this->_head1x1_post_film) { - this->_head1x1_post_film->Process_(this->_head1x1->GetOutput(), num_frames); + Eigen::MatrixXf& head1x1_output = this->_head1x1->GetOutput(); + this->_head1x1_post_film->Process_(head1x1_output, condition, num_frames); } this->_output_head.leftCols(num_frames).noalias() = this->_head1x1->GetOutput().leftCols(num_frames); } @@ -158,19 +216,26 @@ void nam::wavenet::_Layer::Process(const Eigen::MatrixXf& input, const Eigen::Ma // LayerArray ================================================================= -nam::wavenet::_LayerArray::_LayerArray(const int input_size, const int 
condition_size, const int head_size, - const int channels, const int bottleneck, const int kernel_size, - const std::vector& dilations, const std::string activation, - const GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_1x1, const Head1x1Params& head1x1_params, - const std::string& secondary_activation) +nam::wavenet::_LayerArray::_LayerArray( + const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, + const int kernel_size, const std::vector& dilations, const std::string activation, const GatingMode gating_mode, + const bool head_bias, const int groups_input, const int groups_1x1, const Head1x1Params& head1x1_params, + const std::string& secondary_activation, const _FiLMParams& conv_pre_film_params, + const _FiLMParams& conv_post_film_params, const _FiLMParams& input_mixin_pre_film_params, + const _FiLMParams& input_mixin_post_film_params, const _FiLMParams& activation_pre_film_params, + const _FiLMParams& activation_post_film_params, const _FiLMParams& gating_activation_post_film_params, + const _FiLMParams& _1x1_post_film_params, const _FiLMParams& head1x1_post_film_params) : _rechannel(input_size, channels, false) , _head_rechannel(bottleneck, head_size, head_bias) , _bottleneck(bottleneck) { for (size_t i = 0; i < dilations.size(); i++) this->_layers.push_back(_Layer(condition_size, channels, bottleneck, kernel_size, dilations[i], activation, - gating_mode, groups_input, groups_1x1, head1x1_params, secondary_activation)); + gating_mode, groups_input, groups_1x1, head1x1_params, secondary_activation, + conv_pre_film_params, conv_post_film_params, input_mixin_pre_film_params, + input_mixin_post_film_params, activation_pre_film_params, + activation_post_film_params, gating_activation_post_film_params, + _1x1_post_film_params, head1x1_post_film_params)); } void nam::wavenet::_LayerArray::SetMaxBufferSize(const int maxBufferSize) @@ -322,7 +387,12 @@ nam::wavenet::WaveNet::WaveNet(const int in_channels, layer_array_params[i].channels, layer_array_params[i].bottleneck, layer_array_params[i].kernel_size, layer_array_params[i].dilations, layer_array_params[i].activation, layer_array_params[i].gating_mode, layer_array_params[i].head_bias, layer_array_params[i].groups_input, layer_array_params[i].groups_1x1, - layer_array_params[i].head1x1_params, layer_array_params[i].secondary_activation)); + layer_array_params[i].head1x1_params, layer_array_params[i].secondary_activation, + layer_array_params[i].conv_pre_film_params, layer_array_params[i].conv_post_film_params, + layer_array_params[i].input_mixin_pre_film_params, layer_array_params[i].input_mixin_post_film_params, + layer_array_params[i].activation_pre_film_params, layer_array_params[i].activation_post_film_params, + layer_array_params[i].gating_activation_post_film_params, layer_array_params[i]._1x1_post_film_params, + layer_array_params[i].head1x1_post_film_params)); if (i > 0) if (layer_array_params[i].channels != layer_array_params[i - 1].head_size) { @@ -577,9 +647,35 @@ std::unique_ptr nam::wavenet::Factory(const nlohmann::json& config, st int head1x1_groups = layer_config.value("head1x1_groups", 1); nam::wavenet::Head1x1Params head1x1_params(head1x1_active, head1x1_out_channels, head1x1_groups); + // Helper function to parse FiLM parameters + auto parse_film_params = [&layer_config](const std::string& key) -> nam::wavenet::_FiLMParams { + if (layer_config.find(key) == layer_config.end() || layer_config[key] == false) + { + return 
nam::wavenet::_FiLMParams(false, false); + } + const nlohmann::json& film_config = layer_config[key]; + bool active = film_config.value("active", true); + bool shift = film_config.value("shift", true); + return nam::wavenet::_FiLMParams(active, shift); + }; + + // Parse FiLM parameters + nam::wavenet::_FiLMParams conv_pre_film_params = parse_film_params("conv_pre_film"); + nam::wavenet::_FiLMParams conv_post_film_params = parse_film_params("conv_post_film"); + nam::wavenet::_FiLMParams input_mixin_pre_film_params = parse_film_params("input_mixin_pre_film"); + nam::wavenet::_FiLMParams input_mixin_post_film_params = parse_film_params("input_mixin_post_film"); + nam::wavenet::_FiLMParams activation_pre_film_params = parse_film_params("activation_pre_film"); + nam::wavenet::_FiLMParams activation_post_film_params = parse_film_params("activation_post_film"); + nam::wavenet::_FiLMParams gating_activation_post_film_params = parse_film_params("gating_activation_post_film"); + nam::wavenet::_FiLMParams _1x1_post_film_params = parse_film_params("1x1_post_film"); + nam::wavenet::_FiLMParams head1x1_post_film_params = parse_film_params("head1x1_post_film"); + layer_array_params.push_back(nam::wavenet::LayerArrayParams( input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, gating_mode, - head_bias, groups, groups_1x1, head1x1_params, secondary_activation)); + head_bias, groups, groups_1x1, head1x1_params, secondary_activation, conv_pre_film_params, conv_post_film_params, + input_mixin_pre_film_params, input_mixin_post_film_params, activation_pre_film_params, + activation_post_film_params, gating_activation_post_film_params, _1x1_post_film_params, + head1x1_post_film_params)); } const bool with_head = !config["head"].is_null(); const float head_scale = config["head_scale"]; diff --git a/NAM/wavenet.h b/NAM/wavenet.h index e411385..f8cb502 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -11,6 +11,7 @@ #include "dsp.h" #include "conv1d.h" #include "gating_activations.h" +#include "film.h" namespace nam { @@ -45,13 +46,29 @@ struct Head1x1Params const int groups; }; +struct _FiLMParams +{ + _FiLMParams(bool active_, bool shift_) + : active(active_) + , shift(shift_) + { + } + const bool active; + const bool shift; +}; + class _Layer { public: // New constructor with GatingMode enum and configurable activations _Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation, const std::string activation, const GatingMode gating_mode, const int groups_input, const int groups_1x1, - const Head1x1Params& head1x1_params, const std::string& secondary_activation) + const Head1x1Params& head1x1_params, const std::string& secondary_activation, + const _FiLMParams& conv_pre_film_params, const _FiLMParams& conv_post_film_params, + const _FiLMParams& input_mixin_pre_film_params, const _FiLMParams& input_mixin_post_film_params, + const _FiLMParams& activation_pre_film_params, const _FiLMParams& activation_post_film_params, + const _FiLMParams& gating_activation_post_film_params, const _FiLMParams& _1x1_post_film_params, + const _FiLMParams& head1x1_post_film_params) : _conv(channels, (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck, kernel_size, true, dilation) , _input_mixin(condition_size, (gating_mode != GatingMode::NONE) ? 
2 * bottleneck : bottleneck, false) , _1x1(bottleneck, channels, groups_1x1) @@ -84,6 +101,50 @@ class _Layer if (!secondary_activation.empty()) throw std::invalid_argument("secondary_activation provided for none mode"); } + + // Initialize FiLM objects + if (conv_pre_film_params.active) + { + _conv_pre_film = std::make_unique(condition_size, channels, conv_pre_film_params.shift); + } + if (conv_post_film_params.active) + { + const int conv_out_channels = (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck; + _conv_post_film = std::make_unique(condition_size, conv_out_channels, conv_post_film_params.shift); + } + if (input_mixin_pre_film_params.active) + { + _input_mixin_pre_film = std::make_unique(condition_size, condition_size, input_mixin_pre_film_params.shift); + } + if (input_mixin_post_film_params.active) + { + const int input_mixin_out_channels = (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck; + _input_mixin_post_film = + std::make_unique(condition_size, input_mixin_out_channels, input_mixin_post_film_params.shift); + } + if (activation_pre_film_params.active) + { + const int z_channels = (gating_mode != GatingMode::NONE) ? 2 * bottleneck : bottleneck; + _activation_pre_film = std::make_unique(condition_size, z_channels, activation_pre_film_params.shift); + } + if (activation_post_film_params.active) + { + _activation_post_film = std::make_unique(condition_size, bottleneck, activation_post_film_params.shift); + } + if (gating_activation_post_film_params.active) + { + _gating_activation_post_film = + std::make_unique(condition_size, bottleneck, gating_activation_post_film_params.shift); + } + if (_1x1_post_film_params.active) + { + _1x1_post_film = std::make_unique(condition_size, channels, _1x1_post_film_params.shift); + } + if (head1x1_post_film_params.active && head1x1_params.active) + { + _head1x1_post_film = + std::make_unique(condition_size, head1x1_params.out_channels, head1x1_post_film_params.shift); + } }; // Resize all arrays to be able to process `maxBufferSize` frames. 
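For reference, a minimal standalone usage sketch of the FiLM class introduced in this series (illustrative only, not part of the patch; the zero-weight setup and the Conv1x1 weight layout of matrix weights followed by bias follow tools/test/test_film.cpp, and the include paths assume the usual NAM build setup):

  #include <vector>

  #include <Eigen/Dense>

  #include "NAM/film.h"

  int main()
  {
    // condition_dim=2, input_dim=3, with both scale and shift (PATCH 04 signature).
    nam::FiLM film(2, 3, /*shift=*/true);
    film.SetMaxBufferSize(64);

    // Conv1x1 maps condition_dim -> 2*input_dim with bias: matrix weights first, then bias.
    // All zeros => scale = 0, shift = 0.
    std::vector<float> weights((2 * 3) * 2 + (2 * 3), 0.0f);
    auto it = weights.begin();
    film.set_weights_(it);

    Eigen::MatrixXf input = Eigen::MatrixXf::Ones(3, 4);
    Eigen::MatrixXf condition = Eigen::MatrixXf::Zero(2, 4);
    film.Process(input, condition, /*num_frames=*/4); // output = input * scale + shift
    const Eigen::MatrixXf out = film.GetOutput().leftCols(4); // all zeros with these weights
    return 0;
  }
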
@@ -141,6 +202,17 @@ class _Layer // Gating/blending activation objects std::unique_ptr _gating_activation; std::unique_ptr _blending_activation; + + // FiLM objects for feature-wise linear modulation + std::unique_ptr _conv_pre_film; + std::unique_ptr _conv_post_film; + std::unique_ptr _input_mixin_pre_film; + std::unique_ptr _input_mixin_post_film; + std::unique_ptr _activation_pre_film; + std::unique_ptr _activation_post_film; + std::unique_ptr _gating_activation_post_film; + std::unique_ptr _1x1_post_film; + std::unique_ptr _head1x1_post_film; }; class LayerArrayParams @@ -150,7 +222,12 @@ class LayerArrayParams const int bottleneck_, const int kernel_size_, const std::vector&& dilations_, const std::string activation_, const GatingMode gating_mode_, const bool head_bias_, const int groups_input, const int groups_1x1_, const Head1x1Params& head1x1_params_, - const std::string& secondary_activation_) + const std::string& secondary_activation_, const _FiLMParams& conv_pre_film_params_, + const _FiLMParams& conv_post_film_params_, const _FiLMParams& input_mixin_pre_film_params_, + const _FiLMParams& input_mixin_post_film_params_, const _FiLMParams& activation_pre_film_params_, + const _FiLMParams& activation_post_film_params_, + const _FiLMParams& gating_activation_post_film_params_, const _FiLMParams& _1x1_post_film_params_, + const _FiLMParams& head1x1_post_film_params_) : input_size(input_size_) , condition_size(condition_size_) , head_size(head_size_) @@ -165,6 +242,15 @@ class LayerArrayParams , groups_1x1(groups_1x1_) , head1x1_params(head1x1_params_) , secondary_activation(secondary_activation_) + , conv_pre_film_params(conv_pre_film_params_) + , conv_post_film_params(conv_post_film_params_) + , input_mixin_pre_film_params(input_mixin_pre_film_params_) + , input_mixin_post_film_params(input_mixin_post_film_params_) + , activation_pre_film_params(activation_pre_film_params_) + , activation_post_film_params(activation_post_film_params_) + , gating_activation_post_film_params(gating_activation_post_film_params_) + , _1x1_post_film_params(_1x1_post_film_params_) + , head1x1_post_film_params(head1x1_post_film_params_) { } @@ -182,6 +268,15 @@ class LayerArrayParams const int groups_1x1; const Head1x1Params head1x1_params; const std::string secondary_activation; + const _FiLMParams conv_pre_film_params; + const _FiLMParams conv_post_film_params; + const _FiLMParams input_mixin_pre_film_params; + const _FiLMParams input_mixin_post_film_params; + const _FiLMParams activation_pre_film_params; + const _FiLMParams activation_post_film_params; + const _FiLMParams gating_activation_post_film_params; + const _FiLMParams _1x1_post_film_params; + const _FiLMParams head1x1_post_film_params; }; // An array of layers with the same channels, kernel sizes, activations. 
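For orientation, a sketch of how the widened LayerArrayParams constructor is invoked with all nine FiLM slots disabled (illustrative only, not part of the patch; it mirrors the make_layer_array_params() helper added for the tests in PATCH 09 and assumes the surrounding arguments are already in scope):

  // Trailing _FiLMParams order: conv pre/post, input-mixin pre/post,
  // activation pre/post, gating-activation post, 1x1 post, head1x1 post.
  const nam::wavenet::_FiLMParams off(/*active=*/false, /*shift=*/false);
  nam::wavenet::LayerArrayParams params(
    input_size, condition_size, head_size, channels, bottleneck, kernel_size,
    std::move(dilations), activation, gating_mode, head_bias, groups_input, groups_1x1,
    head1x1_params, secondary_activation,
    off, off, off, off, off, off, off, off, off);
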
@@ -192,7 +287,12 @@ class _LayerArray _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, const int kernel_size, const std::vector& dilations, const std::string activation, const GatingMode gating_mode, const bool head_bias, const int groups_input, - const int groups_1x1, const Head1x1Params& head1x1_params, const std::string& secondary_activation); + const int groups_1x1, const Head1x1Params& head1x1_params, const std::string& secondary_activation, + const _FiLMParams& conv_pre_film_params, const _FiLMParams& conv_post_film_params, + const _FiLMParams& input_mixin_pre_film_params, const _FiLMParams& input_mixin_post_film_params, + const _FiLMParams& activation_pre_film_params, const _FiLMParams& activation_post_film_params, + const _FiLMParams& gating_activation_post_film_params, const _FiLMParams& _1x1_post_film_params, + const _FiLMParams& head1x1_post_film_params); void SetMaxBufferSize(const int maxBufferSize); From 9180b78614cb8d1796bb82a6e0720f2665dc32ec Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 20 Jan 2026 17:06:32 -0800 Subject: [PATCH 09/11] Add helper functions to test files for FiLM parameters - Created helper functions in all test files to reduce duplication: - make_default_film_params() - returns default (inactive) FiLM parameters - make_layer() - creates _Layer with default FiLM parameters - make_layer_array() - creates _LayerArray with default FiLM parameters - make_layer_array_params() - creates LayerArrayParams with default FiLM parameters - Updated all test files to use helper functions instead of direct constructors - Removed duplicate code from test files - All tests now compile and build successfully --- .../test_condition_processing.cpp | 25 +++++- tools/test/test_wavenet/test_full.cpp | 55 ++++++++---- tools/test/test_wavenet/test_head1x1.cpp | 39 ++++++--- tools/test/test_wavenet/test_layer.cpp | 47 +++++++---- tools/test/test_wavenet/test_layer_array.cpp | 33 ++++++-- .../test/test_wavenet/test_real_time_safe.cpp | 83 +++++++++++++++---- .../test/test_wavenet_configurable_gating.cpp | 82 ++++++++++++++---- 7 files changed, 278 insertions(+), 86 deletions(-) diff --git a/tools/test/test_wavenet/test_condition_processing.cpp b/tools/test/test_wavenet/test_condition_processing.cpp index c4b5b1a..2e3d46c 100644 --- a/tools/test/test_wavenet/test_condition_processing.cpp +++ b/tools/test/test_wavenet/test_condition_processing.cpp @@ -13,6 +13,25 @@ namespace test_wavenet { namespace test_condition_processing { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create LayerArrayParams with default FiLM parameters +static nam::wavenet::LayerArrayParams make_layer_array_params( + const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, + const int kernel_size, std::vector&& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::LayerArrayParams( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups_input, groups_1x1, 
head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, film_params); +} // Helper function to create a simple WaveNet with specified input and output channels std::unique_ptr create_simple_wavenet( @@ -37,9 +56,9 @@ std::unique_ptr create_simple_wavenet( const int head1x1_groups = 1; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, head1x1_groups); - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, - std::move(dilations), activation, gating_mode, head_bias, groups, groups_1x1, - head1x1_params, ""); + nam::wavenet::LayerArrayParams params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); diff --git a/tools/test/test_wavenet/test_full.cpp b/tools/test/test_wavenet/test_full.cpp index ee498c1..4364390 100644 --- a/tools/test/test_wavenet/test_full.cpp +++ b/tools/test/test_wavenet/test_full.cpp @@ -12,6 +12,25 @@ namespace test_wavenet { namespace test_full { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create LayerArrayParams with default FiLM parameters +static nam::wavenet::LayerArrayParams make_layer_array_params( + const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, + const int kernel_size, std::vector&& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::LayerArrayParams( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, film_params); +} // Test full WaveNet model void test_wavenet_model() { @@ -32,9 +51,9 @@ void test_wavenet_model() const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, - std::move(dilations), activation, gating_mode, head_bias, groups, groups_1x1, - head1x1_params, ""); + nam::wavenet::LayerArrayParams params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -98,14 +117,14 @@ void test_wavenet_multiple_arrays() const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); - layer_array_params.push_back(nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations1), activation, - gating_mode, head_bias, groups, groups_1x1, 
head1x1_params, "")); + layer_array_params.push_back(make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, + kernel_size, std::move(dilations1), activation, gating_mode, + head_bias, groups, groups_1x1, head1x1_params, "")); // Second array (head_size of first must match channels of second) std::vector dilations2{1}; - layer_array_params.push_back(nam::wavenet::LayerArrayParams( - head_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations2), activation, - gating_mode, head_bias, groups, groups_1x1, head1x1_params, "")); + layer_array_params.push_back(make_layer_array_params(head_size, condition_size, head_size, channels, bottleneck, + kernel_size, std::move(dilations2), activation, gating_mode, + head_bias, groups, groups_1x1, head1x1_params, "")); std::vector weights; // Array 0: rechannel, layer, head_rechannel @@ -156,9 +175,9 @@ void test_wavenet_zero_input() const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, - std::move(dilations), activation, gating_mode, head_bias, groups, groups_1x1, - head1x1_params, ""); + nam::wavenet::LayerArrayParams params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -205,9 +224,9 @@ void test_wavenet_different_buffer_sizes() const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, - std::move(dilations), activation, gating_mode, head_bias, groups, groups_1x1, - head1x1_params, ""); + nam::wavenet::LayerArrayParams params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); @@ -258,9 +277,9 @@ void test_wavenet_prewarm() nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); - nam::wavenet::LayerArrayParams params(input_size, condition_size, head_size, channels, bottleneck, kernel_size, - std::move(dilations), activation, gating_mode, head_bias, groups, groups_1x1, - head1x1_params, ""); + nam::wavenet::LayerArrayParams params = make_layer_array_params( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); std::vector layer_array_params; layer_array_params.push_back(std::move(params)); diff --git a/tools/test/test_wavenet/test_head1x1.cpp b/tools/test/test_wavenet/test_head1x1.cpp index 805a447..40d5abf 100644 --- a/tools/test/test_wavenet/test_head1x1.cpp +++ b/tools/test/test_wavenet/test_head1x1.cpp @@ -12,6 +12,25 @@ namespace test_wavenet { namespace test_head1x1 { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create a Layer with default FiLM parameters +static nam::wavenet::_Layer make_layer(const 
int condition_size, const int channels, const int bottleneck, + const int kernel_size, const int dilation, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const int groups_input, + const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, + film_params); +} void test_head1x1_inactive() { @@ -28,8 +47,8 @@ void test_head1x1_inactive() const bool head1x1_active = false; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // Set weights (same as non-gated layer test) // With bottleneck=channels=2: @@ -96,8 +115,8 @@ void test_head1x1_active() // Create head1x1 with different out_channels to verify it's being used nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, head1x1_groups); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // Set weights: conv, input_mixin, 1x1, head1x1 // With bottleneck=channels=2: @@ -168,8 +187,8 @@ void test_head1x1_gated() const int head1x1_groups = 1; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, head1x1_groups); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, "Sigmoid"); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, "Sigmoid"); // For gated: conv outputs 2*bottleneck, input_mixin outputs 2*bottleneck, 1x1 outputs channels // head1x1 outputs channels @@ -259,8 +278,8 @@ void test_head1x1_groups() const int head1x1_groups = 2; // Grouped head1x1 nam::wavenet::Head1x1Params head1x1_params(head1x1_active, channels, head1x1_groups); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // With grouped head1x1, we need to provide weights for each group // For groups=2, channels=4, bottleneck=4: each group has 2 in_channels and 2 out_channels @@ -340,8 +359,8 @@ void test_head1x1_different_out_channels() const int head1x1_groups = 1; nam::wavenet::Head1x1Params head1x1_params(head1x1_active, head1x1_out_channels, head1x1_groups); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer 
= make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // head1x1 should map from bottleneck to head1x1_out_channels // With channels=4, bottleneck=4, head1x1_out_channels=2: diff --git a/tools/test/test_wavenet/test_layer.cpp b/tools/test/test_wavenet/test_layer.cpp index ae43274..c2de221 100644 --- a/tools/test/test_wavenet/test_layer.cpp +++ b/tools/test/test_wavenet/test_layer.cpp @@ -12,6 +12,25 @@ namespace test_wavenet { namespace test_layer { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create a Layer with default FiLM parameters +static nam::wavenet::_Layer make_layer(const int condition_size, const int channels, const int bottleneck, + const int kernel_size, const int dilation, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const int groups_input, + const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, + film_params); +} void test_gated() { // Assert correct nuemrics of the gating activation. @@ -26,8 +45,8 @@ void test_gated() const int groups_input = 1; const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, "Sigmoid"); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, "Sigmoid"); // Conv, input mixin, 1x1 std::vector weights{ @@ -103,8 +122,8 @@ void test_layer_getters() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); assert(layer.get_channels() == channels); assert(layer.get_kernel_size() == kernelSize); @@ -125,8 +144,8 @@ void test_non_gated_layer() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // For non-gated: conv outputs 1 channel, input_mixin outputs 1 channel, 1x1 outputs 1 channel // Conv: (1,1,1) weight + (1,) bias @@ -193,8 +212,8 @@ void test_layer_activations() const int groups_input = 1; const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, 
dilation, "Tanh", gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, "Tanh", gating_mode, + groups_input, groups_1x1, head1x1_params, ""); std::vector weights{1.0f, 0.0f, 1.0f, 1.0f, 0.0f}; auto it = weights.begin(); layer.set_weights_(it); @@ -230,8 +249,8 @@ void test_layer_multichannel() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); assert(layer.get_channels() == channels); @@ -300,8 +319,8 @@ void test_layer_bottleneck() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // With bottleneck < channels, the internal conv and input_mixin should have bottleneck channels, // but the 1x1 should map from bottleneck back to channels @@ -378,8 +397,8 @@ void test_layer_bottleneck_gated() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, - groups_input, groups_1x1, head1x1_params, "Sigmoid"); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, "Sigmoid"); // With gated=true and bottleneck=2, internal channels should be 2*bottleneck=4 // Conv: (channels, 2*bottleneck, kernelSize=1) = (4, 4, 1) + bias diff --git a/tools/test/test_wavenet/test_layer_array.cpp b/tools/test/test_wavenet/test_layer_array.cpp index d5916a2..aa93c35 100644 --- a/tools/test/test_wavenet/test_layer_array.cpp +++ b/tools/test/test_wavenet/test_layer_array.cpp @@ -12,6 +12,27 @@ namespace test_wavenet { namespace test_layer_array { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create a LayerArray with default FiLM parameters +static nam::wavenet::_LayerArray make_layer_array(const int input_size, const int condition_size, const int head_size, + const int channels, const int bottleneck, const int kernel_size, + const std::vector& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, + const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, + activation, gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, + secondary_activation, film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); +} // Test layer array 
construction and basic processing void test_layer_array_basic() { @@ -30,8 +51,8 @@ void test_layer_array_basic() nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = - nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation, gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); + make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); const int numFrames = 4; layer_array.SetMaxBufferSize(numFrames); @@ -89,8 +110,8 @@ void test_layer_array_receptive_field() nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = - nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation, gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); + make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); long rf = layer_array.get_receptive_field(); // Expected: sum of dilation * (kernel_size - 1) for each layer @@ -120,8 +141,8 @@ void test_layer_array_with_head_input() nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = - nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation, gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); + make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); const int numFrames = 2; layer_array.SetMaxBufferSize(numFrames); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index cc04150..8b7eda9 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -102,6 +102,55 @@ void operator delete[](void* ptr) noexcept namespace test_wavenet { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create a Layer with default FiLM parameters +static nam::wavenet::_Layer make_layer(const int condition_size, const int channels, const int bottleneck, + const int kernel_size, const int dilation, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const int groups_input, + const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, + film_params); +} + +// Helper function to create a LayerArray with default FiLM parameters +static nam::wavenet::_LayerArray make_layer_array(const int input_size, const int condition_size, const int head_size, + const int channels, const int bottleneck, const int kernel_size, + const std::vector& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool 
head_bias, + const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, + activation, gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, + secondary_activation, film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); +} + +// Helper function to create LayerArrayParams with default FiLM parameters +static nam::wavenet::LayerArrayParams make_layer_array_params( + const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, + const int kernel_size, std::vector&& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::LayerArrayParams( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, film_params); +} // Helper function to run allocation tracking tests // setup: Function to run before tracking starts (can be nullptr) // test: Function to run while tracking allocations (required) @@ -438,8 +487,8 @@ void test_layer_process_realtime_safe() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, - gating_mode, groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // Set weights std::vector weights{1.0f, 0.0f, // Conv (weight, bias) @@ -494,8 +543,8 @@ void test_layer_bottleneck_process_realtime_safe() const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, - gating_mode, groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // Set weights for bottleneck != channels // Conv: (channels, bottleneck, kernelSize=1) = (4, 2, 1) + bias @@ -580,8 +629,8 @@ void test_layer_grouped_process_realtime_safe() const int groups_1x1 = 2; // 1x1 is also grouped nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, - gating_mode, groups_input, groups_1x1, head1x1_params, ""); + auto layer = make_layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, ""); // Set weights for grouped convolution // With groups_input=2, channels=4: each group has 2 in_channels and 2 out_channels @@ -693,8 +742,8 @@ void test_layer_array_process_realtime_safe() 
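// A minimal sketch of how a check like run_allocation_test_no_allocations can be built on the
// global operator new/delete replacements this file already declares. Everything below
// (g_alloc_events, sketch_run_no_alloc_test) is an illustrative assumption, not the project's
// actual helper; it only shows the counting idea the real-time-safety tests rely on.
#include <atomic>
#include <cassert>
#include <functional>

static std::atomic<long> g_alloc_events{0}; // assumed to be incremented by the file's operator new/delete overrides

static void sketch_run_no_alloc_test(const std::function<void()>& setup, const std::function<void()>& test,
                                     const std::function<void()>& teardown, const char* name)
{
  if (setup)
    setup(); // allocations during setup are allowed
  const long before = g_alloc_events.load();
  test(); // the real-time path under test; must not touch the heap
  const long after = g_alloc_events.load();
  assert(after == before && "Process() allocated or freed memory");
  if (teardown)
    teardown();
  (void)name; // the real helper presumably reports the test name on failure
}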
nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer_array = - nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, - activation, gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); + make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, activation, + gating_mode, head_bias, groups, groups_1x1, head1x1_params, ""); // Set weights: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) std::vector weights{1.0f, // Rechannel @@ -762,14 +811,14 @@ void test_process_realtime_safe() const int bottleneck = channels; const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - layer_array_params.push_back(nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations1), activation, - gating_mode, head_bias, groups, groups_1x1, head1x1_params, "")); + layer_array_params.push_back(make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, + kernel_size, std::move(dilations1), activation, gating_mode, + head_bias, groups, groups_1x1, head1x1_params, "")); // Second layer array (head_size of first must match channels of second) std::vector dilations2{1}; - layer_array_params.push_back(nam::wavenet::LayerArrayParams( - head_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations2), activation, - gating_mode, head_bias, groups, groups_1x1, head1x1_params, "")); + layer_array_params.push_back(make_layer_array_params(head_size, condition_size, head_size, channels, bottleneck, + kernel_size, std::move(dilations2), activation, gating_mode, + head_bias, groups, groups_1x1, head1x1_params, "")); // Weights: Array 0: rechannel(1), layer(conv:1+1, input_mixin:1, 1x1:1+1), head_rechannel(1) // Array 1: same structure @@ -839,9 +888,9 @@ void test_process_3in_2out_realtime_safe() std::vector layer_array_params; std::vector dilations1{1}; - layer_array_params.push_back(nam::wavenet::LayerArrayParams( - input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations1), activation, - gating_mode, head_bias, groups, groups_1x1, head1x1_params, "")); + layer_array_params.push_back(make_layer_array_params(input_size, condition_size, head_size, channels, bottleneck, + kernel_size, std::move(dilations1), activation, gating_mode, + head_bias, groups, groups_1x1, head1x1_params, "")); // Calculate weights: // _rechannel: Conv1x1(3, 4, bias=false) = 3*4 = 12 weights diff --git a/tools/test/test_wavenet_configurable_gating.cpp b/tools/test/test_wavenet_configurable_gating.cpp index a98326c..33e946c 100644 --- a/tools/test/test_wavenet_configurable_gating.cpp +++ b/tools/test/test_wavenet_configurable_gating.cpp @@ -9,6 +9,55 @@ namespace test_wavenet_configurable_gating { +// Helper function to create default (inactive) FiLM parameters +static nam::wavenet::_FiLMParams make_default_film_params() +{ + return nam::wavenet::_FiLMParams(false, false); +} + +// Helper function to create a Layer with default FiLM parameters +static nam::wavenet::_Layer make_layer(const int condition_size, const int channels, const int bottleneck, + const int kernel_size, const int dilation, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const int groups_input, + const int groups_1x1, const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + 
auto film_params = make_default_film_params(); + return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, + film_params); +} + +// Helper function to create LayerArrayParams with default FiLM parameters +static nam::wavenet::LayerArrayParams make_layer_array_params( + const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, + const int kernel_size, std::vector&& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::LayerArrayParams( + input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::move(dilations), activation, + gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, film_params); +} + +// Helper function to create a LayerArray with default FiLM parameters +static nam::wavenet::_LayerArray make_layer_array(const int input_size, const int condition_size, const int head_size, + const int channels, const int bottleneck, const int kernel_size, + const std::vector& dilations, const std::string activation, + const nam::wavenet::GatingMode gating_mode, const bool head_bias, + const int groups_input, const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation) +{ + auto film_params = make_default_film_params(); + return nam::wavenet::_LayerArray(input_size, condition_size, head_size, channels, bottleneck, kernel_size, dilations, + activation, gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, + secondary_activation, film_params, film_params, film_params, film_params, + film_params, film_params, film_params, film_params, film_params); +} class TestConfigurableGating { @@ -31,9 +80,8 @@ class TestConfigurableGating for (const auto& gating_act : gating_activations) { - auto layer = - nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, gating_act); + auto layer = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, gating_act); // Verify that the layer was created successfully and has correct dimensions assert(layer.get_channels() == channels); @@ -59,8 +107,8 @@ class TestConfigurableGating for (const auto& blending_act : blending_activations) { auto layer = - nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::BLENDED, groups_input, groups_1x1, head1x1_params, blending_act); + make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + nam::wavenet::GatingMode::BLENDED, groups_input, groups_1x1, head1x1_params, blending_act); // Verify that the layer was created successfully and has correct dimensions assert(layer.get_channels() == channels); @@ -85,7 +133,7 @@ class TestConfigurableGating 
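// A small illustration of what the two _FiLMParams constructor flags used by these helpers are
// taken to mean here: (active, shift). The struct and function below are hypothetical stand-ins,
// not the library's real types; they only mirror the behaviour the tests depend on, namely that
// an inactive FiLM leaves the signal unchanged and an active one applies a conditioned scale,
// optionally with a shift.
struct SketchFiLMParams
{
  bool active; // false -> the FiLM stage is skipped entirely (the helpers' default)
  bool shift;  // true  -> the condition drives both scale and shift; false -> scale only
};

// Per-sample view of a FiLM stage under those assumptions: y = x * scale (+ shift when enabled).
inline float sketch_film(float x, float scale, float shift_value, const SketchFiLMParams& p)
{
  if (!p.active)
    return x; // inactive FiLM passes the input through unchanged
  return p.shift ? x * scale + shift_value : x * scale;
}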
nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); // Test with different gating activations - auto params_gated = nam::wavenet::LayerArrayParams( + auto params_gated = make_layer_array_params( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::vector{1, 2}, activation, nam::wavenet::GatingMode::GATED, head_bias, groups_input, groups_1x1, head1x1_params, "Tanh"); @@ -93,7 +141,7 @@ class TestConfigurableGating assert(params_gated.secondary_activation == "Tanh"); // Test with different blending activations - auto params_blended = nam::wavenet::LayerArrayParams( + auto params_blended = make_layer_array_params( input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::vector{1, 2}, activation, nam::wavenet::GatingMode::BLENDED, head_bias, groups_input, groups_1x1, head1x1_params, "ReLU"); @@ -117,9 +165,9 @@ class TestConfigurableGating const int groups_1x1 = 1; nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer_array = nam::wavenet::_LayerArray( - input_size, condition_size, head_size, channels, bottleneck, kernel_size, std::vector{1}, activation, - nam::wavenet::GatingMode::GATED, head_bias, groups_input, groups_1x1, head1x1_params, "ReLU"); + auto layer_array = make_layer_array(input_size, condition_size, head_size, channels, bottleneck, kernel_size, + std::vector{1}, activation, nam::wavenet::GatingMode::GATED, head_bias, + groups_input, groups_1x1, head1x1_params, "ReLU"); // Verify that layers were created correctly by checking receptive field // This should be non-zero for a valid layer array @@ -181,16 +229,14 @@ class TestConfigurableGating // Create layers with different gating activations auto layer_sigmoid = - nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, "Sigmoid"); + make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, nam::wavenet::GatingMode::GATED, + groups_input, groups_1x1, head1x1_params, "Sigmoid"); - auto layer_tanh = - nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, "Tanh"); + auto layer_tanh = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, "Tanh"); - auto layer_relu = - nam::wavenet::_Layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, - nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, "ReLU"); + auto layer_relu = make_layer(conditionSize, channels, bottleneck, kernelSize, dilation, activation, + nam::wavenet::GatingMode::GATED, groups_input, groups_1x1, head1x1_params, "ReLU"); // Set max buffer size for all layers const int num_frames = 10; From 3f2faebf869f8593b75a2a171f97e6f209d6ae80 Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 20 Jan 2026 17:13:49 -0800 Subject: [PATCH 10/11] Add real-time safety tests for Layer with all FiLMs active - Added helper function make_layer_all_films() to create Layer with all FiLMs active - Added test_layer_all_films_with_shift_realtime_safe() - tests with shift=true - Added test_layer_all_films_without_shift_realtime_safe() - tests with shift=false - Both tests verify no memory allocations occur during Process() calls - Tests cover all 8 active FiLMs (excluding head1x1_post_film when head1x1 is inactive) - 
Tests multiple buffer sizes (1, 8, 16, 32, 64, 128, 256) - Fixed weight calculations: shift=true needs 4 weights per FiLM, shift=false needs 2 weights per FiLM - Registered tests in run_tests.cpp --- tools/run_tests.cpp | 2 + .../test/test_wavenet/test_real_time_safe.cpp | 195 ++++++++++++++++++ 2 files changed, 197 insertions(+) diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index c72f594..7e2b06e 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -134,6 +134,8 @@ int main() test_wavenet::test_layer_process_realtime_safe(); test_wavenet::test_layer_bottleneck_process_realtime_safe(); test_wavenet::test_layer_grouped_process_realtime_safe(); + test_wavenet::test_layer_all_films_with_shift_realtime_safe(); + test_wavenet::test_layer_all_films_without_shift_realtime_safe(); test_wavenet::test_layer_array_process_realtime_safe(); test_wavenet::test_process_realtime_safe(); test_wavenet::test_process_3in_2out_realtime_safe(); diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index 8b7eda9..64470d9 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -151,6 +151,22 @@ static nam::wavenet::LayerArrayParams make_layer_array_params( gating_mode, head_bias, groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params, film_params); } + +// Helper function to create a Layer with all FiLMs active +static nam::wavenet::_Layer make_layer_all_films(const int condition_size, const int channels, const int bottleneck, + const int kernel_size, const int dilation, + const std::string activation, + const nam::wavenet::GatingMode gating_mode, const int groups_input, + const int groups_1x1, + const nam::wavenet::Head1x1Params& head1x1_params, + const std::string& secondary_activation, const bool shift) +{ + nam::wavenet::_FiLMParams film_params(true, shift); + return nam::wavenet::_Layer(condition_size, channels, bottleneck, kernel_size, dilation, activation, gating_mode, + groups_input, groups_1x1, head1x1_params, secondary_activation, film_params, film_params, + film_params, film_params, film_params, film_params, film_params, film_params, + film_params); +} // Helper function to run allocation tracking tests // setup: Function to run before tracking starts (can be nullptr) // test: Function to run while tracking allocations (required) @@ -723,6 +739,185 @@ void test_layer_grouped_process_realtime_safe() } } +// Test that Layer::Process() method with all FiLMs active (with shift) does not allocate or free memory +void test_layer_all_films_with_shift_realtime_safe() +{ + // Setup: Create a Layer with all FiLMs active and shift=true + const int condition_size = 1; + const int channels = 1; + const int bottleneck = channels; + const int kernel_size = 1; + const int dilation = 1; + const std::string activation = "ReLU"; + const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_1x1 = 1; + + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto layer = make_layer_all_films(condition_size, channels, bottleneck, kernel_size, dilation, activation, + gating_mode, groups_input, groups_1x1, head1x1_params, "", true); + + // Set weights + // Base layer weights: + // Conv: (channels, bottleneck, kernel_size) + bias = (1, 1, 1) + 1 = 2 weights + // Input mixin: (condition_size, bottleneck) = 
(1, 1) = 1 weight + // 1x1: (bottleneck, channels) + bias = (1, 1) + 1 = 2 weights + // Total base: 5 weights + + // FiLM weights (each FiLM uses Conv1x1: condition_size -> (shift ? 2 : 1) * input_dim with bias) + // With shift=true, each FiLM needs: (2 * input_dim) * condition_size weights + (2 * input_dim) biases + // conv_pre_film: condition_size=1, input_dim=channels=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // conv_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // input_mixin_pre_film: condition_size=1, input_dim=condition_size=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // input_mixin_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // activation_pre_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // activation_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // gating_activation_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // _1x1_post_film: condition_size=1, input_dim=channels=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights + // head1x1_post_film: not active (head1x1 is false) + // Total FiLM: 8 * 4 = 32 weights + + std::vector weights; + // Base layer weights + weights.insert(weights.end(), {1.0f, 0.0f}); // Conv (weight, bias) + weights.push_back(1.0f); // Input mixin + weights.insert(weights.end(), {1.0f, 0.0f}); // 1x1 (weight, bias) + + // FiLM weights (identity-like: scale=1, shift=0) + // For each FiLM: weights are row-major (out_channels x in_channels), then biases + // With shift=true: out_channels=2 (scale, shift), in_channels=condition_size=1 + for (int i = 0; i < 8; i++) + { + weights.push_back(1.0f); // scale weight (out_channel 0, in_channel 0) + weights.push_back(0.0f); // shift weight (out_channel 1, in_channel 0) + weights.push_back(0.0f); // scale bias + weights.push_back(0.0f); // shift bias + } + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int maxBufferSize = 256; + layer.SetMaxBufferSize(maxBufferSize); + + // Test with several different buffer sizes + std::vector buffer_sizes{1, 8, 16, 32, 64, 128, 256}; + + for (int buffer_size : buffer_sizes) + { + // Prepare input/condition matrices (allocate before tracking) + Eigen::MatrixXf input(channels, buffer_size); + Eigen::MatrixXf condition(condition_size, buffer_size); + input.setConstant(0.5f); + condition.setConstant(0.5f); + + std::string test_name = "Layer Process (all FiLMs active, shift=true) - Buffer size " + std::to_string(buffer_size); + run_allocation_test_no_allocations( + nullptr, // No setup needed + [&]() { + // Call Process() - this should not allocate or free + layer.Process(input, condition, buffer_size); + }, + nullptr, // No teardown needed + test_name.c_str()); + + // Verify output is valid + auto output = layer.GetOutputNextLayer().leftCols(buffer_size); + assert(output.rows() == channels && output.cols() == buffer_size); + assert(std::isfinite(output(0, 0))); + } +} + +// Test that Layer::Process() method with all FiLMs active (without shift) does not allocate or free memory +void test_layer_all_films_without_shift_realtime_safe() +{ + // Setup: Create a Layer with all FiLMs active and shift=false + const int condition_size = 1; + const int channels = 1; + const int bottleneck = channels; + const int kernel_size = 1; + const int dilation = 1; + const std::string activation = "ReLU"; + const nam::wavenet::GatingMode gating_mode = 
nam::wavenet::GatingMode::NONE; + const int groups_input = 1; + const int groups_1x1 = 1; + + nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); + auto layer = make_layer_all_films(condition_size, channels, bottleneck, kernel_size, dilation, activation, + gating_mode, groups_input, groups_1x1, head1x1_params, "", false); + + // Set weights + // Base layer weights: + // Conv: (channels, bottleneck, kernel_size) + bias = (1, 1, 1) + 1 = 2 weights + // Input mixin: (condition_size, bottleneck) = (1, 1) = 1 weight + // 1x1: (bottleneck, channels) + bias = (1, 1) + 1 = 2 weights + // Total base: 5 weights + + // FiLM weights (each FiLM uses Conv1x1: condition_size -> input_dim with bias) + // With shift=false, each FiLM needs: input_dim * condition_size weights + input_dim biases + // conv_pre_film: condition_size=1, input_dim=channels=1 -> 1*1 + 1 = 2 weights + // conv_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights + // input_mixin_pre_film: condition_size=1, input_dim=condition_size=1 -> 1*1 + 1 = 2 weights + // input_mixin_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights + // activation_pre_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights + // activation_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights + // gating_activation_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights + // _1x1_post_film: condition_size=1, input_dim=channels=1 -> 1*1 + 1 = 2 weights + // head1x1_post_film: not active (head1x1 is false) + // Total FiLM: 8 * 2 = 16 weights + + std::vector weights; + // Base layer weights + weights.insert(weights.end(), {1.0f, 0.0f}); // Conv (weight, bias) + weights.push_back(1.0f); // Input mixin + weights.insert(weights.end(), {1.0f, 0.0f}); // 1x1 (weight, bias) + + // FiLM weights (identity-like: scale=1) + // For each FiLM: weights are row-major (out_channels x in_channels), then biases + // With shift=false: out_channels=input_dim=1, in_channels=condition_size=1 + for (int i = 0; i < 8; i++) + { + weights.push_back(1.0f); // scale weight (out_channel 0, in_channel 0) + weights.push_back(0.0f); // scale bias + } + + auto it = weights.begin(); + layer.set_weights_(it); + assert(it == weights.end()); + + const int maxBufferSize = 256; + layer.SetMaxBufferSize(maxBufferSize); + + // Test with several different buffer sizes + std::vector buffer_sizes{1, 8, 16, 32, 64, 128, 256}; + + for (int buffer_size : buffer_sizes) + { + // Prepare input/condition matrices (allocate before tracking) + Eigen::MatrixXf input(channels, buffer_size); + Eigen::MatrixXf condition(condition_size, buffer_size); + input.setConstant(0.5f); + condition.setConstant(0.5f); + + std::string test_name = + "Layer Process (all FiLMs active, shift=false) - Buffer size " + std::to_string(buffer_size); + run_allocation_test_no_allocations( + nullptr, // No setup needed + [&]() { + // Call Process() - this should not allocate or free + layer.Process(input, condition, buffer_size); + }, + nullptr, // No teardown needed + test_name.c_str()); + + // Verify output is valid + auto output = layer.GetOutputNextLayer().leftCols(buffer_size); + assert(output.rows() == channels && output.cols() == buffer_size); + assert(std::isfinite(output(0, 0))); + } +} + // Test that LayerArray::Process() method does not allocate or free memory void test_layer_array_process_realtime_safe() { From 0b6eb593044bf88d75488e0e86110239e52e64de Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 20 Jan 2026 
17:33:39 -0800 Subject: [PATCH 11/11] Refactor FiLM real-time safety tests to reduce duplication - Created shared helper function test_layer_all_films_realtime_safe_impl() that takes shift parameter - Both test functions now call the helper with their respective shift values - Consolidated weight calculation logic into single conditional block - Dynamic test name generation based on shift parameter - Reduced code duplication significantly while maintaining same functionality --- .../test/test_wavenet/test_real_time_safe.cpp | 141 ++++-------------- 1 file changed, 32 insertions(+), 109 deletions(-) diff --git a/tools/test/test_wavenet/test_real_time_safe.cpp b/tools/test/test_wavenet/test_real_time_safe.cpp index 64470d9..db47b67 100644 --- a/tools/test/test_wavenet/test_real_time_safe.cpp +++ b/tools/test/test_wavenet/test_real_time_safe.cpp @@ -739,10 +739,10 @@ void test_layer_grouped_process_realtime_safe() } } -// Test that Layer::Process() method with all FiLMs active (with shift) does not allocate or free memory -void test_layer_all_films_with_shift_realtime_safe() +// Helper function to test Layer::Process() with all FiLMs active +static void test_layer_all_films_realtime_safe_impl(const bool shift) { - // Setup: Create a Layer with all FiLMs active and shift=true + // Setup: Create a Layer with all FiLMs active const int condition_size = 1; const int channels = 1; const int bottleneck = channels; @@ -755,7 +755,7 @@ void test_layer_all_films_with_shift_realtime_safe() nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); auto layer = make_layer_all_films(condition_size, channels, bottleneck, kernel_size, dilation, activation, - gating_mode, groups_input, groups_1x1, head1x1_params, "", true); + gating_mode, groups_input, groups_1x1, head1x1_params, "", shift); // Set weights // Base layer weights: @@ -764,34 +764,32 @@ void test_layer_all_films_with_shift_realtime_safe() // 1x1: (bottleneck, channels) + bias = (1, 1) + 1 = 2 weights // Total base: 5 weights - // FiLM weights (each FiLM uses Conv1x1: condition_size -> (shift ? 
2 : 1) * input_dim with bias) - // With shift=true, each FiLM needs: (2 * input_dim) * condition_size weights + (2 * input_dim) biases - // conv_pre_film: condition_size=1, input_dim=channels=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // conv_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // input_mixin_pre_film: condition_size=1, input_dim=condition_size=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // input_mixin_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // activation_pre_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // activation_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // gating_activation_post_film: condition_size=1, input_dim=bottleneck=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // _1x1_post_film: condition_size=1, input_dim=channels=1 -> (2*1)*1 + (2*1) = 2 + 2 = 4 weights - // head1x1_post_film: not active (head1x1 is false) - // Total FiLM: 8 * 4 = 32 weights - std::vector weights; // Base layer weights weights.insert(weights.end(), {1.0f, 0.0f}); // Conv (weight, bias) weights.push_back(1.0f); // Input mixin weights.insert(weights.end(), {1.0f, 0.0f}); // 1x1 (weight, bias) - // FiLM weights (identity-like: scale=1, shift=0) - // For each FiLM: weights are row-major (out_channels x in_channels), then biases - // With shift=true: out_channels=2 (scale, shift), in_channels=condition_size=1 + // FiLM weights (each FiLM uses Conv1x1: condition_size -> (shift ? 2 : 1) * input_dim with bias) + // With shift=true: each FiLM needs (2 * input_dim) * condition_size weights + (2 * input_dim) biases = 4 weights + // With shift=false: each FiLM needs input_dim * condition_size weights + input_dim biases = 2 weights + // All 8 FiLMs are active (excluding head1x1_post_film since head1x1 is false) for (int i = 0; i < 8; i++) { - weights.push_back(1.0f); // scale weight (out_channel 0, in_channel 0) - weights.push_back(0.0f); // shift weight (out_channel 1, in_channel 0) - weights.push_back(0.0f); // scale bias - weights.push_back(0.0f); // shift bias + if (shift) + { + // With shift: weights are row-major (out_channels=2 x in_channels=1), then biases + weights.push_back(1.0f); // scale weight (out_channel 0, in_channel 0) + weights.push_back(0.0f); // shift weight (out_channel 1, in_channel 0) + weights.push_back(0.0f); // scale bias + weights.push_back(0.0f); // shift bias + } + else + { + // Without shift: weights are row-major (out_channels=1 x in_channels=1), then bias + weights.push_back(1.0f); // scale weight (out_channel 0, in_channel 0) + weights.push_back(0.0f); // scale bias + } } auto it = weights.begin(); @@ -812,7 +810,9 @@ void test_layer_all_films_with_shift_realtime_safe() input.setConstant(0.5f); condition.setConstant(0.5f); - std::string test_name = "Layer Process (all FiLMs active, shift=true) - Buffer size " + std::to_string(buffer_size); + std::string shift_str = shift ? 
"true" : "false"; + std::string test_name = + "Layer Process (all FiLMs active, shift=" + shift_str + ") - Buffer size " + std::to_string(buffer_size); run_allocation_test_no_allocations( nullptr, // No setup needed [&]() { @@ -829,93 +829,16 @@ void test_layer_all_films_with_shift_realtime_safe() } } +// Test that Layer::Process() method with all FiLMs active (with shift) does not allocate or free memory +void test_layer_all_films_with_shift_realtime_safe() +{ + test_layer_all_films_realtime_safe_impl(true); +} + // Test that Layer::Process() method with all FiLMs active (without shift) does not allocate or free memory void test_layer_all_films_without_shift_realtime_safe() { - // Setup: Create a Layer with all FiLMs active and shift=false - const int condition_size = 1; - const int channels = 1; - const int bottleneck = channels; - const int kernel_size = 1; - const int dilation = 1; - const std::string activation = "ReLU"; - const nam::wavenet::GatingMode gating_mode = nam::wavenet::GatingMode::NONE; - const int groups_input = 1; - const int groups_1x1 = 1; - - nam::wavenet::Head1x1Params head1x1_params(false, channels, 1); - auto layer = make_layer_all_films(condition_size, channels, bottleneck, kernel_size, dilation, activation, - gating_mode, groups_input, groups_1x1, head1x1_params, "", false); - - // Set weights - // Base layer weights: - // Conv: (channels, bottleneck, kernel_size) + bias = (1, 1, 1) + 1 = 2 weights - // Input mixin: (condition_size, bottleneck) = (1, 1) = 1 weight - // 1x1: (bottleneck, channels) + bias = (1, 1) + 1 = 2 weights - // Total base: 5 weights - - // FiLM weights (each FiLM uses Conv1x1: condition_size -> input_dim with bias) - // With shift=false, each FiLM needs: input_dim * condition_size weights + input_dim biases - // conv_pre_film: condition_size=1, input_dim=channels=1 -> 1*1 + 1 = 2 weights - // conv_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights - // input_mixin_pre_film: condition_size=1, input_dim=condition_size=1 -> 1*1 + 1 = 2 weights - // input_mixin_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights - // activation_pre_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights - // activation_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights - // gating_activation_post_film: condition_size=1, input_dim=bottleneck=1 -> 1*1 + 1 = 2 weights - // _1x1_post_film: condition_size=1, input_dim=channels=1 -> 1*1 + 1 = 2 weights - // head1x1_post_film: not active (head1x1 is false) - // Total FiLM: 8 * 2 = 16 weights - - std::vector weights; - // Base layer weights - weights.insert(weights.end(), {1.0f, 0.0f}); // Conv (weight, bias) - weights.push_back(1.0f); // Input mixin - weights.insert(weights.end(), {1.0f, 0.0f}); // 1x1 (weight, bias) - - // FiLM weights (identity-like: scale=1) - // For each FiLM: weights are row-major (out_channels x in_channels), then biases - // With shift=false: out_channels=input_dim=1, in_channels=condition_size=1 - for (int i = 0; i < 8; i++) - { - weights.push_back(1.0f); // scale weight (out_channel 0, in_channel 0) - weights.push_back(0.0f); // scale bias - } - - auto it = weights.begin(); - layer.set_weights_(it); - assert(it == weights.end()); - - const int maxBufferSize = 256; - layer.SetMaxBufferSize(maxBufferSize); - - // Test with several different buffer sizes - std::vector buffer_sizes{1, 8, 16, 32, 64, 128, 256}; - - for (int buffer_size : buffer_sizes) - { - // Prepare input/condition matrices (allocate before 
tracking) - Eigen::MatrixXf input(channels, buffer_size); - Eigen::MatrixXf condition(condition_size, buffer_size); - input.setConstant(0.5f); - condition.setConstant(0.5f); - - std::string test_name = - "Layer Process (all FiLMs active, shift=false) - Buffer size " + std::to_string(buffer_size); - run_allocation_test_no_allocations( - nullptr, // No setup needed - [&]() { - // Call Process() - this should not allocate or free - layer.Process(input, condition, buffer_size); - }, - nullptr, // No teardown needed - test_name.c_str()); - - // Verify output is valid - auto output = layer.GetOutputNextLayer().leftCols(buffer_size); - assert(output.rows() == channels && output.cols() == buffer_size); - assert(std::isfinite(output(0, 0))); - } + test_layer_all_films_realtime_safe_impl(false); } // Test that LayerArray::Process() method does not allocate or free memory
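// A worked restatement of the FiLM weight counting used in the tests above, assuming the same
// Conv1x1 layout (condition_size -> (shift ? 2 : 1) * input_dim, row-major weights followed by
// biases). The function name is illustrative only.
constexpr int sketch_film_weight_count(int condition_dim, int input_dim, bool shift)
{
  // (shift ? 2 : 1) * input_dim output channels, each with condition_dim weights plus one bias
  return (shift ? 2 : 1) * input_dim * condition_dim + (shift ? 2 : 1) * input_dim;
}
static_assert(sketch_film_weight_count(1, 1, true) == 4, "matches the shift=true breakdown in the test");
static_assert(sketch_film_weight_count(1, 1, false) == 2, "matches the shift=false breakdown in the test");
// With the 1-channel layer used here and 8 active FiLMs (head1x1_post_film excluded because
// head1x1 is inactive), that gives 8 * 4 = 32 or 8 * 2 = 16 FiLM weights on top of the 5
// base-layer weights, i.e. 37 or 21 entries in the weight vector the shared helper builds.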