diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..77eeb97 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,28 @@ +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version, and other tools you might need +build: + os: ubuntu-24.04 + tools: + python: "3.13" + apt_packages: + - doxygen + jobs: + pre_build: + - doxygen Doxyfile + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally, but recommended, +# declare the Python requirements required to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - requirements: docs/requirements.txt + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index f9e40f0..5381d99 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.10) # Make sure this matches ./NAM/version.h! 
-project(NAM VERSION 0.3.0) +project(NAM VERSION 0.4.0) set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..14c0366 --- /dev/null +++ b/Doxyfile @@ -0,0 +1,245 @@ +# Doxyfile configuration for NeuralAmpModelerCore + +PROJECT_NAME = "NeuralAmpModelerCore" +PROJECT_NUMBER = "0.4.0" +PROJECT_BRIEF = "Core C++ DSP library for NAM plugins" +PROJECT_LOGO = +OUTPUT_DIRECTORY = docs/doxygen +CREATE_SUBDIRS = NO +ALLOW_UNICODE_NAMES = NO +OUTPUT_LANGUAGE = English +BRIEF_MEMBER_DESC = YES +REPEAT_BRIEF = YES +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the +ALWAYS_DETAILED_SEC = NO +INLINE_INHERITED_MEMB = NO +FULL_PATH_NAMES = NO +STRIP_FROM_PATH = +STRIP_FROM_INC_PATH = +SHORT_NAMES = NO +JAVADOC_AUTOBRIEF = YES +QT_AUTOBRIEF = NO +MULTILINE_CPP_IS_BRIEF = NO +INHERIT_DOCS = YES +SEPARATE_MEMBER_PAGES = NO +TAB_SIZE = 4 +ALIASES = +OPTIMIZE_OUTPUT_FOR_C = NO +OPTIMIZE_OUTPUT_JAVA = NO +OPTIMIZE_FOR_FORTRAN = NO +OPTIMIZE_OUTPUT_VHDL = NO +EXTENSION_MAPPING = +MARKDOWN_SUPPORT = YES +TOC_INCLUDE_HEADINGS = 0 +AUTOLINK_SUPPORT = YES +BUILTIN_STL_SUPPORT = YES +CPP_CLI_SUPPORT = NO +SIP_SUPPORT = NO +IDL_PROPERTY_SUPPORT = YES +DISTRIBUTE_GROUP_DOC = NO +GROUP_NESTED_COMPOUNDS = NO +SUBGROUPING = YES +INLINE_GROUPED_CLASSES = NO +INLINE_SIMPLE_STRUCTS = NO +TYPEDEF_HIDES_STRUCT = NO +LOOKUP_CACHE_SIZE = 0 + +# Build configuration +EXTRACT_ALL = YES +EXTRACT_PRIVATE = NO +EXTRACT_PACKAGE = NO +EXTRACT_STATIC = YES +EXTRACT_LOCAL_CLASSES = YES +EXTRACT_LOCAL_METHODS = NO +EXTRACT_ANON_NSPACES = NO +RESOLVE_UNNAMED_PARAMS = YES +HIDE_UNDOC_MEMBERS = NO +HIDE_UNDOC_CLASSES = NO +HIDE_FRIEND_COMPOUNDS = NO +HIDE_IN_BODY_DOCS = NO +INTERNAL_DOCS = NO +CASE_SENSE_NAMES = YES +HIDE_SCOPE_NAMES = NO +HIDE_COMPOUND_REFERENCE= NO + +# Input configuration +INPUT = NAM/ +INPUT_ENCODING = UTF-8 
+FILE_PATTERNS = *.h \ + *.hpp +RECURSIVE = NO +EXCLUDE = +EXCLUDE_SYMLINKS = NO +EXCLUDE_PATTERNS = +EXCLUDE_SYMBOLS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = +EXAMPLE_RECURSIVE = NO +IMAGE_PATH = +INPUT_FILTER = +FILTER_PATTERNS = +FILTER_SOURCE_FILES = NO +FILTER_SOURCE_PATTERNS = +USE_MDFILE_AS_MAINPAGE = + +# Source browsing +SOURCE_BROWSER = NO +INLINE_SOURCES = NO +STRIP_CODE_COMMENTS = YES +REFERENCED_BY_RELATION = NO +REFERENCES_RELATION = NO +REFERENCES_LINK_SOURCE = YES +SOURCE_TOOLTIPS = YES +USE_HTAGS = NO +VERBATIM_HEADERS = YES +CLANG_ASSISTED_PARSING = NO +CLANG_ADD_INC_PATHS = YES +CLANG_OPTIONS = +CLANG_DATABASE_PATH = + +# Alphabetical index +ALPHABETICAL_INDEX = YES +COLS_IN_ALPHA_INDEX = 5 +IGNORE_PREFIX = + +# HTML output +GENERATE_HTML = YES +HTML_OUTPUT = html +HTML_FILE_EXTENSION = .html +HTML_HEADER = +HTML_FOOTER = +HTML_STYLESHEET = +HTML_EXTRA_STYLESHEET = +HTML_EXTRA_FILES = +HTML_COLORSTYLE_HUE = 220 +HTML_COLORSTYLE_SAT = 100 +HTML_COLORSTYLE_GAMMA = 80 +HTML_TIMESTAMP = NO +HTML_DYNAMIC_MENUS = YES +HTML_DYNAMIC_SECTIONS = NO +HTML_INDEX_NUM_ENTRIES = 100 +GENERATE_DOCSET = NO +DOCSET_FEEDNAME = "Doxygen generated docs" +DOCSET_FEEDURL = +DOCSET_BUNDLE_ID = org.doxygen.Project +DOCSET_PUBLISHER_ID = org.doxygen.Publisher +DOCSET_PUBLISHER_NAME = Publisher +GENERATE_HTMLHELP = NO +CHM_FILE = +HHC_LOCATION = +GENERATE_CHI = NO +CHM_INDEX_ENCODING = +BINARY_TOC = NO +TOC_EXPAND = NO +GENERATE_QHP = NO +QCH_FILE = +QHP_NAMESPACE = org.doxygen.Project +QHP_VIRTUAL_FOLDER = doc +QHP_CUST_FILTER_NAME = +QHP_CUST_FIL = +QHP_SECT_FILTER_ATTRIBUTES= +QHG_LOCATION = +GENERATE_ECLIPSEHELP = NO +ECLIPSE_DOC_ID = org.doxygen.Project +DISABLE_INDEX = NO +GENERATE_TREEVIEW = NO +ENUM_VALUES_PER_LINE = 4 +TREEVIEW_WIDTH = 250 +EXT_LINKS_IN_WINDOW = NO +FORMULA_FONTSIZE = 10 +FORMULA_TRANSPARENT = YES +USE_MATHJAX = NO +MATHJAX_FORMAT = HTML-CSS +MATHJAX_RELPATH = +MATHJAX_EXTENSIONS = +MATHJAX_CODEFILE = +SEARCHENGINE = YES +SERVER_BASED_SEARCH = NO 
+EXTERNAL_SEARCH = NO +SEARCHENGINE_URL = +SEARCHDATA_FILE = searchdata.xml +EXTERNAL_SEARCH_ID = +EXTRA_SEARCH_MAPPINGS = + +# LaTeX output +GENERATE_LATEX = NO + +# RTF output +GENERATE_RTF = NO + +# Man page output +GENERATE_MAN = NO + +# XML output (for Breathe) +GENERATE_XML = YES +XML_OUTPUT = xml +XML_PROGRAMLISTING = YES + +# DOCBOOK output +GENERATE_DOCBOOK = NO + +# Configuration options related to the preprocessor +ENABLE_PREPROCESSING = YES +MACRO_EXPANSION = NO +EXPAND_ONLY_PREDEF = NO +SEARCH_INCLUDES = YES +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = +PREDEFINED = NAM_SAMPLE=float +EXPAND_AS_DEFINED = +SKIP_FUNCTION_MACROS = YES + +# Configuration options related to external references +TAGFILES = +GENERATE_TAGFILE = +ALLEXTERNALS = NO +EXTERNAL_GROUPS = YES +EXTERNAL_PAGES = YES +PERL_PATH = /usr/bin/perl + +# Configuration options related to the dot tool +CLASS_DIAGRAMS = YES +MSCGEN_PATH = +DIA_PATH = +HIDE_UNDOC_RELATIONS = YES +HAVE_DOT = NO +DOT_NUM_THREADS = 0 +DOT_FONTNAME = Helvetica +DOT_FONTSIZE = 10 +DOT_FONTPATH = +CLASS_GRAPH = YES +COLLABORATION_GRAPH = YES +GROUP_GRAPHS = YES +UML_LOOK = NO +UML_LIMIT_NUM_FIELDS = 10 +TEMPLATE_RELATIONS = NO +INCLUDE_GRAPH = YES +INCLUDED_BY_GRAPH = YES +CALL_GRAPH = NO +CALLER_GRAPH = NO +GRAPHICAL_HIERARCHY = YES +DIRECTORY_GRAPH = YES +DOT_IMAGE_FORMAT = png +INTERACTIVE_SVG = NO +DOT_PATH = +DOTFILE_DIRS = +MSCFILE_DIRS = +DIAFILE_DIRS = +PLANTUML_JAR_PATH = +PLANTUML_CFG_FILE = +PLANTUML_INCLUDE_PATH = +DOT_GRAPH_MAX_NODES = 50 +MAX_DOT_GRAPH_DEPTH = 0 +DOT_MULTI_TARGETS = NO +GENERATE_LEGEND = YES +DOT_CLEANUP = YES diff --git a/NAM/conv1d.h b/NAM/conv1d.h index 48967b6..8182966 100644 --- a/NAM/conv1d.h +++ b/NAM/conv1d.h @@ -6,48 +6,115 @@ namespace nam { +/// \brief 1D dilated convolution layer +/// +/// Implements a 1D convolution with support for dilation and grouped convolution. +/// Uses a ring buffer to maintain input history for efficient processing of +/// sequential audio frames. 
class Conv1D { public: + /// \brief Default constructor + /// + /// Initializes with dilation=1 and groups=1. Use set_size_() to configure. Conv1D() { this->_dilation = 1; this->_num_groups = 1; }; + + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param kernel_size Size of the convolution kernel + /// \param bias Whether to use bias (1 for true, 0 for false) + /// \param dilation Dilation factor for the convolution + /// \param groups Number of groups for grouped convolution (default: 1) Conv1D(const int in_channels, const int out_channels, const int kernel_size, const int bias, const int dilation, const int groups = 1) { set_size_(in_channels, out_channels, kernel_size, bias, dilation, groups); }; + + /// \brief Set the parameters (weights) of this module + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. void set_weights_(std::vector::iterator& weights); + + /// \brief Set the size parameters of the convolution + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param kernel_size Size of the convolution kernel + /// \param do_bias Whether to use bias + /// \param _dilation Dilation factor for the convolution + /// \param groups Number of groups for grouped convolution void set_size_(const int in_channels, const int out_channels, const int kernel_size, const bool do_bias, const int _dilation, const int groups = 1); + + /// \brief Set size and weights in one call + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param kernel_size Size of the convolution kernel + /// \param _dilation Dilation factor for the convolution + /// \param do_bias Whether to use bias + /// \param groups Number of groups for grouped convolution + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. 
void set_size_and_weights_(const int in_channels, const int out_channels, const int kernel_size, const int _dilation, const bool do_bias, const int groups, std::vector::iterator& weights); - // Reset the ring buffer and pre-allocate output buffer - // :param sampleRate: Unused, for interface consistency - // :param maxBufferSize: Maximum buffer size for output buffer and to size ring buffer + + /// \brief Reset the ring buffer and pre-allocate output buffer + /// \param maxBufferSize Maximum buffer size for output buffer and to size ring buffer void SetMaxBufferSize(const int maxBufferSize); - // Get the entire internal output buffer. This is intended for internal wiring - // between layers; callers should treat the buffer as pre-allocated storage - // and only consider the first `num_frames` columns valid for a given - // processing call. Slice with .leftCols(num_frames) as needed. + + /// \brief Get the entire internal output buffer + /// + /// This is intended for internal wiring between layers; callers should treat + /// the buffer as pre-allocated storage and only consider the first num_frames columns + /// valid for a given processing call. Slice with .leftCols(num_frames) as needed. 
+ /// \return Reference to the output buffer Eigen::MatrixXf& GetOutput() { return _output; } + + /// \brief Get the entire internal output buffer (const version) + /// \return Const reference to the output buffer const Eigen::MatrixXf& GetOutput() const { return _output; } - // Process input and write to internal output buffer - // :param input: Input matrix (channels x num_frames) - // :param num_frames: Number of frames to process + + /// \brief Process input and write to internal output buffer + /// \param input Input matrix (channels x num_frames) + /// \param num_frames Number of frames to process void Process(const Eigen::MatrixXf& input, const int num_frames); - // Process from input to output (legacy method, kept for compatibility) - // Rightmost indices of input go from i_start for ncols, - // Indices on output for from j_start (to j_start + ncols - i_start) + + /// \brief Process from input to output (legacy method, kept for compatibility) + /// + /// Rightmost indices of input go from i_start for ncols, + /// Indices on output go from j_start (to j_start + ncols - i_start). + /// \param input Input matrix + /// \param output Output matrix + /// \param i_start Starting index in input + /// \param ncols Number of columns to process + /// \param j_start Starting index in output void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long ncols, const long j_start) const; + /// \brief Get the number of input channels + /// \return Number of input channels long get_in_channels() const { return this->_weight.size() > 0 ? 
this->_weight[0].cols() : 0; }; + + /// \brief Get the kernel size + /// \return Kernel size long get_kernel_size() const { return this->_weight.size(); }; + + /// \brief Get the total number of weights + /// \return Total number of weight parameters long get_num_weights() const; + + /// \brief Get the number of output channels + /// \return Number of output channels long get_out_channels() const { return this->_weight.size() > 0 ? this->_weight[0].rows() : 0; }; + + /// \brief Get the dilation factor + /// \return Dilation factor int get_dilation() const { return this->_dilation; }; + + /// \brief Check if bias is used + /// \return true if bias is present, false otherwise bool has_bias() const { return this->_bias.size() > 0; }; protected: diff --git a/NAM/convnet.h b/NAM/convnet.h index 1765311..0d963df 100644 --- a/NAM/convnet.h +++ b/NAM/convnet.h @@ -18,17 +18,26 @@ namespace nam { namespace convnet { -// Custom Conv that avoids re-computing on pieces of the input and trusts -// that the corresponding outputs are where they need to be. -// Beware: this is clever! - -// Batch normalization -// In prod mode, so really just an elementwise affine layer. +/// \brief Batch normalization layer +/// +/// In production mode, so really just an elementwise affine layer. +/// Applies: y = (x - mean) / sqrt(variance + eps) * weight + bias +/// which simplifies to: y = scale * x + loc class BatchNorm { public: + /// \brief Default constructor BatchNorm() {}; + + /// \brief Constructor with weights + /// \param dim Dimension of the input + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. 
BatchNorm(const int dim, std::vector::iterator& weights); + + /// \brief Process input in-place + /// \param input Input matrix to process + /// \param i_start Start index + /// \param i_end End index void process_(Eigen::MatrixXf& input, const long i_start, const long i_end) const; private: @@ -41,22 +50,53 @@ class BatchNorm Eigen::VectorXf loc; }; +/// \brief A single block in a ConvNet +/// +/// Consists of a dilated convolution, optional batch normalization, and activation. class ConvNetBlock { public: + /// \brief Default constructor ConvNetBlock() {}; + + /// \brief Set the parameters (weights) of this block + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param _dilation Dilation factor for the convolution + /// \param batchnorm Whether to use batch normalization + /// \param activation_config Activation function configuration + /// \param groups Number of groups for grouped convolution + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. 
void set_weights_(const int in_channels, const int out_channels, const int _dilation, const bool batchnorm, const activations::ActivationConfig& activation_config, const int groups, std::vector::iterator& weights); + + /// \brief Resize buffers to handle maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize); - // Process input matrix directly (new API, similar to WaveNet) + + /// \brief Process input matrix directly (new API, similar to WaveNet) + /// \param input Input matrix (channels x num_frames) + /// \param num_frames Number of frames to process void Process(const Eigen::MatrixXf& input, const int num_frames); - // Legacy method for compatibility (uses indices) + + /// \brief Process input (legacy method for compatibility, uses indices) + /// \param input Input matrix + /// \param output Output matrix + /// \param i_start Start index in input + /// \param i_end End index in input void process_(const Eigen::MatrixXf& input, Eigen::MatrixXf& output, const long i_start, const long i_end); - // Get output from last Process() call + + /// \brief Get output from last Process() call + /// \param num_frames Number of frames to return + /// \return Block reference to the output Eigen::Block GetOutput(const int num_frames); + + /// \brief Get the number of output channels + /// \return Number of output channels long get_out_channels() const; - Conv1D conv; + + Conv1D conv; ///< The dilated convolution layer private: BatchNorm batchnorm; @@ -77,15 +117,38 @@ class _Head Eigen::VectorXf _bias; // (out_channels,) }; +/// \brief Convolutional neural network model +/// +/// A ConvNet consists of multiple ConvNetBlocks with increasing dilation factors, +/// followed by a head layer that produces the final output. 
class ConvNet : public Buffer { public: + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param channels Number of channels in the hidden layers + /// \param dilations Vector of dilation factors, one per block + /// \param batchnorm Whether to use batch normalization + /// \param activation_config Activation function configuration + /// \param weights Model weights vector + /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) + /// \param groups Number of groups for grouped convolution ConvNet(const int in_channels, const int out_channels, const int channels, const std::vector& dilations, const bool batchnorm, const activations::ActivationConfig& activation_config, std::vector& weights, const double expected_sample_rate = -1.0, const int groups = 1); + + /// \brief Destructor ~ConvNet() = default; + /// \brief Process audio frames + /// \param input Input audio buffers + /// \param output Output audio buffers + /// \param num_frames Number of frames to process void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; + + /// \brief Resize all buffers to handle maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize) override; protected: @@ -102,7 +165,11 @@ class ConvNet : public Buffer int PrewarmSamples() override { return mPrewarmSamples; }; }; -// Factory +/// \brief Factory function to instantiate ConvNet from JSON +/// \param config JSON configuration object +/// \param weights Model weights vector +/// \param expectedSampleRate Expected sample rate in Hz (-1.0 if unknown) +/// \return Unique pointer to a DSP object (ConvNet instance) std::unique_ptr Factory(const nlohmann::json& config, std::vector& weights, const double expectedSampleRate); diff --git a/NAM/dsp.h b/NAM/dsp.h index 73319a2..8b984d2 100644 --- a/NAM/dsp.h +++ 
b/NAM/dsp.h @@ -17,96 +17,141 @@ #else #define NAM_SAMPLE double #endif -// Use a sample rate of -1 if we don't know what the model expects to be run at. -// TODO clean this up and track a bool for whether it knows. +/// \brief Use a sample rate of -1 if we don't know what the model expects to be run at #define NAM_UNKNOWN_EXPECTED_SAMPLE_RATE -1.0 namespace nam { namespace wavenet { -// Forward declaration to allow WaveNet to access protected members of DSP -// Not sure I like this. +/// Forward declaration to allow WaveNet to access protected members of DSP class WaveNet; } // namespace wavenet -enum EArchitectures -{ - kLinear = 0, - kConvNet, - kLSTM, - kCatLSTM, - kWaveNet, - kCatWaveNet, - kNumModels -}; +/// \brief Base class for all DSP models +/// +/// DSP provides the common interface for all neural network-based audio processing models. +/// It handles: +/// - Input/output channel management +/// - Sample rate tracking +/// - Level management (input/output levels and loudness) +/// - Prewarm functionality for settling initial conditions +/// - Buffer size management +/// +/// Subclasses should override process() to implement the actual processing algorithm. class DSP { public: - // Older models won't know, but newer ones will come with a loudness from the training based on their response to a - // standardized input. - // We may choose to have the models figure out for themselves how loud they are in here in the future. 
+ /// \brief Constructor + /// + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) DSP(const int in_channels, const int out_channels, const double expected_sample_rate); + + /// \brief Virtual destructor virtual ~DSP() = default; - // prewarm() does any required intial work required to "settle" model initial conditions - // it can be somewhat expensive, so should not be called during realtime audio processing - // Important: don't expect the model to be outputting zeroes after this. Neural networks - // Don't know that there's anything special about "zero", and forcing this gets rid of - // some possibilities that I dont' want to rule out (e.g. models that "are noisy"). + + /// \brief Prewarm the model to settle initial conditions + /// + /// This can be somewhat expensive, so should not be called during real-time audio processing. + /// Important: don't expect the model to be outputting zeroes after this. Neural networks + /// don't know that there's anything special about "zero", and forcing this gets rid of + /// some possibilities (e.g. models that "are noisy"). virtual void prewarm(); - // process() does all of the processing requried to take `input` array and - // fill in the required values on `output`. - // To do this: - // 1. The core DSP algorithm is run (This is what should probably be - // overridden in subclasses). - // 2. The output level is applied and the result stored to `output`. - // `input` and `output` are double pointers where the first pointer indexes channels - // and the second indexes frames: input[channel][frame] + + /// \brief Process audio frames + /// + /// \param input Input audio buffers. Double pointer where the first pointer indexes channels + /// and the second indexes frames: input[channel][frame] + /// \param output Output audio buffers. Same structure as input. 
+ /// \param num_frames Number of frames to process virtual void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames); - // Expected sample rate, in Hz. - // TODO throw if it doesn't know. + /// \brief Get the expected sample rate + /// \return Expected sample rate in Hz (-1.0 if unknown) double GetExpectedSampleRate() const { return mExpectedSampleRate; }; - // Number of input channels + + /// \brief Get the number of input channels + /// \return Number of input channels int NumInputChannels() const { return mInChannels; }; - // Number of output channels + + /// \brief Get the number of output channels + /// \return Number of output channels int NumOutputChannels() const { return mOutChannels; }; - // Input Level, in dBu, corresponding to 0 dBFS for a sine wave - // You should call HasInputLevel() first to be safe. - // Note: input level is assumed global over all inputs. + + /// \brief Get the input level + /// + /// Input level is in dBu RMS, corresponding to 0 dBFS peak for a 1 kHz sine wave. + /// You should call HasInputLevel() first to be safe. + /// Note: input level is assumed global over all inputs. + /// \return Input level in dBu double GetInputLevel(); - // Get how loud this model is, in dB. - // Throws a std::runtime_error if the model doesn't know how loud it is. - // Note: loudness is assumed global over all outputs. + + /// \brief Get how loud this model's output is, in dB, if a "typical" input is processed. + /// This can be used to normalize the output level of the object. + /// + /// Throws a std::runtime_error if the model doesn't know how loud it is. + /// Note: loudness is assumed global over all outputs. + /// \return Loudness in dB + /// \throws std::runtime_error If the model doesn't know its loudness double GetLoudness() const; - // Output Level, in dBu, corresponding to 0 dBFS for a sine wave - // You should call HasOutputLevel() first to be safe. - // Note: output level is assumed global over all outputs. 
+ + /// \brief Get the output level + /// + /// Output level is in dBu RMS, corresponding to 0 dBFS peak for a 1 kHz sine wave. + /// You should call HasOutputLevel() first to be safe. + /// Note: output level is assumed global over all outputs. + /// \return Output level in dBu double GetOutputLevel(); - // Does this model know its input level? - // Note: input level is assumed global over all inputs. + + /// \brief Check if this model knows its input level + /// + /// Note: input level is assumed global over all inputs. + /// \return true if input level is known, false otherwise bool HasInputLevel(); - // Get whether the model knows how loud it is. + + /// \brief Check if the model knows how loud it is + /// \return true if loudness is known, false otherwise bool HasLoudness() const { return mHasLoudness; }; - // Does this model know its output level? - // Note: output level is assumed global over all outputs. + + /// \brief Check if this model knows its output level + /// + /// Note: output level is assumed global over all outputs. + /// \return true if output level is known, false otherwise bool HasOutputLevel(); - // General function for resetting the DSP unit. - // This doesn't call prewarm(). If you want to do that, then you might want to use ResetAndPrewarm(). - // See https://github.com/sdatkinson/NeuralAmpModelerCore/issues/96 for the reasoning. + + /// \brief General function for resetting the DSP unit + /// + /// This doesn't call prewarm(). If you want to do that, then you might want to use ResetAndPrewarm(). + /// See https://github.com/sdatkinson/NeuralAmpModelerCore/issues/96 for the reasoning. 
+ /// \param sampleRate Current sample rate + /// \param maxBufferSize Maximum buffer size to process virtual void Reset(const double sampleRate, const int maxBufferSize); - // Reset(), then prewarm() + + /// \brief Reset the DSP unit, then prewarm + /// \param sampleRate Current sample rate + /// \param maxBufferSize Maximum buffer size to process void ResetAndPrewarm(const double sampleRate, const int maxBufferSize) { Reset(sampleRate, maxBufferSize); prewarm(); } + + /// \brief Set the input level + /// \param inputLevel Input level in dBu void SetInputLevel(const double inputLevel); - // Set the loudness, in dB. - // This is usually defined to be the loudness to a standardized input. The trainer has its own, but you can always - // use this to define it a different way if you like yours better. - // Note: loudness is assumed global over all outputs. + + /// \brief Set the loudness + /// + /// This is usually defined to be the loudness to a standardized input. The trainer has its own, + /// but you can always use this to define it a different way if you like yours better. + /// Note: loudness is assumed global over all outputs. + /// \param loudness Loudness in dB void SetLoudness(const double loudness); + + /// \brief Set the output level + /// \param outputLevel Output level in dBu void SetOutputLevel(const double outputLevel); protected: @@ -123,10 +168,18 @@ class DSP // The largest buffer I expect to be told to process: int mMaxBufferSize = 0; - // How many samples should be processed for me to be considered "warmed up"? + /// \brief Get how many samples should be processed for the model to be considered "warmed up" + /// + /// Override this in subclasses to specify prewarm requirements. 
+ /// \return Number of samples needed for prewarm virtual int PrewarmSamples() { return 0; }; + /// \brief Set the maximum buffer size + /// \param maxBufferSize Maximum number of frames to process in a single call virtual void SetMaxBufferSize(const int maxBufferSize); + + /// \brief Get the maximum buffer size + /// \return Maximum buffer size int GetMaxBufferSize() const { return mMaxBufferSize; }; private: @@ -142,12 +195,20 @@ class DSP Level mOutputLevel; }; -// Class where an input buffer is kept so that long-time effects can be -// captured. (e.g. conv nets or impulse responses, where we need history that's -// longer than the sample buffer that's coming in.) +/// \brief Base class for DSP models that require input buffering +/// This class is deprecated and will be removed in a future version. +/// +/// Class where an input buffer is kept so that long-time effects can be captured. +/// (e.g. conv nets or impulse responses, where we need history that's longer than +/// the sample buffer that's coming in.) class Buffer : public DSP { public: + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param receptive_field Size of the receptive field (buffer size needed) + /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) Buffer(const int in_channels, const int out_channels, const int receptive_field, const double expected_sample_rate = -1.0); @@ -168,12 +229,26 @@ class Buffer : public DSP virtual void _rewind_buffers_(); }; -// Basic linear model (an IR!) +/// \brief Basic linear model +/// +/// Implements a simple linear convolution, (i.e. an impulse response). 
class Linear : public Buffer { public: + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param receptive_field Size of the impulse response + /// \param _bias Whether to use bias + /// \param weights Model weights (impulse response coefficients) + /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, const std::vector& weights, const double expected_sample_rate = -1.0); + + /// \brief Process audio frames + /// \param input Input audio buffers + /// \param output Output audio buffers + /// \param num_frames Number of frames to process void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; protected: @@ -183,31 +258,69 @@ class Linear : public Buffer namespace linear { +/// \brief Factory function to instantiate Linear model from JSON +/// \param config JSON configuration object +/// \param weights Model weights vector +/// \param expectedSampleRate Expected sample rate in Hz (-1.0 if unknown) +/// \return Unique pointer to a DSP object (Linear instance) std::unique_ptr Factory(const nlohmann::json& config, std::vector& weights, const double expectedSampleRate); } // namespace linear // NN modules ================================================================= -// Really just a linear layer +/// \brief 1x1 convolution (really just a fully-connected linear layer operating per-sample) +/// +/// Performs a pointwise convolution, which is equivalent to a fully connected layer +/// applied independently to each time step. Supports grouped convolution for efficiency. 
class Conv1x1 { public: + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param _bias Whether to use bias + /// \param groups Number of groups for grouped convolution (default: 1) Conv1x1(const int in_channels, const int out_channels, const bool _bias, const int groups = 1); - // Get the entire internal output buffer. This is intended for internal wiring - // between layers/arrays; callers should treat the buffer as pre-allocated - // storage and only consider the first `num_frames` columns valid for a given - // processing call. Slice with .leftCols(num_frames) as needed. + + /// \brief Get the entire internal output buffer + /// + /// This is intended for internal wiring between layers/arrays; callers should treat + /// the buffer as pre-allocated storage and only consider the first num_frames columns + /// valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \return Reference to the output buffer Eigen::MatrixXf& GetOutput() { return _output; } + + /// \brief Get the entire internal output buffer (const version) + /// \return Const reference to the output buffer const Eigen::MatrixXf& GetOutput() const { return _output; } + + /// \brief Resize the output buffer to handle maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize); + + /// \brief Set the parameters (weights) of this module + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. 
void set_weights_(std::vector::iterator& weights); - // :param input: (N,Cin) or (Cin,) - // :return: (N,Cout) or (Cout,), respectively + + /// \brief Process input and return output matrix + /// + /// \param input Input matrix (in_channels x num_frames) or (in_channels,) + /// \return Output matrix (out_channels x num_frames) or (out_channels,), respectively Eigen::MatrixXf process(const Eigen::MatrixXf& input) const { return process(input, (int)input.cols()); }; + + /// \brief Process input and return output matrix + /// \param input Input matrix (in_channels x num_frames) + /// \param num_frames Number of frames to process + /// \return Output matrix (out_channels x num_frames) Eigen::MatrixXf process(const Eigen::MatrixXf& input, const int num_frames) const; - // Store output to pre-allocated _output; access with GetOutput() - // Uses Eigen::Ref to accept matrices and block expressions without creating temporaries (real-time safe) + + /// \brief Process input and store output to pre-allocated buffer + /// + /// Uses Eigen::Ref to accept matrices and block expressions without creating + /// temporaries (real-time safe). Access output via GetOutput(). + /// \param input Input matrix (in_channels x num_frames) + /// \param num_frames Number of frames to process void process_(const Eigen::Ref& input, const int num_frames); long get_out_channels() const { return this->_weight.rows(); }; @@ -226,41 +339,28 @@ class Conv1x1 // Utilities ================================================================== // Implemented in get_dsp.cpp -// Data for a DSP object -// :param version: Data version. Follows the conventions established in the trainer code. -// :param architecture: Defines the high-level architecture.
Supported are (as per `get-dsp()` in get_dsp.cpp): -// * "CatLSTM" -// * "CatWaveNet" -// * "ConvNet" -// * "LSTM" -// * "Linear" -// * "WaveNet" -// :param config: -// :param metadata: -// :param weights: The model weights -// :param expected_sample_rate: Most NAM models implicitly assume that data will be provided to them at some sample -// rate. This captures it for other components interfacing with the model to understand its needs. Use -1.0 for "I -// don't know". +/// \brief Data structure for a DSP object +/// +/// Contains all information needed to instantiate and configure a DSP model. struct dspData { - std::string version; - std::string architecture; - nlohmann::json config; - nlohmann::json metadata; - std::vector weights; - double expected_sample_rate; + std::string version; ///< Data version. Follows conventions established in trainer code. + std::string architecture; ///< High-level architecture. Supported: "ConvNet", "LSTM", "Linear", "WaveNet" + nlohmann::json config; ///< Model configuration JSON + nlohmann::json metadata; ///< Model metadata JSON + std::vector weights; ///< Model weights + double expected_sample_rate; ///< Expected sample rate in Hz. Most NAM models implicitly assume data at some sample + ///< rate. Use -1.0 for "I don't know". }; -// Verify that the config that we are building our model from is supported by -// this plugin version. +/// \brief Verify that the config version is supported by this plugin version +/// \param version Config version string to verify void verify_config_version(const std::string version); -// Takes the model file and uses it to instantiate an instance of DSP. -std::unique_ptr get_dsp(const std::filesystem::path model_file); -// Creates an instance of DSP. Also returns a dspData struct that holds the data of the model. -std::unique_ptr get_dsp(const std::filesystem::path model_file, dspData& returnedConfig); -// Instantiates a DSP object from dsp_config struct. 
-std::unique_ptr get_dsp(dspData& conf); -// Legacy loader for directory-type DSPs +/// \brief Legacy loader for directory-style DSPs +/// +/// Loads models from a directory structure (older format). +/// \param dirname Path to the directory containing the model +/// \return Unique pointer to a DSP object std::unique_ptr get_dsp_legacy(const std::filesystem::path dirname); }; // namespace nam diff --git a/NAM/film.h b/NAM/film.h index b5376f0..9e1ec25 100644 --- a/NAM/film.h +++ b/NAM/film.h @@ -8,46 +8,69 @@ namespace nam { -// Feature-wise Linear Modulation (FiLM) -// -// Given an `input` (input_dim x num_frames) and a `condition` -// (condition_dim x num_frames), compute: -// scale, shift = Conv1x1(condition) split across channels -// output = input * scale + shift (elementwise) +/// \brief Feature-wise Linear Modulation (FiLM) +/// +/// Given an input (input_dim x num_frames) and a condition (condition_dim x num_frames), compute: +/// scale, shift = Conv1x1(condition) split across channels (top/bottom half, respectively) +/// output = input * scale + shift (elementwise) +/// +/// FiLM applies per-channel scaling and optional shifting based on conditioning input, +/// allowing the model to adapt its behavior based on external signals. class FiLM { public: + /// \brief Constructor + /// \param condition_dim Size of the conditioning input + /// \param input_dim Size of the input to be modulated + /// \param shift Whether to apply both scale and shift (true) or only scale (false) FiLM(const int condition_dim, const int input_dim, const bool shift) : _cond_to_scale_shift(condition_dim, (shift ? 2 : 1) * input_dim, /*bias=*/true) , _do_shift(shift) { } - // Get the entire internal output buffer. This is intended for internal wiring - // between layers; callers should treat the buffer as pre-allocated storage - // and only consider the first `num_frames` columns valid for a given - // processing call. Slice with .leftCols(num_frames) as needed. 
+ /// \brief Get the entire internal output buffer + /// + /// This is intended for internal wiring between layers; callers should treat + /// the buffer as pre-allocated storage and only consider the first num_frames columns + /// valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \return Reference to the output buffer Eigen::MatrixXf& GetOutput() { return _output; } + + /// \brief Get the entire internal output buffer (const version) + /// \return Const reference to the output buffer const Eigen::MatrixXf& GetOutput() const { return _output; } + /// \brief Resize buffers to handle maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize) { _cond_to_scale_shift.SetMaxBufferSize(maxBufferSize); _output.resize(get_input_dim(), maxBufferSize); } + /// \brief Set the parameters (weights) of this module + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. void set_weights_(std::vector::iterator& weights) { _cond_to_scale_shift.set_weights_(weights); } + /// \brief Get the condition dimension + /// \return Size of the conditioning input long get_condition_dim() const { return _cond_to_scale_shift.get_in_channels(); } + + /// \brief Get the input dimension + /// \return Size of the input to be modulated long get_input_dim() const { return _do_shift ? (_cond_to_scale_shift.get_out_channels() / 2) : _cond_to_scale_shift.get_out_channels(); } - // :param input: (input_dim x num_frames) - // :param condition: (condition_dim x num_frames) - // Writes (input_dim x num_frames) into internal output buffer; access via GetOutput(). - // Uses Eigen::Ref to accept matrices and block expressions without creating temporaries (real-time safe) + /// \brief Process input with conditioning + /// + /// Writes (input_dim x num_frames) into internal output buffer; access via GetOutput(). 
+ /// Uses Eigen::Ref to accept matrices and block expressions without creating temporaries (real-time safe). + /// \param input Input matrix (input_dim x num_frames) + /// \param condition Conditioning matrix (condition_dim x num_frames) + /// \param num_frames Number of frames to process void Process(const Eigen::Ref& input, const Eigen::Ref& condition, const int num_frames) { @@ -73,8 +96,13 @@ class FiLM } } - // in-place - // Uses Eigen::Ref to accept matrices and block expressions without creating temporaries (real-time safe) + /// \brief Process input with conditioning (in-place) + /// + /// Uses Eigen::Ref to accept matrices and block expressions without creating temporaries (real-time safe). + /// Modifies the input matrix directly. + /// \param input Input matrix (input_dim x num_frames), will be modified in-place + /// \param condition Conditioning matrix (condition_dim x num_frames) + /// \param num_frames Number of frames to process void Process_(Eigen::Ref input, const Eigen::Ref& condition, const int num_frames) { diff --git a/NAM/get_dsp.h b/NAM/get_dsp.h index 3aef8d6..6353053 100644 --- a/NAM/get_dsp.h +++ b/NAM/get_dsp.h @@ -6,22 +6,37 @@ namespace nam { -// Get NAM from a .nam file at the provided location +/// \brief Get NAM from a .nam file at the provided location +/// \param config_filename Path to the .nam model file +/// \return Unique pointer to a DSP object std::unique_ptr get_dsp(const std::filesystem::path config_filename); -// Get NAM from a provided configuration struct +/// \brief Get NAM from a provided configuration struct +/// \param conf DSP data structure containing model configuration and weights +/// \return Unique pointer to a DSP object std::unique_ptr get_dsp(dspData& conf); -// Get NAM from a provided .nam file path and store its configuration in the provided conf +/// \brief Get NAM from a .nam file and store its configuration +/// +/// Creates an instance of DSP and also returns a dspData struct that holds the data of 
the model. +/// \param config_filename Path to the .nam model file +/// \param returnedConfig Output parameter that will be filled with the model data +/// \return Unique pointer to a DSP object std::unique_ptr get_dsp(const std::filesystem::path config_filename, dspData& returnedConfig); -// Get NAM from a provided configuration JSON object +/// \brief Get NAM from a provided configuration JSON object +/// \param config JSON configuration object +/// \param returnedConfig Output parameter that will be filled with the model data +/// \return Unique pointer to a DSP object std::unique_ptr get_dsp(const nlohmann::json& config, dspData& returnedConfig); -// Get NAM from a provided configuration JSON object (convenience overload) +/// \brief Get NAM from a provided configuration JSON object (convenience overload) +/// \param config JSON configuration object +/// \return Unique pointer to a DSP object std::unique_ptr get_dsp(const nlohmann::json& config); -// Get sample rate from a .nam file -// Returns -1 if not known (Really old .nam files) +/// \brief Get sample rate from a .nam file +/// \param j JSON object from the .nam file +/// \return Sample rate in Hz, or -1 if not known (really old .nam files) double get_sample_rate_from_nam_file(const nlohmann::json& j); }; // namespace nam diff --git a/NAM/lstm.h b/NAM/lstm.h index 5c03853..d97de20 100644 --- a/NAM/lstm.h +++ b/NAM/lstm.h @@ -13,18 +13,22 @@ namespace nam { namespace lstm { -// A Single LSTM cell -// i input -// f forget -// g cell -// o output -// c cell state -// h hidden state +/// \brief A single LSTM cell class LSTMCell { public: + /// \brief Constructor + /// \param input_size Size of the input vector + /// \param hidden_size Size of the hidden state + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. 
LSTMCell(const int input_size, const int hidden_size, std::vector::iterator& weights); + + /// \brief Get the current hidden state + /// \return Hidden state vector Eigen::VectorXf get_hidden_state() const { return this->_xh(Eigen::placeholders::lastN(this->_get_hidden_size())); }; + + /// \brief Process a single input vector + /// \param x Input vector void process_(const Eigen::VectorXf& x); private: @@ -47,13 +51,31 @@ class LSTMCell long _get_input_size() const { return this->_xh.size() - this->_get_hidden_size(); }; }; -// The multi-layer LSTM model +/// \brief A multi-layer LSTM model +/// +/// A multi-layer LSTM processes audio frame-by-frame, maintaining hidden states +/// across layers. Each layer processes the hidden state from the previous layer as input. class LSTM : public DSP { public: + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param out_channels Number of output channels + /// \param num_layers Number of LSTM layers + /// \param input_size Size of the input to each LSTM cell + /// \param hidden_size Size of the hidden state in each LSTM cell + /// \param weights Model weights vector + /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) LSTM(const int in_channels, const int out_channels, const int num_layers, const int input_size, const int hidden_size, std::vector& weights, const double expected_sample_rate = -1.0); + + /// \brief Destructor ~LSTM() = default; + + /// \brief Process audio frames + /// \param input Input audio buffers + /// \param output Output audio buffers + /// \param num_frames Number of frames to process void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; protected: @@ -73,7 +95,11 @@ class LSTM : public DSP Eigen::VectorXf _output; }; -// Factory to instantiate from nlohmann json +/// \brief Factory function to instantiate LSTM from JSON +/// \param config JSON configuration object +/// \param weights Model weights vector +/// 
\param expectedSampleRate Expected sample rate in Hz (-1.0 if unknown) +/// \return Unique pointer to a DSP object (LSTM instance) std::unique_ptr Factory(const nlohmann::json& config, std::vector& weights, const double expectedSampleRate); diff --git a/NAM/registry.h b/NAM/registry.h index e3bc4e8..0e90699 100644 --- a/NAM/registry.h +++ b/NAM/registry.h @@ -13,19 +13,28 @@ namespace nam { namespace factory { -// TODO get rid of weights and expectedSampleRate +/// \brief Factory function type for creating DSP objects using FactoryFunction = std::function(const nlohmann::json&, std::vector&, const double)>; -// Register factories for instantiating DSP objects +/// \brief Registry for factories that instantiate DSP objects +/// +/// Singleton registry that maps architecture names to factory functions. +/// Allows dynamic registration of new DSP architectures. class FactoryRegistry { public: + /// \brief Get the singleton instance + /// \return Reference to the factory registry instance static FactoryRegistry& instance() { static FactoryRegistry inst; return inst; } + /// \brief Register a factory function for an architecture + /// \param key Architecture name (e.g., "WaveNet", "LSTM") + /// \param func Factory function that creates DSP instances + /// \throws std::runtime_error If the key is already registered void registerFactory(const std::string& key, FactoryFunction func) { // Assert that the key is not already registered @@ -36,6 +45,13 @@ class FactoryRegistry factories_[key] = func; } + /// \brief Create a DSP object using a registered factory + /// \param name Architecture name + /// \param config JSON configuration object + /// \param weights Model weights vector + /// \param expectedSampleRate Expected sample rate in Hz + /// \return Unique pointer to a DSP object + /// \throws std::runtime_error If no factory is registered for the given name std::unique_ptr create(const std::string& name, const nlohmann::json& config, std::vector& weights, const double 
expectedSampleRate) const { @@ -51,9 +67,15 @@ class FactoryRegistry std::unordered_map factories_; }; -// Registration helper. Use this to register your factories. +/// \brief Registration helper for factories +/// +/// Use this to register your factories. Create a static instance to automatically +/// register a factory when the program starts. struct Helper { + /// \brief Constructor that registers a factory + /// \param name Architecture name + /// \param factory Factory function Helper(const std::string& name, FactoryFunction factory) { FactoryRegistry::instance().registerFactory(name, std::move(factory)); diff --git a/NAM/ring_buffer.h b/NAM/ring_buffer.h index f2c3dfe..5d0e9b3 100644 --- a/NAM/ring_buffer.h +++ b/NAM/ring_buffer.h @@ -4,38 +4,52 @@ namespace nam { -// Ring buffer for managing Eigen::MatrixXf buffers with write/read pointers +/// \brief Ring buffer for managing Eigen::MatrixXf buffers with write/read pointers +/// +/// Provides efficient circular buffer functionality for maintaining input history +/// in convolutional layers. Automatically handles buffer wrapping when needed. class RingBuffer { public: + /// \brief Default constructor RingBuffer() {}; - // Initialize/resize storage - // :param channels: Number of channels (rows in the storage matrix) - // :param max_buffer_size: Maximum amount that will be written or read at once + + /// \brief Initialize/resize storage + /// \param channels Number of channels (rows in the storage matrix) + /// \param max_buffer_size Maximum amount that will be written or read at once void Reset(const int channels, const int max_buffer_size); - // Write new data at write pointer - // :param input: Input matrix (channels x num_frames) - // :param num_frames: Number of frames to write - // NOTE: This function expects a full, pre-allocated, column-major MatrixXf - // covering the entire valid buffer range. Callers should not pass - // Block expressions (e.g. 
.leftCols()) across the API boundary; instead, - // pass the full buffer and slice inside the callee. This avoids Eigen - // evaluating Blocks into temporaries (which would allocate) when - // binding to MatrixXf. + + /// \brief Write new data at write pointer + /// + /// NOTE: This function expects a full, pre-allocated, column-major MatrixXf + /// covering the entire valid buffer range. Callers should not pass Block expressions + /// (e.g. .leftCols()) across the API boundary; instead, pass the full buffer and + /// slice inside the callee. This avoids Eigen evaluating Blocks into temporaries + /// (which would allocate) when binding to MatrixXf. + /// \param input Input matrix (channels x num_frames) + /// \param num_frames Number of frames to write void Write(const Eigen::MatrixXf& input, const int num_frames); - // Read data with optional lookback - // :param num_frames: Number of frames to read - // :param lookback: Number of frames to look back from write pointer (default 0) - // :return: Block reference to the storage data + + /// \brief Read data with optional lookback + /// \param num_frames Number of frames to read + /// \param lookback Number of frames to look back from write pointer (default 0) + /// \return Block reference to the storage data Eigen::Block Read(const int num_frames, const long lookback = 0); - // Advance write pointer - // :param num_frames: Number of frames to advance + + /// \brief Advance write pointer + /// \param num_frames Number of frames to advance void Advance(const int num_frames); - // Get max buffer size (the value passed to Reset()) + + /// \brief Get max buffer size (the value passed to Reset()) + /// \return Maximum buffer size int GetMaxBufferSize() const { return _max_buffer_size; } - // Get number of channels (rows) + + /// \brief Get number of channels (rows) + /// \return Number of channels int GetChannels() const { return _storage.rows(); } - // Set the max lookback (maximum history needed when rewinding) + + /// 
\brief Set the max lookback (maximum history needed when rewinding) + /// \param max_lookback Maximum lookback distance void SetMaxLookback(const long max_lookback) { _max_lookback = max_lookback; } private: diff --git a/NAM/util.h b/NAM/util.h index c0a5bd4..a13cd4a 100644 --- a/NAM/util.h +++ b/NAM/util.h @@ -9,6 +9,9 @@ namespace nam { namespace util { +/// \brief Convert a string to lowercase +/// \param s Input string +/// \return Lowercase version of the input string std::string lowercase(const std::string& s); }; // namespace util }; // namespace nam diff --git a/NAM/version.h b/NAM/version.h index f746905..11b3be5 100644 --- a/NAM/version.h +++ b/NAM/version.h @@ -1,6 +1,15 @@ #pragma once -// Make sure this matches NAM version in ../CMakeLists.txt! +/// \file version.h +/// \brief Version information for NeuralAmpModelerCore +/// +/// Make sure this matches NAM version in ../CMakeLists.txt! + +/// \brief Major version number #define NEURAL_AMP_MODELER_DSP_VERSION_MAJOR 0 -#define NEURAL_AMP_MODELER_DSP_VERSION_MINOR 3 + +/// \brief Minor version number +#define NEURAL_AMP_MODELER_DSP_VERSION_MINOR 4 + +/// \brief Patch version number #define NEURAL_AMP_MODELER_DSP_VERSION_PATCH 0 diff --git a/NAM/wavenet.h b/NAM/wavenet.h index 336d2e4..e290ef4 100644 --- a/NAM/wavenet.h +++ b/NAM/wavenet.h @@ -19,22 +19,34 @@ namespace nam namespace wavenet { -// Gating mode for WaveNet layers +/// \brief Gating mode for WaveNet layers +/// +/// Determines how the layer processes the doubled bottleneck channels when gating is enabled. 
enum class GatingMode { - NONE, // No gating or blending - GATED, // Traditional gating (element-wise multiplication) - BLENDED // Blending (weighted average) + NONE, ///< No gating or blending - standard activation + GATED, ///< Traditional gating (element-wise multiplication of activated channels) + BLENDED ///< Blending (weighted average between activated and pre-activated values) }; -// Helper function for backward compatibility with boolean gated parameter +/// \brief Helper function for backward compatibility with boolean gated parameter +/// \param gated Boolean indicating whether gating should be enabled +/// \return GatingMode::GATED if gated is true, GatingMode::NONE otherwise inline GatingMode gating_mode_from_bool(bool gated) { return gated ? GatingMode::GATED : GatingMode::NONE; } -// Parameters for head1x1 configuration + +/// \brief Parameters for head1x1 configuration +/// +/// Configures an optional 1x1 convolution that outputs directly to the head (skip connection) +/// instead of using the activation output directly. struct Head1x1Params { + /// \brief Constructor + /// \param active_ Whether the head1x1 convolution is active + /// \param out_channels_ Number of output channels for the head1x1 convolution + /// \param groups_ Number of groups for grouped convolution Head1x1Params(bool active_, int out_channels_, int groups_) : active(active_) , out_channels(out_channels_) @@ -42,26 +54,66 @@ struct Head1x1Params { } - const bool active; - const int out_channels; - const int groups; + const bool active; ///< Whether the head1x1 convolution is active + const int out_channels; ///< Number of output channels + const int groups; ///< Number of groups for grouped convolution }; +/// \brief Parameters for FiLM (Feature-wise Linear Modulation) configuration +/// +/// FiLM applies per-channel scaling and optional shifting based on conditioning input. 
struct _FiLMParams { + /// \brief Constructor + /// \param active_ Whether FiLM is active at this location + /// \param shift_ Whether to apply both scale and shift (true) or only scale (false) _FiLMParams(bool active_, bool shift_) : active(active_) , shift(shift_) { } - const bool active; - const bool shift; + const bool active; ///< Whether FiLM is active + const bool shift; ///< Whether to apply shift in addition to scale }; +/// \brief A single WaveNet layer block +/// +/// A WaveNet layer performs the following operations: +/// 1. Dilated convolution on the input (with optional pre/post-FiLM) +/// 2. Input mixin (conditioning input processing, with optional pre/post-FiLM) +/// 3. Sum of conv and input mixin outputs +/// 4. Activation (with optional gating/blending and pre/post FiLM) +/// 5. 1x1 convolution for the next layer (with optional post-FiLM) +/// 6. Optional 1x1 convolution for the head output (with optional post-FiLM) +/// 7. Residual connection (input + 1x1 output) and skip connection (to the head) +/// +/// The layer supports multiple gating modes and FiLM at various points in the computation. +/// See the walkthrough documentation for detailed step-by-step explanation.
class _Layer { public: - // Constructor with GatingMode enum and typed ActivationConfig + /// \brief Constructor with GatingMode enum and typed ActivationConfig + /// \param condition_size Size of the conditioning input + /// \param channels Number of input/output channels from layer to layer + /// \param bottleneck Internal channel count + /// \param kernel_size Kernel size for the dilated convolution + /// \param dilation Dilation factor for the convolution + /// \param activation_config Primary activation function configuration + /// \param gating_mode Gating mode (NONE, GATED, or BLENDED) + /// \param groups_input Number of groups for the input convolution + /// \param groups_input_mixin Number of groups for the input mixin convolution + /// \param groups_1x1 Number of groups for the 1x1 convolution + /// \param head1x1_params Configuration of the optional head1x1 convolution + /// \param secondary_activation_config Secondary activation (for gating/blending) + /// \param conv_pre_film_params FiLM parameters before the input convolution + /// \param conv_post_film_params FiLM parameters after the input convolution + /// \param input_mixin_pre_film_params FiLM parameters before the input mixin + /// \param input_mixin_post_film_params FiLM parameters after the input mixin + /// \param activation_pre_film_params FiLM parameters after the input/mixin summed output before activation + /// \param activation_post_film_params FiLM parameters after the activation output before the 1x1 convolution + /// \param _1x1_post_film_params FiLM parameters after the 1x1 convolution + /// \param head1x1_post_film_params FiLM parameters after the head1x1 convolution + /// \throws std::invalid_argument If head1x1_post_film_params is active but head1x1 is not _Layer(const int condition_size, const int channels, const int bottleneck, const int kernel_size, const int dilation, const activations::ActivationConfig& activation_config, const GatingMode gating_mode, const int groups_input, 
const int groups_input_mixin, const int groups_1x1, const Head1x1Params& head1x1_params, @@ -144,36 +196,72 @@ class _Layer } }; - // Resize all arrays to be able to process `maxBufferSize` frames. + /// \brief Resize all arrays to be able to process maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize); - // Set the parameters of this module + + /// \brief Set the parameters (weights) of this module + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. void set_weights_(std::vector::iterator& weights); - // Process a block of frames. - // :param `input`: from previous layer - // :param `condition`: conditioning input (input to the WaveNet / "skip-in") - // :param `num_frames`: number of frames to process - // Outputs are stored internally and accessible via GetOutputNextLayer() and GetOutputHead() + + /// \brief Process a block of frames + /// + /// Performs the complete layer computation: + /// 1. Input convolution (with optional pre/post-FiLM) + /// 2. Input mixin processing (with optional pre/post-FiLM) + /// 3. Sum and activation (with optional gating/blending and pre/post-FiLM) + /// 4. 1x1 convolution for the residual connection to the next layer (with optional post-FiLM) + /// 5. Optional 1x1 convolution for the head output (with optional post-FiLM) + /// 6. Store outputs for next layer and the layer array head + /// + /// \param input Input from previous layer (channels x num_frames) + /// \param condition Conditioning input (condition_size x num_frames) + /// \param num_frames Number of frames to process + /// + /// Outputs are stored internally and accessible via GetOutputNextLayer() and GetOutputHead(). + /// Only the first num_frames columns of the output buffers are valid.
void Process(const Eigen::MatrixXf& input, const Eigen::MatrixXf& condition, const int num_frames); - // The number of channels expected as input/output from this layer + + /// \brief Get the number of channels expected as input/output from this layer + /// \return Number of channels long get_channels() const { return this->_conv.get_in_channels(); }; - // Dilation of the input convolution layer + + /// \brief Get the dilation of the input convolution layer + /// \return Dilation factor int get_dilation() const { return this->_conv.get_dilation(); }; - // Kernel size of the input convolution layer + + /// \brief Get the kernel size of the input convolution layer + /// \return Kernel size long get_kernel_size() const { return this->_conv.get_kernel_size(); }; - // Get output to next layer (residual connection: input + _1x1 output) - // Returns the full pre-allocated buffer; only the first `num_frames` columns - // are valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \brief Get output to next layer (residual connection: input + _1x1 output) + /// + /// Returns the full pre-allocated buffer; only the first num_frames columns + /// are valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \return Reference to the output buffer (channels x maxBufferSize) Eigen::MatrixXf& GetOutputNextLayer() { return this->_output_next_layer; } + + /// \brief Get output to next layer (const version) + /// \return Const reference to the output buffer const Eigen::MatrixXf& GetOutputNextLayer() const { return this->_output_next_layer; } - // Get output to head (skip connection: activated conv output) - // Returns the full pre-allocated buffer; only the first `num_frames` columns - // are valid for a given processing call. Slice with .leftCols(num_frames) as needed. 
+ + /// \brief Get output to head (skip connection: activated conv output) + /// + /// Returns the full pre-allocated buffer; only the first num_frames columns + /// are valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \return Reference to the head output buffer Eigen::MatrixXf& GetOutputHead() { return this->_output_head; } + + /// \brief Get output to head (const version) + /// \return Const reference to the head output buffer const Eigen::MatrixXf& GetOutputHead() const { return this->_output_head; } - // Access Conv1D for Reset() propagation (needed for _LayerArray) + /// \brief Access Conv1D for Reset() propagation (needed for _LayerArray) + /// \return Reference to the internal Conv1D object Conv1D& get_conv() { return _conv; } + + /// \brief Access Conv1D (const version) + /// \return Const reference to the internal Conv1D object const Conv1D& get_conv() const { return _conv; } private: @@ -211,9 +299,37 @@ class _Layer std::unique_ptr _head1x1_post_film; }; +/// \brief Parameters for constructing a LayerArray +/// +/// Contains all configuration needed to construct a _LayerArray with multiple layers +/// sharing the same channel count, kernel size, and activation configuration. 
class LayerArrayParams { public: + /// \brief Constructor + /// \param input_size_ Input size (number of channels) to the layer array + /// \param condition_size_ Size of the conditioning input + /// \param head_size_ Size of the head output (after head rechannel) + /// \param channels_ Number of channels in each layer + /// \param bottleneck_ Bottleneck size (internal channel count) + /// \param kernel_size_ Kernel size for dilated convolutions + /// \param dilations_ Vector of dilation factors, one per layer + /// \param activation_ Primary activation configuration + /// \param gating_mode_ Gating mode for all layers + /// \param head_bias_ Whether to use bias in the head rechannel + /// \param groups_input Number of groups for input convolutions + /// \param groups_input_mixin_ Number of groups for input mixin convolutions + /// \param groups_1x1_ Number of groups for 1x1 convolutions + /// \param head1x1_params_ Parameters for optional head1x1 convolutions + /// \param secondary_activation_config_ Secondary activation for gating/blending + /// \param conv_pre_film_params_ FiLM parameters before input convolutions + /// \param conv_post_film_params_ FiLM parameters after input convolutions + /// \param input_mixin_pre_film_params_ FiLM parameters before input mixin + /// \param input_mixin_post_film_params_ FiLM parameters after input mixin + /// \param activation_pre_film_params_ FiLM parameters before activation + /// \param activation_post_film_params_ FiLM parameters after activation + /// \param _1x1_post_film_params_ FiLM parameters after 1x1 convolutions + /// \param head1x1_post_film_params_ FiLM parameters after head1x1 convolutions LayerArrayParams(const int input_size_, const int condition_size_, const int head_size_, const int channels_, const int bottleneck_, const int kernel_size_, const std::vector&& dilations_, const activations::ActivationConfig& activation_, const GatingMode gating_mode_, @@ -250,36 +366,70 @@ class LayerArrayParams { } - const 
int input_size; - const int condition_size; - const int head_size; - const int channels; - const int bottleneck; - const int kernel_size; - std::vector dilations; - const activations::ActivationConfig activation_config; - const GatingMode gating_mode; - const bool head_bias; - const int groups_input; - const int groups_input_mixin; - const int groups_1x1; - const Head1x1Params head1x1_params; - const activations::ActivationConfig secondary_activation_config; - const _FiLMParams conv_pre_film_params; - const _FiLMParams conv_post_film_params; - const _FiLMParams input_mixin_pre_film_params; - const _FiLMParams input_mixin_post_film_params; - const _FiLMParams activation_pre_film_params; - const _FiLMParams activation_post_film_params; - const _FiLMParams _1x1_post_film_params; - const _FiLMParams head1x1_post_film_params; + const int input_size; ///< Input size (number of channels) + const int condition_size; ///< Size of conditioning input + const int head_size; ///< Size of head output (after rechannel) + const int channels; ///< Number of channels in each layer + const int bottleneck; ///< Bottleneck size (internal channel count) + const int kernel_size; ///< Kernel size for dilated convolutions + std::vector dilations; ///< Dilation factors, one per layer + const activations::ActivationConfig activation_config; ///< Primary activation configuration + const GatingMode gating_mode; ///< Gating mode for all layers + const bool head_bias; ///< Whether to use bias in head rechannel + const int groups_input; ///< Number of groups for input convolutions + const int groups_input_mixin; ///< Number of groups for input mixin + const int groups_1x1; ///< Number of groups for 1x1 convolutions + const Head1x1Params head1x1_params; ///< Parameters for optional head1x1 + const activations::ActivationConfig secondary_activation_config; ///< Secondary activation for gating/blending + const _FiLMParams conv_pre_film_params; ///< FiLM params before input conv + const _FiLMParams 
conv_post_film_params; ///< FiLM params after input conv + const _FiLMParams input_mixin_pre_film_params; ///< FiLM params before input mixin + const _FiLMParams input_mixin_post_film_params; ///< FiLM params after input mixin + const _FiLMParams activation_pre_film_params; ///< FiLM params before activation + const _FiLMParams activation_post_film_params; ///< FiLM params after activation + const _FiLMParams _1x1_post_film_params; ///< FiLM params after 1x1 conv + const _FiLMParams head1x1_post_film_params; ///< FiLM params after head1x1 conv }; -// An array of layers with the same channels, kernel sizes, activations. +/// \brief An array of layers with the same channels, kernel sizes, and activations +/// +/// A LayerArray chains multiple _Layer objects together, processing them sequentially. +/// Each layer processes the output of the previous layer (residual connection). +/// All layers contribute to a shared head output (skip connection) that is accumulated +/// and then projected to the final head size. 
+/// +/// The LayerArray handles: +/// - Input projection to match layer channel count +/// - Processing layers in sequence with residual connections +/// - Accumulating head outputs from all layers +/// - Projecting the accumulated head output to the final head size class _LayerArray { public: - // Constructor with GatingMode enum and typed ActivationConfig + /// \brief Constructor with GatingMode enum and typed ActivationConfig + /// \param input_size Input size (number of channels) to the layer array + /// \param condition_size Size of the conditioning input + /// \param head_size Size of the head output (after head rechannel) + /// \param channels Number of channels in each layer + /// \param bottleneck Bottleneck size (internal channel count) + /// \param kernel_size Kernel size for dilated convolutions + /// \param dilations Vector of dilation factors, one per layer + /// \param activation_config Primary activation configuration + /// \param gating_mode Gating mode for all layers + /// \param head_bias Whether to use bias in the head rechannel + /// \param groups_input Number of groups for input convolutions + /// \param groups_input_mixin Number of groups for input mixin + /// \param groups_1x1 Number of groups for 1x1 convolutions + /// \param head1x1_params Parameters for optional head1x1 convolutions + /// \param secondary_activation_config Secondary activation for gating/blending + /// \param conv_pre_film_params FiLM parameters before input convolutions + /// \param conv_post_film_params FiLM parameters after input convolutions + /// \param input_mixin_pre_film_params FiLM parameters before input mixin + /// \param input_mixin_post_film_params FiLM parameters after input mixin + /// \param activation_pre_film_params FiLM parameters before activation + /// \param activation_post_film_params FiLM parameters after activation + /// \param _1x1_post_film_params FiLM parameters after 1x1 convolutions + /// \param head1x1_post_film_params FiLM parameters after 
head1x1 convolutions _LayerArray(const int input_size, const int condition_size, const int head_size, const int channels, const int bottleneck, const int kernel_size, const std::vector& dilations, const activations::ActivationConfig& activation_config, const GatingMode gating_mode, @@ -290,32 +440,60 @@ class _LayerArray const _FiLMParams& activation_pre_film_params, const _FiLMParams& activation_post_film_params, const _FiLMParams& _1x1_post_film_params, const _FiLMParams& head1x1_post_film_params); + /// \brief Resize all arrays to be able to process maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize); - // All arrays are "short". - // Process without head input (first layer array) - zeros head inputs before proceeding - void Process(const Eigen::MatrixXf& layer_inputs, // Short - const Eigen::MatrixXf& condition, // Short - const int num_frames); - // Process with head input (subsequent layer arrays) - copies head input before proceeding - void Process(const Eigen::MatrixXf& layer_inputs, // Short - const Eigen::MatrixXf& condition, // Short - const Eigen::MatrixXf& head_inputs, // Short - from previous layer array - const int num_frames); - // Get output from last layer (for next layer array) - // Returns the full pre-allocated buffer; only the first `num_frames` columns - // are valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \brief Process without a given previous head input (first layer array) + /// + /// Zeros head accumulated output before proceeding. Used for the first layer array in a WaveNet. 
+ /// \param layer_inputs Input to the layer array (input_size x num_frames) + /// \param condition Conditioning input (condition_size x num_frames) + /// \param num_frames Number of frames to process + void Process(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames); + + /// \brief Process with a given previous head input (subsequent layer arrays) + /// + /// Copies head input before proceeding. Used for subsequent layer arrays that accumulate + /// head outputs from previous arrays. + /// \param layer_inputs Input to the layer array (input_size x num_frames) + /// \param condition Conditioning input (condition_size x num_frames) + /// \param head_inputs Head input from previous layer array (head_input_size x num_frames) + /// \param num_frames Number of frames to process + void Process(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, + const Eigen::MatrixXf& head_inputs, const int num_frames); + + /// \brief Get output from last layer (for next layer array) + /// + /// Returns the full pre-allocated buffer; only the first num_frames columns + /// are valid for a given processing call. Slice with .leftCols(num_frames) as needed. + /// \return Reference to the layer output buffer (channels x maxBufferSize) Eigen::MatrixXf& GetLayerOutputs() { return this->_layer_outputs; } + + /// \brief Get output from last layer (const version) + /// \return Const reference to the layer output buffer const Eigen::MatrixXf& GetLayerOutputs() const { return this->_layer_outputs; } - // Get head outputs (post head-rechannel) - // Returns the full pre-allocated buffer; only the first `num_frames` columns - // are valid for a given processing call. Slice with .leftCols(num_frames) as needed. + + /// \brief Get head outputs (post head-rechannel) + /// + /// Returns the full pre-allocated buffer; only the first num_frames columns + /// are valid for a given processing call. Slice with .leftCols(num_frames) as needed. 
+ /// \return Reference to the head output buffer (head_size x maxBufferSize) Eigen::MatrixXf& GetHeadOutputs(); + + /// \brief Get head outputs (const version) + /// \return Const reference to the head output buffer const Eigen::MatrixXf& GetHeadOutputs() const; + + /// \brief Set the parameters (weights) of this module + /// \param it Iterator to the weights vector. Will be advanced as weights are consumed. void set_weights_(std::vector::iterator& it); - // "Zero-indexed" receptive field. - // E.g. a 1x1 convolution has a z.i.r.f. of zero. + /// \brief Get the "zero-indexed" receptive field + /// + /// The receptive field is the number of input samples that affect the output. + /// A 1x1 convolution is defined to have a zero-indexed receptive field of zero. + /// \return Receptive field size long get_receptive_field() const; private: @@ -341,16 +519,50 @@ class _LayerArray void ProcessInner(const Eigen::MatrixXf& layer_inputs, const Eigen::MatrixXf& condition, const int num_frames); }; -// The main WaveNet model +/// \brief The main WaveNet model +/// +/// WaveNet is a dilated convolutional neural network architecture for audio processing. +/// It consists of multiple LayerArrays, each containing multiple layers with increasing +/// dilation factors. The model processes audio through: +/// +/// 1. Condition DSP (optional) - processes input to generate conditioning signal +/// 2. LayerArrays - sequential processing with residual and skip connections +/// 3. Head scaling - final output scaling +/// +/// The model supports real-time audio processing with pre-allocated buffers. 
class WaveNet : public DSP { public: + /// \brief Constructor + /// \param in_channels Number of input channels + /// \param layer_array_params Parameters for each layer array + /// \param head_scale Scaling factor applied to the final head output + /// \param with_head Whether to use a custom "head" module that further processes the output (not currently supported) + /// \param weights Model weights (will be consumed during construction) + /// \param condition_dsp Optional DSP module for processing the conditioning input + /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) WaveNet(const int in_channels, const std::vector& layer_array_params, const float head_scale, const bool with_head, std::vector weights, std::unique_ptr condition_dsp, const double expected_sample_rate = -1.0); + + /// \brief Destructor ~WaveNet() = default; + + /// \brief Process audio frames + /// + /// Implements the DSP::process() interface. Processes input audio through the + /// complete WaveNet pipeline and writes to output. + /// \param input Input audio buffers (in_channels x frames) + /// \param output Output audio buffers (out_channels x frames) + /// \param num_frames Number of frames to process void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; + + /// \brief Set model weights from a vector + /// \param weights Vector containing all model weights void set_weights_(std::vector& weights); + + /// \brief Set model weights from an iterator + /// \param weights Iterator to the weights vector. Will be advanced as weights are consumed. 
void set_weights_(std::vector::iterator& weights); protected: @@ -364,13 +576,28 @@ class WaveNet : public DSP std::vector _condition_dsp_input_ptrs; std::vector _condition_dsp_output_ptrs; + /// \brief Resize all buffers to handle maxBufferSize frames + /// \param maxBufferSize Maximum number of frames to process in a single call void SetMaxBufferSize(const int maxBufferSize) override; - // Compute the conditioning array to be given to the layer arrays + + /// \brief Compute the conditioning array to be given to the layer arrays + /// + /// Processes the condition input through the condition DSP (if present) or + /// passes it through directly. + /// \param num_frames Number of frames to process virtual void _process_condition(const int num_frames); - // Fill in the "condition" array that's fed into the various parts of the net. + + /// \brief Fill in the "condition" array that's fed into the various parts of the net + /// + /// Copies input audio into the condition buffer for processing. + /// \param input Input audio buffers + /// \param num_frames Number of frames to process virtual void _set_condition_array(NAM_SAMPLE** input, const int num_frames); - // How many conditioning inputs are there. - // Just one--the audio. + + /// \brief Get the number of conditioning inputs + /// + /// For standard WaveNet, this is just the audio input (same as input channels). 
+ /// \return Number of conditioning input channels virtual int _get_condition_dim() const { return NumInputChannels(); }; private: @@ -382,7 +609,11 @@ class WaveNet : public DSP int PrewarmSamples() override { return mPrewarmSamples; }; }; -// Factory to instantiate from nlohmann json +/// \brief Factory function to instantiate WaveNet from JSON configuration +/// \param config JSON configuration object +/// \param weights Model weights vector +/// \param expectedSampleRate Expected sample rate in Hz (-1.0 if unknown) +/// \return Unique pointer to a DSP object (WaveNet instance) std::unique_ptr Factory(const nlohmann::json& config, std::vector& weights, const double expectedSampleRate); }; // namespace wavenet diff --git a/docs/api/activations.rst b/docs/api/activations.rst new file mode 100644 index 0000000..e1595a1 --- /dev/null +++ b/docs/api/activations.rst @@ -0,0 +1,17 @@ +Activations API +=============== + +.. doxygennamespace:: nam::activations + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::activations::Activation + :project: NeuralAmpModelerCore + :members: + +.. doxygenenum:: nam::activations::ActivationType + :project: NeuralAmpModelerCore + +.. doxygenstruct:: nam::activations::ActivationConfig + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/conv1d.rst b/docs/api/conv1d.rst new file mode 100644 index 0000000..f6d87e5 --- /dev/null +++ b/docs/api/conv1d.rst @@ -0,0 +1,6 @@ +Conv1D API +========== + +.. doxygenclass:: nam::Conv1D + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/convnet.rst b/docs/api/convnet.rst new file mode 100644 index 0000000..f5fc886 --- /dev/null +++ b/docs/api/convnet.rst @@ -0,0 +1,18 @@ +ConvNet API +=========== + +.. doxygennamespace:: nam::convnet + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::convnet::ConvNet + :project: NeuralAmpModelerCore + :members: + +.. 
doxygenclass:: nam::convnet::ConvNetBlock + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::convnet::BatchNorm + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/dsp.rst b/docs/api/dsp.rst new file mode 100644 index 0000000..8fe6ae5 --- /dev/null +++ b/docs/api/dsp.rst @@ -0,0 +1,25 @@ +DSP API +======= + +.. doxygenclass:: nam::DSP + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::Buffer + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::Linear + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::Conv1x1 + :project: NeuralAmpModelerCore + :members: + +.. doxygenstruct:: nam::dspData + :project: NeuralAmpModelerCore + :members: + +.. doxygenenum:: nam::EArchitectures + :project: NeuralAmpModelerCore diff --git a/docs/api/film.rst b/docs/api/film.rst new file mode 100644 index 0000000..88dfef1 --- /dev/null +++ b/docs/api/film.rst @@ -0,0 +1,6 @@ +FiLM API +======== + +.. doxygenclass:: nam::FiLM + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/gating_activations.rst b/docs/api/gating_activations.rst new file mode 100644 index 0000000..09e8cab --- /dev/null +++ b/docs/api/gating_activations.rst @@ -0,0 +1,14 @@ +Gating Activations API +====================== + +.. doxygennamespace:: nam::gating_activations + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::gating_activations::GatingActivation + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::gating_activations::BlendingActivation + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/get_dsp.rst b/docs/api/get_dsp.rst new file mode 100644 index 0000000..efab64a --- /dev/null +++ b/docs/api/get_dsp.rst @@ -0,0 +1,6 @@ +Model Loading API +================== + +.. 
doxygennamespace:: nam + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 0000000..fd43328 --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,20 @@ +API Reference +============= + +This section contains the complete API reference for NeuralAmpModelerCore, automatically generated from the source code headers. + +.. toctree:: + :maxdepth: 2 + + + dsp + ring_buffer + conv1d + activations + gating_activations + film + convnet + lstm + wavenet + get_dsp + util diff --git a/docs/api/lstm.rst b/docs/api/lstm.rst new file mode 100644 index 0000000..610eb4a --- /dev/null +++ b/docs/api/lstm.rst @@ -0,0 +1,14 @@ +LSTM API +======== + +.. doxygennamespace:: nam::lstm + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::lstm::LSTM + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::lstm::LSTMCell + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/ring_buffer.rst b/docs/api/ring_buffer.rst new file mode 100644 index 0000000..ba504eb --- /dev/null +++ b/docs/api/ring_buffer.rst @@ -0,0 +1,6 @@ +Ring Buffer API +================ + +.. doxygenclass:: nam::RingBuffer + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/util.rst b/docs/api/util.rst new file mode 100644 index 0000000..a505f84 --- /dev/null +++ b/docs/api/util.rst @@ -0,0 +1,6 @@ +Utilities API +============= + +.. doxygennamespace:: nam::util + :project: NeuralAmpModelerCore + :members: diff --git a/docs/api/wavenet.rst b/docs/api/wavenet.rst new file mode 100644 index 0000000..571c7e4 --- /dev/null +++ b/docs/api/wavenet.rst @@ -0,0 +1,29 @@ +WaveNet API +=========== + +.. doxygennamespace:: nam::wavenet + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::wavenet::WaveNet + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::wavenet::_LayerArray + :project: NeuralAmpModelerCore + :members: + +.. 
doxygenclass:: nam::wavenet::_Layer + :project: NeuralAmpModelerCore + :members: + +.. doxygenclass:: nam::wavenet::LayerArrayParams + :project: NeuralAmpModelerCore + :members: + +.. doxygenstruct:: nam::wavenet::Head1x1Params + :project: NeuralAmpModelerCore + :members: + +.. doxygenenum:: nam::wavenet::GatingMode + :project: NeuralAmpModelerCore diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..305152c --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,64 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +import os +import sys +from pathlib import Path + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. +sys.path.insert(0, os.path.abspath('.')) + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'NeuralAmpModelerCore' +copyright = '2023-present Steven Atkinson' +author = 'Neural Amp Modeler Contributors' +release = '0.4.0' +version = '0.4.0' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', + 'sphinx.ext.intersphinx', + 'breathe', + 'sphinxcontrib.mermaid', +] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' +# html_static_path = ['_static'] # Commented out until _static 
directory is created + +# -- Breathe configuration ---------------------------------------------------- +# https://breathe.readthedocs.io/ + +breathe_projects = { + 'NeuralAmpModelerCore': 'doxygen/xml', +} +breathe_default_project = 'NeuralAmpModelerCore' +breathe_default_members = ('members', 'undoc-members') + +# -- Intersphinx configuration ----------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html + +intersphinx_mapping = { + 'cpp': ('https://en.cppreference.com/mwiki/', None), +} + +# -- Extension configuration -------------------------------------------------- + +# Autodoc settings +autodoc_mock_imports = ['Eigen', 'nlohmann'] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..7f9bdda --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,64 @@ +NeuralAmpModelerCore Documentation +=================================== + +Welcome to the NeuralAmpModelerCore documentation. This library provides a core C++ DSP implementation for Neural Amp Modeler plugins. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + wavenet_walkthrough + api/index + +Overview +-------- + +NeuralAmpModelerCore is a high-performance C++ library for running neural network-based audio processing models. 
It supports multiple architectures including: + +* **WaveNet**: Dilated convolutional neural networks with gating and conditioning +* **ConvNet**: Convolutional neural networks with batch normalization +* **LSTM**: Long Short-Term Memory networks +* **Linear**: Simple linear models (impulse responses) + +The library is designed for real-time audio processing with a focus on: + +* **Real-time safety**: Pre-allocated buffers and no dynamic allocations during processing +* **Performance**: Optimized implementations using Eigen for linear algebra +* **Flexibility**: Support for various activation functions, gating modes, and conditioning mechanisms + +Getting Started +--------------- + +For an example of how to use this library, see the `NeuralAmpModelerPlugin `_ repository. + +Architecture +------------ + +The library is organized into several namespaces: + +* :ref:`nam::wavenet `: WaveNet architecture implementation +* :ref:`nam::convnet `: ConvNet architecture implementation +* :ref:`nam::lstm `: LSTM architecture implementation +* :ref:`nam::activations `: Activation function implementations +* :ref:`nam::gating_activations `: Gating and blending activation functions + +Key Components +-------------- + +* :ref:`DSP `: Base class for all DSP models +* :ref:`WaveNet `: Main WaveNet model class +* :ref:`Conv1D `: Dilated 1D convolution implementation +* :ref:`FiLM `: Feature-wise Linear Modulation + +Documentation +------------- + +* :doc:`wavenet_walkthrough`: Step-by-step explanation of WaveNet architecture, LayerArray, and Layer computations +* :doc:`api/index`: Complete API reference + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..f5ac4b8 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +sphinx>=5.0.0 +breathe>=4.35.0 +sphinx-rtd-theme>=1.2.0 +sphinxcontrib-mermaid>=0.8.0 diff --git 
a/docs/wavenet_walkthrough.rst b/docs/wavenet_walkthrough.rst new file mode 100644 index 0000000..9dd90e6 --- /dev/null +++ b/docs/wavenet_walkthrough.rst @@ -0,0 +1,443 @@ +WaveNet Computation Walkthrough +================================== + +This document provides a detailed step-by-step explanation of how the NAM WaveNet architecture performs its computations, including the LayerArray and Layer objects that make up the model. + +"It's not *really* a Wavenet" +----------------------------- + +The name "WaveNet" is a bit of a misnomer. +There are similarities to the architecture from +`van den Oord et al. (2016) <https://arxiv.org/abs/1609.03499>`_--this is a +convolutional neural network that repeats a "Layer" motif with skip connections that +give good accuracy typical of convnets along with good training stability, but there are +a lot of differences. + +Here's a rundown of what's not exactly the same at an informal level: + +* The model in NAM is feedforward and used in a "regression" setting; + the model from the original paper is autoregressive and used for generative tasks. +* The class in NAM actually composes several "Layer array" objects. + Each one of these individually is actually far closer to a "WaveNet" in architecture. + In other words, this is more like a "stacked WaveNet". +* There are additional skip connections (e.g. input mixin) that aren't really part of + the original WaveNet architecture. +* And finally, the actual recipe within the layer has a lot of modifications. + The original layer has, roughly, a "convolution-activation-convolution" sequence with a + gated activation. + Here, the gated activation is optional (and is frequently not used, like in the popular + A1 standard/lite/feather/nano configurations). +* In v0.4.0, even more modifications have been added in--FiLMs, a bottleneck, and an + arbitrary "conditioning DSP" module that can be used to embed the input signal in a more + effective way to modulate the layers in the main model.
+ It doesn't need to be a WaveNet, but if it were then this feels more like a "cascading + (stacked) WaveNet". + +WaveNet Overview +---------------- + +WaveNet is a dilated convolutional neural network architecture designed for audio processing. The model consists of: + +* **Multiple LayerArrays**: Each LayerArray contains multiple layers with the same channel configuration +* **Conditioning**: Optional DSP processing of the input to generate conditioning signals and "skip in" this signal to the layers. +* **Residual and Skip Connections**: Information flows through both residual (layer-to-layer) and skip (to head) paths + +Computation graphs of the layer, layer array, and full model are below on this page. + +Layer Computation +----------------- + +A single Layer performs the core computation of a WaveNet block. +The computation proceeds through several stages: + +Step 1: Input Convolution +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The input first goes through a dilated 1D convolution: + +1. **Optional Pre-FiLM**: If `conv_pre_film` is active, the input is modulated by the condition signal before convolution +2. **Dilated Convolution**: The input is convolved with a dilated kernel +3. **Optional Post-FiLM**: If `conv_post_film` is active, the convolution output is modulated by the condition signal + +.. note:: + Having two FiLM layers bookending the convolution layer is mathematically equivalent + to a sort of "rank 1 adaptive LoRA" on the convolution weights. + +.. 
code-block:: cpp + :caption: Input convolution processing + + if (this->_conv_pre_film) { + this->_conv_pre_film->Process(input, condition, num_frames); + this->_conv.Process(this->_conv_pre_film->GetOutput(), num_frames); + } else { + this->_conv.Process(input, num_frames); + } + if (this->_conv_post_film) { + Eigen::MatrixXf& conv_output = this->_conv.GetOutput(); + this->_conv_post_film->Process_(conv_output, condition, num_frames); + } + +Step 2: Input Mixin +~~~~~~~~~~~~~~~~~~~ + +The conditioning input is processed separately and added to the convolution output: + +1. **Optional Pre-FiLM**: If `input_mixin_pre_film` is active, the condition is modulated before the mixin convolution +2. **Input Mixin Convolution**: A 1x1 convolution processes the condition signal +3. **Optional Post-FiLM**: If `input_mixin_post_film` is active, the mixin output is modulated + +.. code-block:: cpp + :caption: Input mixin processing + + if (this->_input_mixin_pre_film) { + this->_input_mixin_pre_film->Process(condition, condition, num_frames); + this->_input_mixin.process_(this->_input_mixin_pre_film->GetOutput(), num_frames); + } else { + this->_input_mixin.process_(condition, num_frames); + } + if (this->_input_mixin_post_film) { + Eigen::MatrixXf& input_mixin_output = this->_input_mixin.GetOutput(); + this->_input_mixin_post_film->Process_(input_mixin_output, condition, num_frames); + } + +Step 3: Sum and Pre-Activation FiLM +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The convolution output and input mixin output are summed, and optionally modulated: + +.. 
code-block:: cpp + :caption: Sum and pre-activation FiLM + + this->_z.leftCols(num_frames).noalias() = + _conv.GetOutput().leftCols(num_frames) + _input_mixin.GetOutput().leftCols(num_frames); + if (this->_activation_pre_film) { + this->_activation_pre_film->Process_(this->_z, condition, num_frames); + } + +Step 4: Activation +~~~~~~~~~~~~~~~~~~ + +The activation stage depends on the gating mode: + +**No Gating (GatingMode::NONE)** + Simple activation function applied to the summed output. + +**Gated (GatingMode::GATED)** + The output channels are doubled (2 * bottleneck). The top half goes through the primary activation, + the bottom half through a secondary activation (typically sigmoid). The results are multiplied element-wise. + +**Blended (GatingMode::BLENDED)** + Similar to gated, but instead of multiplication, a weighted blend is performed: + output = alpha * activated_input + (1 - alpha) * pre_activation_input + where alpha comes from the secondary activation. + +After activation, an optional post-activation FiLM may be applied. + +.. note:: + Even though the secondary activation is classically chosen to be a sigmoid, it doesn't + need to be. It doesn't even need to output a value between 0 and 1. + The operation is still well-defined. + +.. code-block:: cpp + :caption: Activation processing (gated mode example) + + if (this->_gating_mode == GatingMode::GATED) { + auto input_block = this->_z.leftCols(num_frames); + auto output_block = this->_z.topRows(bottleneck).leftCols(num_frames); + this->_gating_activation->apply(input_block, output_block); + if (this->_activation_post_film) { + this->_activation_post_film->Process(this->_z.topRows(bottleneck), condition, num_frames); + this->_z.topRows(bottleneck).leftCols(num_frames).noalias() = + this->_activation_post_film->GetOutput().leftCols(num_frames); + } + }
code-block:: cpp + :caption: 1x1 convolution + + _1x1.process_(this->_z.topRows(bottleneck), num_frames); + if (this->_1x1_post_film) { + Eigen::MatrixXf& _1x1_output = this->_1x1.GetOutput(); + this->_1x1_post_film->Process_(_1x1_output, condition, num_frames); + } + +Step 6: Head 1x1 (Optional) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a head1x1 convolution is configured, it processes the activated output for the skip connection: + +.. code-block:: cpp + :caption: Head 1x1 processing + + if (this->_head1x1) { + this->_head1x1->process_(this->_z.topRows(bottleneck).leftCols(num_frames), num_frames); + if (this->_head1x1_post_film) { + Eigen::MatrixXf& head1x1_output = this->_head1x1->GetOutput(); + this->_head1x1_post_film->Process_(head1x1_output, condition, num_frames); + } + this->_output_head.leftCols(num_frames).noalias() = + this->_head1x1->GetOutput().leftCols(num_frames); + } + +.. note:: + If there is no head 1x1, then the output dimension is the same as the activation + output dimension (the "bottleneck" dimension). + If there is, then the head can project to an arbitrary dimension. + +Step 7: Residual and Skip Connections +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Finally, the outputs are computed: + +* **Residual Connection**: `output_next_layer = input + 1x1_output` +* **Skip Connection**: `output_head = activated_output` (or head1x1 output if present) + +.. 
code-block:: cpp + :caption: Residual and skip connections + + // Store output to next layer (residual connection) + this->_output_next_layer.leftCols(num_frames).noalias() = + input.leftCols(num_frames) + _1x1.GetOutput().leftCols(num_frames); + + // Store output to head (skip connection) + if (this->_head1x1) { + this->_output_head.leftCols(num_frames).noalias() = + this->_head1x1->GetOutput().leftCols(num_frames); + } else { + this->_output_head.leftCols(num_frames).noalias() = + this->_z.topRows(bottleneck).leftCols(num_frames); + } + +Data Flow Diagram +~~~~~~~~~~~~~~~~~ + +Data arrays are marked with their dimensions as (channels, frames). +Notes: + +* ``g=2`` if a gating or blending activation is used, and ``1`` otherwise. + +* The head output dimension ``dh`` is the bottleneck dimension ``b`` when no head 1x1 is + used; otherwise, it is determined by the head 1x1's number of output channels. + + +.. mermaid:: + :caption: Layer Computation Flow + + graph TD + Input["Input (dx,n)"] --> PreFiLM1{Pre-FiLM?} + PreFiLM1 -->|Yes| ConvPre[Conv Pre-FiLM] + PreFiLM1 -->|No| Conv["Dilated Conv (g*b,n)"] + ConvPre --> Conv + Conv --> PostFiLM1{Post-FiLM?} + PostFiLM1 -->|Yes| ConvPost[Conv Post-FiLM] + PostFiLM1 -->|No| Sum["Sum (g*b,n)"] + ConvPost --> Sum + + Condition["Condition (dc,n)"] --> PreFiLM2{Pre-FiLM?} + PreFiLM2 -->|Yes| MixinPre[Input Mixin Pre-FiLM] + PreFiLM2 -->|No| Mixin["Input Mixin (g*b,n)"] + MixinPre --> Mixin + Mixin --> PostFiLM2{Post-FiLM?} + PostFiLM2 -->|Yes| MixinPost[Input Mixin Post-FiLM] + PostFiLM2 -->|No| Sum + MixinPost --> Sum + + Sum --> PreActFiLM{Pre-Act FiLM?} + PreActFiLM -->|Yes| PreAct[Pre-Activation FiLM] + PreActFiLM -->|No| Act["Activation (b,n)"] + PreAct --> Act + + Act --> PostActFiLM{Post-Act FiLM?} + PostActFiLM -->|Yes| PostActFilm[Post-Activation FiLM] + PostActFiLM -->|No| PostAct["Post-Activation Output (b,n)"] + PostActFilm --> PostAct + + PostAct --> Conv1x1["1x1 Conv (dx,n)"] + Conv1x1 --> Post1x1FiLM{Post-1x1 
FiLM?} + Post1x1FiLM -->|Yes| Post1x1[Post-1x1 FiLM] + Post1x1FiLM -->|No| Residual["Residual (dx,n)"] + Post1x1 --> Residual + + Input --> ResidualSum["Residual Sum (dx,n)"] + Residual --> ResidualSum + ResidualSum --> LayerOutput["Layer Output (dx,n)"] + + PostAct --> Head1x1{Head 1x1?} + Head1x1 -->|Yes| HeadConv["Head 1x1 Conv (dh,n)"] + Head1x1 -->|No| HeadOutput["Head Output (dh,n)"] + HeadConv --> HeadFiLM{Head FiLM?} + HeadFiLM -->|Yes| HeadPost[Head Post-FiLM] + HeadFiLM -->|No| HeadOutput + HeadPost --> HeadOutput + +LayerArray Computation +---------------------- + +A LayerArray chains multiple Layer objects together, processing them sequentially while +accumulating their "head outputs" via skip-out connections. + +Step 1: Rechanneling +~~~~~~~~~~~~~~~~~~~~~ + +The input is first projected (rechanneled) to match the layer channel count: + +.. code-block:: cpp + :caption: Input rechanneling + + this->_rechannel.process_(layer_inputs, num_frames); + Eigen::MatrixXf& rechannel_output = _rechannel.GetOutput(); + +Step 2: Layer Processing +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each layer processes the output of the previous layer: + +1. **First Layer**: Processes the rechanneled input +2. **Subsequent Layers**: Process the residual output from the previous layer +3. **Head Accumulation**: Each "head output" is accumulated into the head buffer + +.. 
code-block:: cpp + :caption: Layer processing loop + + for (size_t i = 0; i < this->_layers.size(); i++) { + if (i == 0) { + // First layer consumes the rechannel output buffer + this->_layers[i].Process(rechannel_output, condition, num_frames); + } else { + // Subsequent layers consume the previous layer's output + Eigen::MatrixXf& prev_output = this->_layers[i - 1].GetOutputNextLayer(); + this->_layers[i].Process(prev_output, condition, num_frames); + } + + // Accumulate head output from this layer + this->_head_inputs.leftCols(num_frames).noalias() += + this->_layers[i].GetOutputHead().leftCols(num_frames); + } + +Step 3: Head Rechanneling +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The accumulated head outputs are projected (rechanneled) to the final output dimension +for the layer array: + +.. code-block:: cpp + :caption: Head rechanneling + + _head_rechannel.process_(this->_head_inputs, num_frames); + +LayerArray Structure +~~~~~~~~~~~~~~~~~~~~ + +.. mermaid:: + :caption: LayerArray Structure + + graph TD + Input[Layer Input] --> Rechannel[Rechannel] + Rechannel --> Layer1[Layer 1] + Layer1 --> Layer2[Layer 2] + Layer2 --> Layer3[Layer 3] + Layer3 --> LayerN[Layer N] + Layer1 -->|Skip| HeadAccum[Head Accumulator] + Layer2 -->|Skip| HeadAccum + Layer3 -->|Skip| HeadAccum + LayerN -->|Skip| HeadAccum + HeadAccum --> HeadRechannel[Head Rechannel] + HeadRechannel --> HeadOut[Head Output] + LayerN --> LayerOut[Layer Output] + +WaveNet Processing +------------------ + +The complete WaveNet processing pipeline: + +Step 1: Condition Processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +If a condition DSP is provided, the input is processed through it to generate the +conditioning signal: + +.. 
code-block:: cpp + :caption: Condition processing + + void WaveNet::_process_condition(const int num_frames) { + if (this->_condition_dsp != nullptr) { + // Process input through condition DSP + this->_condition_dsp->process(/* input */, /* output */, num_frames); + // Copy output to condition buffer + } else { + // Use input directly as condition + this->_condition_output = this->_condition_input; + } + } + +The condition module can be a WaveNet, but it can also be something else--a convolution, +an RNN, etc. + +Step 2: LayerArray Processing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Each LayerArray processes the output of the previous array: + +1. **First LayerArray**: Processes the input with zeroed head inputs +2. **Subsequent LayerArrays**: Process the previous array's output and accumulate head inputs + +.. code-block:: cpp + :caption: LayerArray processing + + // First layer array + this->_layer_arrays[0].Process(input, condition, num_frames); + + // Subsequent layer arrays + for (size_t i = 1; i < this->_layer_arrays.size(); i++) { + Eigen::MatrixXf& prev_output = this->_layer_arrays[i-1].GetLayerOutputs(); + Eigen::MatrixXf& prev_head = this->_layer_arrays[i-1].GetHeadOutputs(); + this->_layer_arrays[i].Process(prev_output, condition, prev_head, num_frames); + } + +Step 3: Head Scaling and Output +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The final head output from the last LayerArray is scaled and written to output: + +.. code-block:: cpp + :caption: Head scaling and output + + Eigen::MatrixXf& final_head = this->_layer_arrays.back().GetHeadOutputs(); + // Apply head scale and write to output buffers + // (implementation details in wavenet.cpp) + +Complete WaveNet Flow +~~~~~~~~~~~~~~~~~~~~~~ + +.. 
mermaid:: + :caption: Complete WaveNet Processing Flow + + graph TD + AudioIn[Audio Input] --> ConditionProc{Condition DSP?} + ConditionProc -->|Yes| CondDSP[Condition DSP] + ConditionProc -->|No| Condition[Condition Signal] + CondDSP --> Condition + AudioIn --> LayerArray1[LayerArray 1] + Condition --> LayerArray1 + LayerArray1 -->|LayerN Output| LayerArray2[LayerArray 2] + LayerArray1 -->|Head Output| LayerArray2 + Condition --> LayerArray2 + LayerArray2 -->|LayerN Output| LayerArrayN[LayerArray N] + LayerArray2 -->|Head Output| LayerArrayN + Condition --> LayerArrayN + LayerArrayN -->|LayerN Output| Unused("(Unused)") + LayerArrayN -->|Head Output| HeadAccum[Head Accumulator] + HeadAccum --> HeadScale[Head Scale] + HeadScale --> AudioOut[Audio Output] + +See Also +-------- + +* :doc:`api/wavenet` - Complete API reference for WaveNet classes +* :doc:`api/dsp` - Base DSP interface documentation +* :doc:`api/conv1d` - Convolution implementation details diff --git a/tools/benchmodel.cpp b/tools/benchmodel.cpp index d8a1690..39c14b0 100644 --- a/tools/benchmodel.cpp +++ b/tools/benchmodel.cpp @@ -1,7 +1,9 @@ #include #include +#include #include "NAM/dsp.h" +#include "NAM/get_dsp.h" using std::chrono::duration; using std::chrono::duration_cast; @@ -27,7 +29,7 @@ int main(int argc, char* argv[]) std::unique_ptr model; model.reset(); - model = nam::get_dsp(modelPath); + model = nam::get_dsp(std::filesystem::path(modelPath)); if (model == nullptr) { diff --git a/tools/loadmodel.cpp b/tools/loadmodel.cpp index 8a1b889..265139a 100644 --- a/tools/loadmodel.cpp +++ b/tools/loadmodel.cpp @@ -1,5 +1,7 @@ #include +#include #include "NAM/dsp.h" +#include "NAM/get_dsp.h" int main(int argc, char* argv[]) { @@ -9,7 +11,7 @@ int main(int argc, char* argv[]) fprintf(stderr, "Loading model [%s]\n", modelPath); - auto model = nam::get_dsp(modelPath); + auto model = nam::get_dsp(std::filesystem::path(modelPath)); if (model != nullptr) {