diff --git a/brainsmith/_internal/io/dependency_installers.py b/brainsmith/_internal/io/dependency_installers.py
index 942eb59d..37fa410c 100644
--- a/brainsmith/_internal/io/dependency_installers.py
+++ b/brainsmith/_internal/io/dependency_installers.py
@@ -104,7 +104,7 @@ def install(self, name: str, dep: dict, dest: Path, force: bool, quiet: bool) ->
         cmd.extend([dep["url"], str(dest)])
 
         if not quiet:
-            logger.info("Cloning %s from %s", name, dep["url"])
+            logger.debug("Cloning %s from %s", name, dep["url"])
 
         result = subprocess.run(cmd, capture_output=True, text=True)
         if result.returncode != 0:
@@ -170,7 +170,7 @@ def install(self, name: str, dep: dict, dest: Path, force: bool, quiet: bool) ->
 
         try:
             if not quiet:
-                logger.info("Downloading %s from %s", name, dep["url"])
+                logger.debug("Downloading %s from %s", name, dep["url"])
 
             urlretrieve(dep["url"], zip_path)
 
@@ -254,7 +254,7 @@ def _install_finn_xsim(self, force: bool, quiet: bool) -> None:
 
         # Build with finn-xsim
         if not quiet:
-            logger.info("Building finn-xsim...")
+            logger.debug("Building finn-xsim...")
 
         # Construct build command
         build_cmd = ["python3", "-m", "finn.xsi.setup"]
@@ -265,16 +265,16 @@ def _install_finn_xsim(self, force: bool, quiet: bool) -> None:
         python_cmd = " ".join(build_cmd)
         bash_cmd = f"source {settings_script} && {python_cmd}"
 
-        logger.info("Running: %s", bash_cmd)
+        logger.debug("Running: %s", bash_cmd)
 
         # Execute build
         result = subprocess.run(["bash", "-c", bash_cmd], capture_output=True, text=True)
 
-        # Log output at INFO level (visible with --logs info)
+        # Log output at DEBUG level (visible with --logs debug)
         if result.stdout:
             for line in result.stdout.splitlines():
                 if line.strip():
-                    logger.info(line)
+                    logger.debug(line)
 
         if result.stderr:
             for line in result.stderr.splitlines():
@@ -322,7 +322,7 @@ def _install_generic_build(self, name: str, dep: dict, force: bool, quiet: bool)
 
             raise BuildError(error_msg)
 
         if not quiet:
-            logger.info("Building %s in %s", name, source_dir)
+            logger.debug("Building %s in %s", name, source_dir)
 
         # Run build command
         env = os.environ.copy()
@@ -334,7 +334,7 @@ def _install_generic_build(self, name: str, dep: dict, force: bool, quiet: bool)
         if result.stdout:
             for line in result.stdout.splitlines():
                 if line.strip():
-                    logger.info(line)
+                    logger.debug(line)
 
         if result.stderr:
             for line in result.stderr.splitlines():
diff --git a/brainsmith/_version.py b/brainsmith/_version.py
index d2e96efe..256e000e 100644
--- a/brainsmith/_version.py
+++ b/brainsmith/_version.py
@@ -2,5 +2,5 @@
 # Licensed under the MIT License.
# Version information for brainsmith -__version__ = "0.0.1a" -__version_tuple__ = (0, 0, 1, "a") +__version__ = "0.1.0" +__version_tuple__ = (0, 1, 0) diff --git a/brainsmith/dataflow/builder.py b/brainsmith/dataflow/builder.py index db252a1e..56e83110 100644 --- a/brainsmith/dataflow/builder.py +++ b/brainsmith/dataflow/builder.py @@ -31,8 +31,10 @@ from math import gcd from typing import TYPE_CHECKING, Any +from onnx import NodeProto from qonnx.core.datatype import BaseDataType from qonnx.core.modelwrapper import ModelWrapper +from qonnx.util.basic import get_by_name from brainsmith._internal.math import divisors @@ -57,20 +59,16 @@ class BuildContext: Attributes: schema: KernelSchema defining structure model_w: ModelWrapper for ONNX graph access - node_inputs: ONNX node input tensor names - node_outputs: ONNX node output tensor names + node: ONNX NodeProto (provides .input, .output, .name) param_getter: Function to retrieve nodeattr values param_setter: Function to store nodeattr values - node_name: Node name for error messages """ schema: KernelSchema model_w: ModelWrapper - node_inputs: list[str] - node_outputs: list[str] + node: NodeProto param_getter: Callable[[str], Any] param_setter: Callable[[str, Any], None] - node_name: str = "" class DesignSpaceBuilder: @@ -85,11 +83,9 @@ class DesignSpaceBuilder: >>> context = BuildContext( ... schema=kernel_schema, ... model_w=model_wrapper, - ... node_inputs=list(node.input), - ... node_outputs=list(node.output), + ... node=node, ... param_getter=self.get_nodeattr, ... param_setter=self.set_nodeattr, - ... node_name=node.name ... ) >>> design_space = builder.build(context) >>> point = design_space.configure({"SIMD": 64, "PE": 1}) @@ -195,12 +191,12 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace: self._ctx = ctx self._interfaces: dict[str, Any] = {} - logger.debug(f"Building KernelDesignSpace for {ctx.node_name}") + logger.debug(f"Building KernelDesignSpace for {ctx.node.name}") # Build input interfaces from ONNX graph inputs: dict[str, InterfaceDesignSpace] = {} - for i, inp_name in enumerate(ctx.node_inputs): + for i, inp_name in enumerate(ctx.node.input): if not inp_name: continue @@ -248,7 +244,7 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace: # Build output interfaces (may derive datatypes from inputs) outputs: dict[str, InterfaceDesignSpace] = {} - for i, out_name in enumerate(ctx.node_outputs): + for i, out_name in enumerate(ctx.node.output): if i >= len(ctx.schema.outputs): logger.warning( f"Node has output {i} but schema only defines {len(ctx.schema.outputs)} outputs" @@ -294,7 +290,7 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace: if (e := c.check(validation_ctx)) ] if failed: - raise ValueError(f"{ctx.node_name} validation failed:\n" + "\n".join(failed)) + raise ValueError(f"{ctx.node.name} validation failed:\n" + "\n".join(failed)) logger.debug(f" All {len(structural_constraints)} structural constraints passed") @@ -317,7 +313,7 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace: parameters=all_dimensions, ) - logger.debug(f"KernelDesignSpace built successfully for {ctx.node_name}") + logger.debug(f"KernelDesignSpace built successfully for {ctx.node.name}") return design_space def _resolve_datatype( @@ -696,8 +692,42 @@ def _compute_dimension_ranges( f"{ordered_count} ordered, {discrete_count} discrete" ) - # Combine tiling + DSE dimensions - all_dimensions = {**tiling_dimensions, **dse_dimensions} + # Generate inputMemType parameters from mem_modes + mem_mode_dimensions = {} + for idx, 
inp in enumerate(schema.inputs): + if inp.mem_modes is None: + continue + + param_name = f"input{idx}MemType" + + # Check if InferKernel marked this input as a weight + # Attribute presence indicates weight; absence indicates pure streaming input + attr = get_by_name(self._ctx.node.attribute, param_name) + if attr is None: + # Not a weight - skip parameter creation + logger.debug(f"Skipping {param_name}: not marked as weight by InferKernel") + continue + + values = inp.mem_modes + + # Support callable for context-aware filtering (e.g., MLO) + if callable(values): + values = values(self._ctx) + + # Ensure frozenset for discrete parameter + if not isinstance(values, frozenset): + values = frozenset(values) + + mem_mode_dimensions[param_name] = values + + if mem_mode_dimensions: + logger.debug( + f"Added {len(mem_mode_dimensions)} mem_mode dimensions: " + + ", ".join(f"{k}={v}" for k, v in mem_mode_dimensions.items()) + ) + + # Combine tiling + DSE + mem_mode dimensions + all_dimensions = {**tiling_dimensions, **dse_dimensions, **mem_mode_dimensions} return all_dimensions diff --git a/brainsmith/dataflow/dse_models.py b/brainsmith/dataflow/dse_models.py index 6445c9d0..c67d0f16 100644 --- a/brainsmith/dataflow/dse_models.py +++ b/brainsmith/dataflow/dse_models.py @@ -66,6 +66,7 @@ class InterfaceDesignSpace: datatype: Interface datatype is_weight: Whether this is a weight tensor (constant) tensor_name: ONNX tensor name for initializer lookups + mem_mode: Memory mode for weight inputs (embedded/decoupled/dynamic) parallelism_dimension: OrderedParameter for stream parameter (None if no parallelism) parallelism_param: Parameter name for stream dimension (e.g., "SIMD", "PE") """ @@ -88,16 +89,18 @@ class InterfaceDesignPoint: """Interface instance with resolved parallelization. Flyweight pattern: references parent design space, stores only configuration- - specific stream_shape. Delegates tensor_shape, block_shape, and datatype - to design space for minimal memory overhead. + specific stream_shape and mem_mode. Delegates tensor_shape, block_shape, and + datatype to design space for minimal memory overhead. 
Attributes: design_space: Parent InterfaceDesignSpace stream_shape: Resolved stream dimensions for this configuration + mem_mode: Memory mode for weight inputs (embedded/decoupled/dynamic) """ design_space: InterfaceDesignSpace stream_shape: Shape + mem_mode: str | None = None # Memory mode (embedded/decoupled/dynamic) for weight inputs # Convenience properties (delegate to design space) @property @@ -399,7 +402,7 @@ def _instantiate_interfaces( from .template_resolution import resolve_template configured = {} - for interface in interfaces.values(): + for idx, interface in enumerate(interfaces.values()): stream_shape = ( interface.block_shape if interface.stream_tiling is None @@ -413,8 +416,12 @@ def _instantiate_interfaces( ) ) + # Extract mem_mode from params if this is an input with mem_modes + mem_mode_param = f"input{idx}MemType" + mem_mode = params.get(mem_mode_param) + configured_interface = InterfaceDesignPoint( - design_space=interface, stream_shape=stream_shape + design_space=interface, stream_shape=stream_shape, mem_mode=mem_mode ) configured[interface.name] = configured_interface interface_lookup[interface.name] = configured_interface diff --git a/brainsmith/dataflow/kernel_op.py b/brainsmith/dataflow/kernel_op.py index d3c7f1fd..430285ee 100644 --- a/brainsmith/dataflow/kernel_op.py +++ b/brainsmith/dataflow/kernel_op.py @@ -300,11 +300,9 @@ def _ensure_ready(self, model_w: ModelWrapper) -> None: build_ctx = BuildContext( schema=self.kernel_schema, model_w=model_w, - node_inputs=list(self.onnx_node.input), - node_outputs=list(self.onnx_node.output), + node=self.onnx_node, param_getter=self.get_nodeattr, param_setter=self.set_nodeattr, - node_name=self.onnx_node.name, ) try: @@ -324,6 +322,12 @@ def _ensure_ready(self, model_w: ModelWrapper) -> None: # OrderedParameter: use get_default() (explicit default or minimum) initial_value = param.get_default() else: # frozenset + # Defensive: skip empty parameter sets (shouldn't happen with new design) + if len(param) == 0: + logger.debug( + f"{self.onnx_node.name}: Skipping empty parameter {param_name}" + ) + continue # Discrete: use sorted first value initial_value = sorted(param)[0] diff --git a/brainsmith/dataflow/schemas.py b/brainsmith/dataflow/schemas.py index cc44779f..8977f436 100644 --- a/brainsmith/dataflow/schemas.py +++ b/brainsmith/dataflow/schemas.py @@ -255,6 +255,9 @@ class InputSchema: stream_tiling: Stream tiling specification (e.g., ["SIMD"], [1, 1, 1, "PE"]) datatype: Datatype spec (None to use from ONNX, or DatatypeSpec union type to derive/optimize) required_layout: Expected input layout (e.g., "NHWC", "NCHW"), None if no requirement + mem_modes: Memory mode options for weight inputs (frozenset or callable returning frozenset). + Valid modes: "embedded" (compile-time constant), "decoupled" (separate memory), + "dynamic"/"external" (streaming). Generates inputMemType DSE parameter. """ # Identity @@ -268,6 +271,9 @@ class InputSchema: # Transformation requirements (NEW - embedded in interface) required_layout: str | None = None + # Memory mode specification for weight inputs + mem_modes: frozenset[str] | Callable | None = None + def __post_init__(self): """Validate interface requirements.""" if self.required_layout and self.required_layout not in {"NCHW", "NHWC"}: @@ -276,6 +282,21 @@ def __post_init__(self): f"Must be 'NCHW' or 'NHWC'." 
) + # Validate mem_modes if specified + if self.mem_modes is not None and not callable(self.mem_modes): + VALID_MEM_MODES = {"embedded", "decoupled", "dynamic", "external"} + if not isinstance(self.mem_modes, frozenset): + raise TypeError( + f"mem_modes for input '{self.name}' must be frozenset or callable, " + f"got {type(self.mem_modes).__name__}" + ) + invalid = self.mem_modes - VALID_MEM_MODES + if invalid: + raise ValueError( + f"Invalid mem_modes {invalid} for input '{self.name}'. " + f"Valid modes: {VALID_MEM_MODES}" + ) + @property def tiling_attrs(self) -> list[str]: """Extract unique template parameter names from tiling specs.""" @@ -461,6 +482,12 @@ def build_nodeattr_registry(self) -> dict[str, tuple]: for param in template_params: attrs[param] = ("i", False, 1) # Default 1, will be computed from factoring + # Memory mode parameters (inputMemType) - auto-extracted from mem_modes + for idx, inp in enumerate(self.inputs): + if inp.mem_modes is not None: + # Add inputMemType as a string parameter + attrs[f"input{idx}MemType"] = ("s", False, "embedded") + # DSE parameters (resource parameters) for param_name, param_spec in self.dse_parameters.items(): attrs[param_name] = _infer_nodeattr_type(param_spec) diff --git a/brainsmith/dataflow/spec_helpers.py b/brainsmith/dataflow/spec_helpers.py index b34bdc61..f509ca16 100644 --- a/brainsmith/dataflow/spec_helpers.py +++ b/brainsmith/dataflow/spec_helpers.py @@ -510,3 +510,71 @@ def max_datatype( ) -> Callable[[dict, Callable, Any, str], "BaseDataType"]: """Compute max() output datatype (context-aware).""" return _binary_op_datatype(a_interface, b_interface, compute_max_range) + + +def threshold_datatype(input_interface: str) -> Callable[[dict, Callable, Any, str], "BaseDataType"]: + """Compute threshold datatype with ceil() rounding accommodation (FINN-compatible). + + Implements FINN's RoundAndClipThresholds algorithm: + 1. Thresholds undergo ceil() rounding: ceil(127.1) = 128 + 2. Clipped to [input.min, input.max + 1] + 3. Therefore threshold dtype must accommodate input.max + 1 + + Mathematical rationale: + - Thresholds divide input domain into quantization bins + - After ceil(), highest threshold can reach input.max + 1 + - This value represents the supremum (least upper bound) of input domain + - For INT8 input (-128 to 127), thresholds need INT9 to hold 128 + + Args: + input_interface: Name of input interface (e.g., "input") + + Returns: + Callable that resolves threshold datatype from input dtype + + Example: + # In Thresholding schema + df.InputSchema( + name="thresholds", + datatype=threshold_datatype("input"), # Accommodates ceil() rounding + ) + + # Result: INT8 input → INT9 thresholds (to hold value 128) + """ + + def resolver( + interfaces: dict[str, Any], + param_getter: Callable, + model: Any, # ModelWrapper + tensor_name: str, + ) -> "BaseDataType": + """Resolve threshold datatype with ceil() accommodation.""" + from qonnx.core.datatype import DataType + + if input_interface not in interfaces: + available = list(interfaces.keys()) + raise ValueError( + f"Interface '{input_interface}' not found for threshold datatype. 
" + f"Available: {', '.join(available)}" + ) + + input_dt = interfaces[input_interface].datatype + + # Skip optimization for float inputs (keep as-is) + if not input_dt.is_integer(): + return model.get_tensor_datatype(tensor_name) if model else input_dt + + # Accommodate ceil() rounding: thresholds can reach input.max + 1 + # This is the supremum of the input domain + max_val = input_dt.max() + 1 + + # Find smallest dtype that can hold the ceil-rounded range + if input_dt.signed(): + # For signed inputs: range is [input.min, input.max+1] + # Use get_smallest_possible with negative bound for signed dtype selection + return smallest_datatype_for_range(-max_val - 1, max_val) + else: + # For unsigned inputs: range is [0, input.max+1] + return smallest_datatype_for_range(0, max_val) + + return resolver diff --git a/brainsmith/dse/_parser/kernels.py b/brainsmith/dse/_parser/kernels.py index 191f5790..e35ea6f0 100644 --- a/brainsmith/dse/_parser/kernels.py +++ b/brainsmith/dse/_parser/kernels.py @@ -30,7 +30,7 @@ def parse_kernels(kernels_data: list[str | dict]) -> list[tuple[str, list[type]] List of (kernel_name, backend_classes) tuples where backend_classes are in priority order (first backend is tried first during specialization). Kernels with no backends will have an empty backend_classes list and will - not be specialized during build_hw_graph (a warning is logged). + not be specialized during specialize_kernel_backends (a warning is logged). Raises: ValueError: If kernel spec format is invalid @@ -86,7 +86,7 @@ def parse_kernels(kernels_data: list[str | dict]) -> list[tuple[str, list[type]] if not backend_classes: logger.debug( f"Kernel '{kernel_name}' has no registered backends. " - f"This kernel will not be specialized during build_hw_graph." + f"This kernel will not be specialized during specialize_kernel_backends." ) kernel_backends.append((kernel_name, backend_classes)) diff --git a/brainsmith/kernels/addstreams/addstreams.py b/brainsmith/kernels/addstreams/addstreams.py index 9fd5a56d..7b47cac3 100644 --- a/brainsmith/kernels/addstreams/addstreams.py +++ b/brainsmith/kernels/addstreams/addstreams.py @@ -88,7 +88,7 @@ def can_infer_from(cls, node: NodeProto, model: ModelWrapper) -> bool: @classmethod def infer_from( - cls, node: NodeProto, model: ModelWrapper, insert_index: int + cls, node: NodeProto, model: ModelWrapper, insert_index: int, kernel_index: int = None ) -> df.TransformationResult: """Create AddStreams HW node from ONNX Add node. 
@@ -98,18 +98,20 @@ def infer_from( node: ONNX Add node to convert model: ModelWrapper for graph access insert_index: Where to insert new nodes (unused - no layout conversion) + kernel_index: Sequential index for this kernel type (for naming) Returns: TransformationResult with AddStreams node and removed Add node """ - # Create AddStreams HW node + # Create AddStreams HW node with sequential naming + node_name = f"AddStreams_{kernel_index}" if kernel_index is not None else f"AddStreams_{node.name}" hw_node = helper.make_node( "AddStreams", inputs=list(node.input), outputs=list(node.output), domain="brainsmith.kernels", backend="fpgadataflow", - name=f"AddStreams_{node.name}", + name=node_name, ) return df.TransformationResult( diff --git a/brainsmith/kernels/channelwise/channelwise.py b/brainsmith/kernels/channelwise/channelwise.py index de1ab204..14323209 100644 --- a/brainsmith/kernels/channelwise/channelwise.py +++ b/brainsmith/kernels/channelwise/channelwise.py @@ -72,6 +72,7 @@ def resolver(interfaces, param_getter, model, tensor_name): block_tiling=[], # No tiling (static data) stream_tiling=[], # Not streamed datatype=VALUE_OPTIMIZED, # Optimize from actual values + mem_modes=frozenset({"embedded"}), # Only embedded mode (FINN parity) ), ], outputs=[ @@ -153,12 +154,18 @@ def can_infer_from(cls, node: NodeProto, model: ModelWrapper) -> bool: @classmethod def infer_from( - cls, node: NodeProto, model: ModelWrapper, insert_index: int + cls, node: NodeProto, model: ModelWrapper, insert_index: int, kernel_index: int = None ) -> TransformationResult: """Infer ChannelwiseOp from ONNX Add/Mul/LessOrEqual/GreaterOrEqual. Uses helper functions to detect and reorder inputs to canonical (dynamic, static) order before creating HW node. + + Args: + node: ONNX node to convert + model: ModelWrapper for graph access + insert_index: Where to insert new nodes + kernel_index: Sequential index for this kernel type (for naming) """ # Detect and reorder inputs to (dynamic, static) pair = find_static_dynamic_pair(node.input, model) @@ -177,12 +184,13 @@ def infer_from( # Expand scalar to per-channel param_input = expand_scalar_to_channels(static_input, num_channels, model) - # Create ChannelwiseOp node in canonical order + # Create ChannelwiseOp node in canonical order with sequential naming + node_name = f"ChannelwiseOp_{kernel_index}" if kernel_index is not None else node.name hw_node = helper.make_node( "ChannelwiseOp", inputs=[dynamic_input, param_input], # Canonical order outputs=node.output, - name=node.name, + name=node_name, domain="brainsmith.kernels", backend="fpgadataflow", # Kernel parameters (use ONNX op type directly) @@ -284,3 +292,21 @@ def execute_node(self, context, graph): raise ValueError(f"Unknown func '{func}'") context[node.output[0]] = result.astype(np.float32) + + # ================================================================ + # MLO Loop Body Adaptation + # ================================================================ + + def adapt_for_loop_body(self, loop_signature): + """Adapt ChannelwiseOp for use in FINNLoop body. + + ChannelwiseOp doesn't require adaptation - parameters remain static + even in MLO context. Unlike ElementwiseBinaryOp, there's no pattern + switching needed. 
+ + Args: + loop_signature: Loop signature describing streaming parameters (unused) + """ + # No-op for ChannelwiseOp - parameters are always static + # This method exists for interface compatibility with FINNLoop + pass diff --git a/brainsmith/kernels/channelwise/channelwise_hls.py b/brainsmith/kernels/channelwise/channelwise_hls.py index 80fe6884..6992d983 100644 --- a/brainsmith/kernels/channelwise/channelwise_hls.py +++ b/brainsmith/kernels/channelwise/channelwise_hls.py @@ -31,6 +31,7 @@ from math import ceil +import numpy as np from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import numpy_to_hls_code from qonnx.core.datatype import DataType @@ -57,6 +58,8 @@ def get_nodeattr_types(self): my_attrs.update(HLSBackend.get_nodeattr_types(self)) return my_attrs + # Removed _get_mem_mode() and get_instream_width() - embedded mode only, use base class + # ================================================================ # Resource Estimation (Uses design_point) # ================================================================ @@ -107,15 +110,31 @@ def get_template_param_values(self): return ret def generate_params(self, model, path): - """Generate params.h with parameter tensor.""" + """Generate parameter header file (embedded mode only).""" code_gen_dir = path - - # Get parameters and format for HLS parameters = model.get_initializer(self.onnx_node.input[1]) - parameter_tensor = self.get_hls_compatible_parameter_tensor(parameters) + # Embedded mode: generate params.h header file + weight_filename = f"{code_gen_dir}/params.h" + self.make_weight_file(parameters, "hls_header", weight_filename) + + def make_weight_file(self, weights, weight_file_mode, weight_file_name): + """Produce file containing parameters in HLS header format. + + Args: + weights: numpy array with parameters + weight_file_mode: must be "hls_header" (embedded mode only) + weight_file_name: filename for weight file + """ + if weight_file_mode != "hls_header": + raise Exception(f"Only hls_header mode supported (got {weight_file_mode})") + + parameter_tensor = self.get_hls_compatible_parameter_tensor(weights) pdt = DataType[self.get_input_datatype(1).name] - parameters_hls_code = numpy_to_hls_code(parameter_tensor, pdt, "parameters", False, True) + # Generate params.h with ChannelWiseOperation initializer + parameters_hls_code = numpy_to_hls_code( + parameter_tensor, pdt, "parameters", False, True + ) # Get datatypes idt = self.get_input_datatype(0) @@ -151,7 +170,7 @@ def generate_params(self, model, path): tmem = self.calc_tmem() # Write params.h - with open(f"{code_gen_dir}/params.h", "w") as f: + with open(weight_file_name, "w") as f: f.write( f"static ChannelWiseOperation<{tmem},{pe},{idt_hls}," f"{pdt_hls},{odt_hls},{func_str}> threshs = " @@ -162,8 +181,7 @@ def generate_params(self, model, path): # No overrides needed - FINN's implementation works correctly! 
     def global_includes(self):
-        self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"']
-        self.code_gen_dict["$GLOBALS$"] += ['#include "params.h"']
+        self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"', '#include "params.h"']
 
     def defines(self, var):
         # Use design_point for semantic shape (not nodeattrs)
@@ -181,6 +199,7 @@ def defines(self, var):
         ]
 
     def read_npy_data(self):
+        """Read NPY data for input stream (embedded mode only)."""
         code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim")
         dtype = self.get_input_datatype(0)
         elem_bits = dtype.bitwidth()
@@ -190,13 +209,20 @@ def read_npy_data(self):
         npy_type = "float"
         npy_in = f"{code_gen_dir}/input_0.npy"
 
-        self.code_gen_dict["$READNPYDATA$"] = []
-        self.code_gen_dict["$READNPYDATA$"].append(
+        self.code_gen_dict["$READNPYDATA$"] = [
             f"npy2apintstream<{packed_hls_type}, {elem_hls_type}, {elem_bits}, "
             f'{npy_type}>("{npy_in}", in0_V, false);'
-        )
+        ]
+
+    def strm_decl(self):
+        """Generate stream declarations (embedded mode only)."""
+        self.code_gen_dict["$STREAMDECLARATIONS$"] = [
+            f'hls::stream<ap_uint<{self.get_instream_width(0)}>> in0_V ("in0_V");',
+            f'hls::stream<ap_uint<{self.get_outstream_width(0)}>> out0_V ("out0_V");',
+        ]
 
     def docompute(self):
+        """Generate compute code (embedded mode only, matches FINN implementation)."""
         tmpl_args = self.get_template_param_values()
 
         # Spatial dim from semantic NHWC shape (design_point)
@@ -208,8 +234,9 @@
         elif len(block_shape) == 2:  # [N, C] - fully connected
             spatial_dim = 1
         else:
-            raise Exception(f"Unexpected block shape {block_shape}")
+            raise Exception(f"Unexpected block_shape {block_shape}")
 
+        # Embedded mode: parameters from params.h (FINN parity)
         self.code_gen_dict["$DOCOMPUTE$"] = [
             f"Thresholding_Batch<{spatial_dim}, NumChannels1, PE1, "
             f"{tmpl_args['TSrcI']}, {tmpl_args['TDstI']}>"
@@ -238,22 +265,22 @@ def dataoutstrm(self):
         ]
 
     def blackboxfunction(self):
+        """Generate blackbox function signature (embedded mode only)."""
         self.code_gen_dict["$BLACKBOXFUNCTION$"] = [
-            f"void {self.onnx_node.name}(hls::stream<ap_uint<{self.get_instream_width(0)}>> &in0_V, "
+            f"void {self.onnx_node.name}(hls::stream<ap_uint<{self.get_instream_width(0)}>> &in0_V, "
             f"hls::stream<ap_uint<{self.get_outstream_width(0)}>> &out0_V)"
         ]
 
     def pragmas(self):
-        self.code_gen_dict["$PRAGMAS$"] = ["#pragma HLS INTERFACE axis port=in0_V"]
-        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out0_V")
-        self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return")
-
-        # Partition parameter array
-        self.code_gen_dict["$PRAGMAS$"].append(
-            "#pragma HLS ARRAY_PARTITION variable=threshs.parameters complete dim=1"
-        )
+        """Generate HLS pragmas (embedded mode only)."""
+        self.code_gen_dict["$PRAGMAS$"] = [
+            "#pragma HLS INTERFACE axis port=in0_V",
+            "#pragma HLS INTERFACE axis port=out0_V",
+            "#pragma HLS INTERFACE ap_ctrl_none port=return",
+            "#pragma HLS ARRAY_PARTITION variable=threshs.parameters complete dim=1",
+        ]
 
-        # Set resource type
+        # Set resource type for parameter storage
         ram_style = self.get_nodeattr("ram_style")
         input_iface = self.design_point.inputs["input"]
         pe = input_iface.stream_shape[-1]  # PE from stream tiling
diff --git a/brainsmith/kernels/crop/crop.py b/brainsmith/kernels/crop/crop.py
index f1787af7..f3a0ae7b 100644
--- a/brainsmith/kernels/crop/crop.py
+++ b/brainsmith/kernels/crop/crop.py
@@ -326,7 +326,7 @@ def can_infer_from(cls, node: NodeProto, model: ModelWrapper) -> bool:
 
     @classmethod
     def infer_from(
-        cls, node: NodeProto, model: ModelWrapper, insert_index: int
+        cls, node: NodeProto, model: ModelWrapper, insert_index: int, kernel_index: int = None
     ) -> df.TransformationResult:
         """Create Crop HW
node from ONNX Gather node. @@ -340,6 +340,7 @@ def infer_from( node: ONNX Gather node to convert model: ModelWrapper for graph access insert_index: Where to insert new nodes (unused - no layout conversion) + kernel_index: Sequential index for this kernel type (for naming) Returns: TransformationResult with Crop node and removed Gather node @@ -405,14 +406,15 @@ def infer_from( # Should not reach here due to earlier validation raise ValueError(f"Unsupported axis {axis}") - # Create HW node with crop parameters + # Create HW node with crop parameters and sequential naming + node_name = f"Crop_{kernel_index}" if kernel_index is not None else f"Crop_{node.name}" hw_node = helper.make_node( "Crop", inputs=list(node.input[:1]), # Only first input (data, not indices) outputs=list(node.output), domain="brainsmith.kernels", backend="fpgadataflow", - name=f"Crop_{node.name}", + name=node_name, crop_north=int(crop_north), crop_south=int(crop_south), crop_east=int(crop_east), diff --git a/brainsmith/kernels/elementwise_binary/elementwise_binary.py b/brainsmith/kernels/elementwise_binary/elementwise_binary.py index fa70bca9..73f60ebb 100644 --- a/brainsmith/kernels/elementwise_binary/elementwise_binary.py +++ b/brainsmith/kernels/elementwise_binary/elementwise_binary.py @@ -181,48 +181,29 @@ def resolver(interfaces, param_getter, model, tensor_name): def _validate_input_pattern(ctx): - """Validate input pattern and broadcasting compatibility. + """Validate input pattern based on weight status. - Supports two patterns: - - "dynamic_static": LHS dynamic, RHS static (Phase 1) - - "dynamic_dynamic": Both dynamic with broadcasting (Phase 2) + Validation rules (derived from mem_mode behavior): + - If RHS is weight: valid (dynamic_static or MLO dynamic_dynamic) + - If RHS is not weight: LHS also can't be weight (both streaming) Returns: None if valid, error message string if invalid """ - input_pattern = ctx.param_getter("input_pattern") + if "lhs" not in ctx.inputs or "rhs" not in ctx.inputs: + return "Missing required inputs 'lhs' or 'rhs'" - # Validate pattern-specific constraints - if input_pattern == "dynamic_static": - # Phase 1: LHS dynamic, RHS static - if "lhs" not in ctx.inputs or "rhs" not in ctx.inputs: - return "Missing required inputs 'lhs' or 'rhs'" + lhs = ctx.inputs["lhs"] + rhs = ctx.inputs["rhs"] - lhs = ctx.inputs["lhs"] - rhs = ctx.inputs["rhs"] + # Simple validation: if RHS is not a weight, LHS can't be either + if not rhs.is_weight and lhs.is_weight: + return "LHS cannot be a weight when RHS is not a weight (invalid pattern)" - # RHS must be static (weight) - if not rhs.is_weight: - return "RHS must be static (initializer) for dynamic_static pattern" - - elif input_pattern == "dynamic_dynamic": - # Phase 2: Both dynamic, must be broadcastable - if "lhs" not in ctx.inputs or "rhs" not in ctx.inputs: - return "Missing required inputs 'lhs' or 'rhs'" - - lhs = ctx.inputs["lhs"] - rhs = ctx.inputs["rhs"] - - # Both must be dynamic (not weights) - if lhs.is_weight or rhs.is_weight: - return "Both inputs must be dynamic (not initializers) for dynamic_dynamic pattern" - - # Shapes must be broadcastable (checked at design space build time) - # Note: BroadcastInfo.compute() will be called during HLS code generation - # to get detailed broadcasting metadata - - else: - return f"Unknown input_pattern '{input_pattern}'. Expected 'dynamic_static' or 'dynamic_dynamic'" + # All valid cases: + # 1. RHS is weight, LHS is not → dynamic_static + # 2. RHS is weight (MLO context) → dynamic_dynamic + # 3. 
Neither is weight → dynamic_dynamic (both activations) return None @@ -240,11 +221,13 @@ def _validate_input_pattern(ctx): name="rhs", # Note: Tiling is minimal for backward compatibility with Phase 1 (static) # For Phase 2 dynamic+dynamic, HLS backend will create streaming interface - # based on input_pattern parameter + # based on derived pattern from mem_mode block_tiling=[FULL_DIM], # Full tensor (needed for shape inference) stream_tiling=["PE"], # PE parallelism (used only if dynamic) datatype=VALUE_OPTIMIZED, # Optimize from actual values required_layout=None, + # Memory modes for RHS - static capabilities (what CAN it be if weight) + mem_modes=frozenset({"embedded", "decoupled", "dynamic"}), # All possible modes ), ], outputs=[ @@ -260,8 +243,6 @@ def _validate_input_pattern(ctx): kernel_params={ # Operation type: matches ONNX op_type (from operations registry) "func": ("s", True, "Add", BinaryOperations.all_operation_names()), - # Input pattern: determines which inputs are streaming - "input_pattern": ("s", True, "dynamic_static", {"dynamic_static", "dynamic_dynamic"}), # Direction for BitShift operations (optional, only used when func="BitShift") "direction": ( "s", @@ -269,6 +250,10 @@ def _validate_input_pattern(ctx): "", # Optional parameter {"LEFT", "RIGHT", ""}, ), + # NOTE: input_pattern removed - now derived from rhs.mem_mode (single source of truth) + # Pattern derivation: + # - mem_mode in ("embedded", "decoupled"): dynamic_static + # - mem_mode == "dynamic" or None: dynamic_dynamic }, # DSE PARAMETERS (explorable resource parameters) dse_parameters={ @@ -276,12 +261,7 @@ def _validate_input_pattern(ctx): "ram_style": df.ParameterSpec( name="ram_style", values={"auto", "distributed", "block", "ultra"}, default="auto" ), - # Memory mode for constant parameters - "mem_mode": df.ParameterSpec( - name="mem_mode", - values={"internal_embedded", "internal_decoupled"}, - default="internal_embedded", - ), + # NOTE: mem_mode moved to interface-level (rhs.mem_modes) → generates input1MemType }, constraints=[ # Pattern-specific validation (dynamic vs static, broadcasting) @@ -340,6 +320,37 @@ def __init__(self, onnx_node, **kwargs): def build_schema(cls, node: NodeProto, model: ModelWrapper | None) -> df.KernelSchema: return ELEMENTWISE_BINARY_SCHEMA + # ================================================================ + # Derived Properties (Single Source of Truth) + # ================================================================ + + @property + def input_pattern(self) -> str | None: + """Derive input pattern from RHS memory mode (single source of truth). 
+ + Pattern derivation: + - mem_mode=None: dynamic_dynamic (both activations streaming) + - mem_mode="embedded"/"decoupled": dynamic_static (static weight) + - mem_mode="dynamic": dynamic_dynamic (weight from loop/MLO) + + Returns: + "dynamic_static" or "dynamic_dynamic", or None if not yet configured + """ + if not hasattr(self, 'design_point') or self.design_point is None: + return None # Not yet configured + + rhs_iface = self.design_point.inputs.get("rhs") + if rhs_iface is None: + return None + + # Derive from mem_mode + if rhs_iface.mem_mode in ("embedded", "decoupled"): + return "dynamic_static" # Static weight + elif rhs_iface.mem_mode == "dynamic" or rhs_iface.mem_mode is None: + return "dynamic_dynamic" # Streaming (from loop or both activations) + else: + raise ValueError(f"Unknown mem_mode: {rhs_iface.mem_mode}") + # ================================================================ # ONNX → KernelOp Inference (Unified System) # ================================================================ @@ -397,7 +408,7 @@ def can_infer_from(cls, node: NodeProto, model: ModelWrapper) -> bool: @classmethod def infer_from( - cls, node: NodeProto, model: ModelWrapper, insert_index: int + cls, node: NodeProto, model: ModelWrapper, insert_index: int, kernel_index: int = None ) -> TransformationResult: """Infer ElementwiseBinaryOp from ONNX binary operation. @@ -409,6 +420,7 @@ def infer_from( node: ONNX node to transform model: ModelWrapper for accessing graph info insert_index: Index where to insert new node + kernel_index: Sequential index for this kernel type (for naming) Returns: TransformationResult with new HW node @@ -432,14 +444,14 @@ def infer_from( static_dynamic_pair = find_static_dynamic_pair(node.input, model) if static_dynamic_pair is not None: lhs_input, rhs_input = static_dynamic_pair # (dynamic, static) - input_pattern = "dynamic_static" + # Pattern will be derived from mem_mode: "dynamic_static" else: # Try dynamic+dynamic pattern (Phase 2) dynamic_dynamic_pair = find_dynamic_dynamic_pair(node.input, model) if dynamic_dynamic_pair is not None: lhs_input, rhs_input = dynamic_dynamic_pair # Both dynamic - input_pattern = "dynamic_dynamic" + # Pattern will be derived from mem_mode: "dynamic_dynamic" # Log broadcasting information for debugging broadcast_info = get_broadcast_info(lhs_input, rhs_input, model) @@ -455,17 +467,19 @@ def infer_from( f"Expected either (dynamic, static) or (dynamic, dynamic) inputs." 
) - # Create ElementwiseBinaryOp node with detected pattern + # Create ElementwiseBinaryOp node with sequential naming + # NOTE: input_pattern removed - now derived from rhs.mem_mode during design space building + node_name = f"ElementwiseBinaryOp_{kernel_index}" if kernel_index is not None else node.name hw_node = helper.make_node( "ElementwiseBinaryOp", inputs=[lhs_input, rhs_input], outputs=node.output, - name=node.name, + name=node_name, domain="brainsmith.kernels", backend="fpgadataflow", # Kernel parameters func=node.op_type, - input_pattern=input_pattern, # NEW: Track which pattern is active + # input_pattern removed - derived from mem_mode ) # Copy direction attribute for BitShift operations @@ -484,6 +498,20 @@ def infer_from( f"BitShift node {node.name} missing required 'direction' attribute" ) + # Mark RHS as weight if it's an initializer (for mem_mode parameter creation) + # Attribute presence indicates weight; absence indicates pure streaming input + rhs_input = hw_node.input[1] + if model.get_initializer(rhs_input) is not None: + hw_node.attribute.append(helper.make_attribute("input1MemType", "embedded")) + + # Copy metadata_props (e.g., PyTorch name scopes for loop rolling) + # metadata_props is a protobuf RepeatedCompositeFieldContainer + if hasattr(node, 'metadata_props') and len(node.metadata_props) > 0: + for entry in node.metadata_props: + new_entry = hw_node.metadata_props.add() + new_entry.key = entry.key + new_entry.value = entry.value + # Return transformation result return TransformationResult( nodes_to_remove=[node], @@ -637,10 +665,11 @@ def _validate_broadcast_compatibility(self): Raises: ValueError: If shapes not broadcastable, with examples """ - input_pattern = self.get_nodeattr("input_pattern") + # Use property to derive pattern from mem_mode + input_pattern = self.input_pattern if input_pattern != "dynamic_dynamic": - # Only validate for dynamic_dynamic pattern + # Only validate for dynamic_dynamic pattern (both inputs streaming) return if not hasattr(self, "design_point") or self.design_point is None: @@ -791,6 +820,36 @@ def execute_node(self, context, graph): # Store result (as float32 container, QONNX convention) context[node.output[0]] = out.astype(np.float32) + # ================================================================ + # MLO Loop Body Adaptation + # ================================================================ + + def adapt_for_loop_body(self, loop_signature): + """Adapt ElementwiseBinaryOp for use in FINNLoop body. + + Forces RHS memory mode to "dynamic" when weights are streamed from loop level. + Only modifies the attribute if: + 1. RHS is marked as weight (attribute exists from InferKernel) + 2. 
Loop signature indicates input is PARAMETER (streamed per iteration) + + Args: + loop_signature: List of LoopBodyInputType values for each input + """ + from qonnx.util.basic import get_by_name + + # Check if RHS is marked as weight + attr = get_by_name(self.onnx_node.attribute, "input1MemType") + if attr is None: + return # Not a weight, nothing to adapt + + # Check if loop signature indicates this input is streamed as parameter + if loop_signature and len(loop_signature) > 1: + from finn.transformation.fpgadataflow.loop_rolling import LoopBodyInputType + + if loop_signature[1] == LoopBodyInputType.PARAMETER: + self.set_nodeattr("input1MemType", "dynamic") + logger.debug(f"{self.onnx_node.name}: Forced input1MemType=dynamic for MLO") + # ================================================================ # ONNX Shape Compatibility # ================================================================ diff --git a/brainsmith/kernels/elementwise_binary/elementwise_binary_hls.py b/brainsmith/kernels/elementwise_binary/elementwise_binary_hls.py index dcf078e7..07c543c1 100644 --- a/brainsmith/kernels/elementwise_binary/elementwise_binary_hls.py +++ b/brainsmith/kernels/elementwise_binary/elementwise_binary_hls.py @@ -27,8 +27,12 @@ import numpy as np from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend -from finn.util.data_packing import numpy_to_hls_code +from finn.util.data_packing import ( + numpy_to_hls_code, + pack_innermost_dim_as_hex_string, +) from qonnx.core.datatype import DataType +from qonnx.util.basic import roundup_to_integer_multiple from brainsmith.kernels.elementwise_binary.elementwise_binary import ElementwiseBinaryOp from brainsmith.registry import backend @@ -63,7 +67,7 @@ def emit(self) -> list[str]: @backend( target_kernel="brainsmith:ElementwiseBinaryOp", language="hls", - author="Migrated from AMD FINN by Thomas Keller", + author="AMD FINN", ) class ElementwiseBinaryOp_hls(ElementwiseBinaryOp, HLSBackend): """HLS backend for ElementwiseBinaryOp (KernelOp-based). @@ -260,22 +264,33 @@ def _get_broadcast_info(self, input_name): def _needs_streaming_interface(self, input_name): """Check if input needs a streaming (dynamic) interface. + Determines streaming vs static based on mem_mode (single source of truth): + - RHS with mem_mode in ("embedded", "decoupled"): static (loaded from params.hpp) + - RHS with mem_mode == "dynamic": streaming (MLO case) + - RHS with no mem_mode: streaming (not a weight) + - LHS: always streaming + Args: input_name: "lhs" or "rhs" Returns: True if input should be streamed, False if static parameter """ - input_pattern = self.get_nodeattr("input_pattern") - - if input_pattern == "dynamic_static": - # Phase 1: Only LHS is streaming - return input_name == "lhs" - elif input_pattern == "dynamic_dynamic": - # Phase 2: Both inputs are streaming + if input_name == "lhs": + # LHS is always streaming return True - else: - raise ValueError(f"Unknown input_pattern: {input_pattern}") + + # RHS: check mem_mode to determine if static or streaming + rhs_iface = self.design_point.inputs.get("rhs") + if rhs_iface and hasattr(rhs_iface, "mem_mode") and rhs_iface.mem_mode: + # Weight with mem_mode set + if rhs_iface.mem_mode in ("embedded", "decoupled"): + return False # Static - loaded from params.hpp + else: # "dynamic" + return True # Streaming - MLO case + + # No mem_mode or not a weight - streaming + return True def _get_buffer_declaration(self, input_name: str, pe: int) -> BufferDeclaration | None: """Generate buffer array declaration for an input. 
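The streaming decision in _needs_streaming_interface boils down to a small pure function over mem_mode. A minimal sketch of the mapping (hypothetical standalone helper, for illustration only):

    def needs_streaming(input_name: str, rhs_mem_mode: str | None) -> bool:
        if input_name == "lhs":
            return True                  # LHS is always an activation stream
        if rhs_mem_mode in ("embedded", "decoupled"):
            return False                 # static weight, read from params.hpp
        return True                      # "dynamic" (MLO) or not a weight

    assert needs_streaming("rhs", "embedded") is False
    assert needs_streaming("rhs", "dynamic") is True
    assert needs_streaming("rhs", None) is True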
@@ -383,6 +398,7 @@ def generate_params(self, model, path): For dynamic_static pattern: Generates RHS parameter array For dynamic_dynamic pattern: Creates empty params.hpp (no static inputs) + For MLO (dynamic mem_mode): Generates memblock.dat for FINNLoop Implements FINN-compatible parameter reshaping: 1. Reshape to folded input shape (matches PE-parallelized access) @@ -390,7 +406,6 @@ def generate_params(self, model, path): 3. Pad dimensions from left to align with output shape for broadcasting """ code_gen_dir = path - input_pattern = self.get_nodeattr("input_pattern") # Collect parameter code for static inputs param_code_sections = [] @@ -436,15 +451,19 @@ def generate_params(self, model, path): f"#pragma HLS ARRAY_PARTITION variable=lhs complete dim={len(lhs_shape)}" ) - # Check RHS (static in dynamic_static pattern) - if not self._needs_streaming_interface("rhs"): - rhs_parameters = model.get_initializer(self.onnx_node.input[1]) - if rhs_parameters is None: - raise ValueError( - f"ElementwiseBinaryOp with pattern '{input_pattern}' requires static RHS parameter, " - f"but {self.onnx_node.input[1]} is not an initializer" - ) + # Check RHS - handle both static (embedded) and MLO (dynamic) cases + # For MLO, RHS is streaming but FINNLoop.generate_params() sets the initializer + rhs_parameters = model.get_initializer(self.onnx_node.input[1]) + # Determine if this is MLO mode (dynamic mem_mode with initializer) + rhs_iface = self.design_point.inputs.get("rhs") + is_mlo_mode = ( + rhs_iface is not None + and hasattr(rhs_iface, "mem_mode") + and rhs_iface.mem_mode == "dynamic" + ) + + if rhs_parameters is not None: rhs_dtype = DataType[self.get_input_datatype(1).name] # FINN-compatible reshaping: folded shape → PE broadcast → dimension padding @@ -459,17 +478,43 @@ def generate_params(self, model, path): rhs_shape = (len(out_shape) - len(rhs_shape)) * (1,) + rhs_shape rhs_parameters = rhs_parameters.reshape(*rhs_shape) - rhs_code = numpy_to_hls_code(rhs_parameters, rhs_dtype, "rhs", False, False) - - param_code_sections.append("// RHS parameter tensor\n") - param_code_sections.append(rhs_code) + if is_mlo_mode: + # MLO mode: write memblock.dat for FINNLoop.generate_params() + # This matches FINN's ElementwiseBinaryOperation_hls behavior + # Merge first dimensions together for streaming format + rhs_flat = rhs_parameters.reshape(-1, pe) + # Flip PE dimension (FINN convention for streaming) + rhs_flat = np.flip(rhs_flat, axis=-1) + rhs_width = self.get_instream_width(1) + # Pad to nearest 4 bits to get hex strings + rhs_width_padded = roundup_to_integer_multiple(rhs_width, 4) + rhs_tensor = pack_innermost_dim_as_hex_string( + rhs_flat, rhs_dtype, rhs_width_padded, prefix="" + ) + rhs_stream = rhs_tensor.flatten() + rhs_stream = rhs_stream.copy() + with open(f"{code_gen_dir}/memblock.dat", "w") as f: + for val in rhs_stream: + f.write(val + "\n") + elif not self._needs_streaming_interface("rhs"): + # Static embedded mode: write to params.hpp + rhs_code = numpy_to_hls_code(rhs_parameters, rhs_dtype, "rhs", False, False) + + param_code_sections.append("// RHS parameter tensor\n") + param_code_sections.append(rhs_code) - # Add HLS pragmas for parameter storage and partitioning - self.code_gen_dict["$PRAGMAS$"].append( - "#pragma HLS BIND_STORAGE variable=rhs type=ROM_2P impl=distributed" - ) - self.code_gen_dict["$PRAGMAS$"].append( - f"#pragma HLS ARRAY_PARTITION variable=rhs complete dim={len(rhs_shape)}" + # Add HLS pragmas for parameter storage and partitioning + 
self.code_gen_dict["$PRAGMAS$"].append( + "#pragma HLS BIND_STORAGE variable=rhs type=ROM_2P impl=distributed" + ) + self.code_gen_dict["$PRAGMAS$"].append( + f"#pragma HLS ARRAY_PARTITION variable=rhs complete dim={len(rhs_shape)}" + ) + elif not self._needs_streaming_interface("rhs"): + # Static mode but no initializer - error + raise ValueError( + f"ElementwiseBinaryOp with static RHS (mem_mode=embedded/decoupled) requires RHS parameter, " + f"but {self.onnx_node.input[1]} is not an initializer" ) # Write params.hpp @@ -477,8 +522,8 @@ def generate_params(self, model, path): if param_code_sections: f.write("".join(param_code_sections)) else: - # No static parameters (dynamic_dynamic pattern) - f.write("// No static parameters (both inputs are streaming)\n") + # No static parameters (dynamic_dynamic pattern or MLO) + f.write("// No static parameters (inputs are streaming or MLO)\n") def execute_node(self, context, graph): """Execute ElementwiseBinaryOp in python, cppsim, or rtlsim mode. @@ -750,11 +795,15 @@ def _generate_header(self, tmpl_args: dict) -> list[str]: Returns: List of C++ code lines for header section """ - input_pattern = self.get_nodeattr("input_pattern") func = self.get_nodeattr("func") + # Determine pattern for documentation + lhs_streaming = self._needs_streaming_interface("lhs") + rhs_streaming = self._needs_streaming_interface("rhs") + pattern_desc = f"lhs={'stream' if lhs_streaming else 'static'}, rhs={'stream' if rhs_streaming else 'static'}" + return [ - f"// Elementwise binary operation: {func} ({input_pattern})", + f"// Elementwise binary operation: {func} ({pattern_desc})", f"{tmpl_args['OutType']} out[PE];", "#pragma HLS ARRAY_PARTITION variable=out complete dim=1", "", diff --git a/brainsmith/kernels/layernorm/__init__.py b/brainsmith/kernels/layernorm/__init__.py index d090a2b3..d776a1ae 100644 --- a/brainsmith/kernels/layernorm/__init__.py +++ b/brainsmith/kernels/layernorm/__init__.py @@ -5,7 +5,4 @@ from .layernorm import LayerNorm from .layernorm_hls import LayerNorm_hls -__all__ = [ - "LayerNorm", - "LayerNorm_hls", -] +__all__ = ["LayerNorm", "LayerNorm_hls"] diff --git a/brainsmith/kernels/layernorm/layernorm.py b/brainsmith/kernels/layernorm/layernorm.py index abe29226..54d64a6e 100644 --- a/brainsmith/kernels/layernorm/layernorm.py +++ b/brainsmith/kernels/layernorm/layernorm.py @@ -76,7 +76,7 @@ def can_infer_from(cls, node: NodeProto, model: ModelWrapper) -> bool: @classmethod def infer_from( - cls, node: NodeProto, model: ModelWrapper, insert_index: int + cls, node: NodeProto, model: ModelWrapper, insert_index: int, kernel_index: int = None ) -> df.TransformationResult: """Create LayerNorm HW node from FuncLayerNorm node. 
@@ -84,6 +84,7 @@ def infer_from( node: FuncLayerNorm node model: ModelWrapper for graph access insert_index: Where to insert new nodes (unused - no layout conversion) + kernel_index: Sequential index for this kernel type (for naming) Returns: TransformationResult with LayerNorm node @@ -95,14 +96,15 @@ def infer_from( # Pass along None case, handled by kernel schema default epsilon = epsilon_attr if epsilon_attr is None else epsilon_attr.f - # Create HW node + # Create HW node with sequential naming + node_name = f"LayerNorm_{kernel_index}" if kernel_index is not None else f"LayerNorm_{node.name}" hw_node = helper.make_node( "LayerNorm", inputs=list(node.input), outputs=list(node.output), domain="brainsmith.kernels", backend="fpgadataflow", - name=f"LayerNorm_{node.name}", + name=node_name, epsilon=epsilon, ) diff --git a/brainsmith/kernels/rotaryembedding/python/rotaryembedding_rtl.py b/brainsmith/kernels/rotaryembedding/python/rotaryembedding_rtl.py index ad97e097..69f4f58c 100644 --- a/brainsmith/kernels/rotaryembedding/python/rotaryembedding_rtl.py +++ b/brainsmith/kernels/rotaryembedding/python/rotaryembedding_rtl.py @@ -305,7 +305,7 @@ def prepare_rtlsim(self): self.set_nodeattr("rtlsim_so", sim.lib._name) return None - def code_generation_ipi(self): + def code_generation_ipi(self, behavioral=False): """Constructs and returns the TCL for node instantiation in Vivado IPI.""" code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") diff --git a/brainsmith/kernels/rotaryembedding/tests/test_fpga_dataflow_rope.py b/brainsmith/kernels/rotaryembedding/tests/test_fpga_dataflow_rope.py index 372dfff3..bba82ab2 100644 --- a/brainsmith/kernels/rotaryembedding/tests/test_fpga_dataflow_rope.py +++ b/brainsmith/kernels/rotaryembedding/tests/test_fpga_dataflow_rope.py @@ -53,7 +53,8 @@ from qonnx.core.datatype import DataType from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp -from qonnx.transformation.general import ApplyConfig, GiveUniqueNodeNames +from finn.transformation.general import ApplyConfig +from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_shapes import InferShapes from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model diff --git a/brainsmith/kernels/softmax/__init__.py b/brainsmith/kernels/softmax/__init__.py index 8398b58a..188069ff 100644 --- a/brainsmith/kernels/softmax/__init__.py +++ b/brainsmith/kernels/softmax/__init__.py @@ -11,7 +11,4 @@ from .softmax import Softmax from .softmax_hls import Softmax_hls -__all__ = [ - "Softmax", - "Softmax_hls", -] +__all__ = ["Softmax", "Softmax_hls"] diff --git a/brainsmith/kernels/softmax/softmax.py b/brainsmith/kernels/softmax/softmax.py index d0e3661a..1d70ee31 100644 --- a/brainsmith/kernels/softmax/softmax.py +++ b/brainsmith/kernels/softmax/softmax.py @@ -74,24 +74,31 @@ def can_infer_from(cls, node: NodeProto, model: ModelWrapper) -> bool: @classmethod def infer_from( - cls, node: NodeProto, model: ModelWrapper, insert_index: int + cls, node: NodeProto, model: ModelWrapper, insert_index: int, kernel_index: int = None ) -> df.TransformationResult: """Create Softmax Kernel node from ONNX Softmax node. NOTE: Softmax operates on the last dimension (axis=-1) and is layout-agnostic. However, the global normalize_dataflow_layouts preprocessing pass ensures inputs are in NHWC layout for consistency with other dataflow kernels. 
+ + Args: + node: ONNX Softmax node to convert + model: ModelWrapper for graph access + insert_index: Where to insert new nodes + kernel_index: Sequential index for this kernel type (for naming) """ cls.build_schema(node, model) - # Create HW node + # Create HW node with sequential naming + node_name = f"Softmax_{kernel_index}" if kernel_index is not None else f"Softmax_{node.name}" hw_node = helper.make_node( "Softmax", inputs=list(node.input), outputs=list(node.output), domain="brainsmith.kernels", backend="fpgadataflow", - name=f"Softmax_{node.name}", + name=node_name, ) return df.TransformationResult(nodes_to_insert=[hw_node], nodes_to_remove=[node]) diff --git a/brainsmith/kernels/thresholding/thresholding.py b/brainsmith/kernels/thresholding/thresholding.py index e124b561..4a930d06 100644 --- a/brainsmith/kernels/thresholding/thresholding.py +++ b/brainsmith/kernels/thresholding/thresholding.py @@ -17,6 +17,8 @@ ############################################################################ +import logging + import numpy as np from onnx import NodeProto, helper from qonnx.core.datatype import DataType @@ -27,19 +29,19 @@ import brainsmith.dataflow as df from brainsmith.dataflow import FULL_DIM, KernelOp from brainsmith.dataflow.constraints import ( - DatatypeInteger, - DimensionDivisible, IsDynamic, - IsStatic, ) from brainsmith.dataflow.spec_helpers import derive_dim -from brainsmith.dataflow.types import VALUE_OPTIMIZED, ShapeHierarchy +from brainsmith.dataflow.types import ShapeHierarchy from brainsmith.registry import kernel +logger = logging.getLogger(__name__) + # ============================================================================= # Thresholding Schema # ============================================================================= + THRESHOLDING_SCHEMA = df.KernelSchema( name="Thresholding", inputs=[ @@ -55,7 +57,8 @@ # Not tiled or streamed - full tensor loaded as initializer block_tiling=[], # No block tiling (static data) stream_tiling=[], # Not streamed (static data) - datatype=VALUE_OPTIMIZED, # Optimize from actual values + datatype=None, # Read from graph (ImportQONNXQuantization already set it) + mem_modes=frozenset({"embedded", "decoupled", "dynamic"}), # All possible modes ), ], outputs=[ @@ -74,19 +77,15 @@ "num_steps": ("i", True, 1), # Number of threshold steps (required) "act_val": ("i", False, 0), # Activation bias value (ActVal) "num_input_vectors": ("ints", False, [1]), # Batch/spatial dims (legacy) - "runtime_writeable_weights": ("i", False, 0), # AXI-lite writable (1/0) + "runtime_writeable_weights": ("i", False, 0), # Legacy FINN compat (always 0) }, # ========================================================================= # VALIDATION: Constraints # ========================================================================= constraints=[ - # Input must be dynamic, thresholds must be static IsDynamic(("input",)), - IsStatic(("thresholds",)), - # PE must divide number of channels - DimensionDivisible("input", -1, "PE", hierarchy=df.ShapeHierarchy.STREAM), - # Datatypes must be integer (enforced in can_infer_from) - DatatypeInteger(("input", "output")), + # Note: IsStatic(("thresholds",)) removed - causes issues in loop bodies + # where thresholds are streamed. mem_modes handles embedded/decoupled/dynamic. 
], # Parallelization ) @@ -153,7 +152,7 @@ def can_infer_from(node, model: ModelWrapper) -> bool: ) == mt_inst.get_nodeattr("out_bias") @staticmethod - def infer_from(node, model: ModelWrapper, insert_index: int) -> df.TransformationResult: + def infer_from(node, model: ModelWrapper, insert_index: int, kernel_index: int = None) -> df.TransformationResult: """Convert MultiThreshold node to Thresholding node. Extracts and validates MultiThreshold-specific parameters (scale, actval). @@ -164,6 +163,7 @@ def infer_from(node, model: ModelWrapper, insert_index: int) -> df.Transformatio node: MultiThreshold ONNX node model: Model wrapper insert_index: Where to insert new node (unused - no layout conversion) + kernel_index: Sequential index for this kernel type (for naming) Returns: df.TransformationResult with new Thresholding node @@ -195,14 +195,15 @@ def infer_from(node, model: ModelWrapper, insert_index: int) -> df.Transformatio thl_thres_shape = model.get_tensor_shape(node.input[1]) thl_in_shape = model.get_tensor_shape(node.input[0]) - # Create HW node + # Create HW node with sequential naming + node_name = f"Thresholding_{kernel_index}" if kernel_index is not None else f"Thresholding_{node.name}" hw_node = helper.make_node( "Thresholding", inputs=list(node.input), outputs=list(node.output), domain="brainsmith.kernels", backend="fpgadataflow", - name=f"Thresholding_{node.name}", + name=node_name, # Kernel parameters num_steps=int(thl_thres_shape[1]), act_val=actval, @@ -210,6 +211,13 @@ def infer_from(node, model: ModelWrapper, insert_index: int) -> df.Transformatio runtime_writeable_weights=0, ) + # Mark thresholds as weight (for mem_mode parameter creation) + # Thresholds input (index 1) is always an initializer + # Attribute presence indicates weight; builder will create parameter + thresholds_input = node.input[1] + if model.get_initializer(thresholds_input) is not None: + hw_node.attribute.append(helper.make_attribute("input1MemType", "embedded")) + return df.TransformationResult(nodes_to_insert=[hw_node], nodes_to_remove=[node]) # ================================================================ @@ -219,30 +227,28 @@ def infer_from(node, model: ModelWrapper, insert_index: int) -> df.Transformatio def get_instream_width(self, ind=0): """Get input stream width in bits. - Overrides base class for ind=1 to handle decoupled threshold memory mode. - In decoupled mode, thresholds stream in via AXI-Stream instead of being - embedded in BRAM. + Overrides base class for ind=1 to handle threshold memory modes. + In decoupled and dynamic modes, thresholds stream in via AXI-Stream. 
For ind=0 (data): Uses base class (PE * input_datatype.bitwidth()) - For ind=1 (thresholds): PE * weight_datatype.bitwidth() * num_steps if decoupled, else 0 + For ind=1 (thresholds): PE * weight_datatype.bitwidth() * num_steps if streaming, else 0 """ if ind == 0: # Use base class implementation return super().get_instream_width(ind) elif ind == 1: - # Custom logic for threshold memory modes - mem_mode = ( - self.get_nodeattr("mem_mode") - if self.has_nodeattr("mem_mode") - else "internal_embedded" - ) + # Get mem_mode from design point inputs (defaults to "embedded") + thresholds_iface = self.design_point.inputs.get("thresholds") + mem_mode = (thresholds_iface.mem_mode if thresholds_iface and thresholds_iface.mem_mode + else "embedded") - if mem_mode == "internal_decoupled": + # Both decoupled and dynamic modes require streaming interface + if mem_mode in ("decoupled", "dynamic"): pe = self.get_nodeattr("PE") wp = self.get_input_datatype(1).bitwidth() n_thres_steps = self.get_nodeattr("num_steps") return pe * wp * n_thres_steps - return 0 + return 0 # embedded mode: no streaming interface else: raise ValueError(f"Invalid input index: {ind}") @@ -251,12 +257,23 @@ def calc_tmem(self): Returns: NumChannels // PE """ - self.get_ ki = self.design_point - num_channels = ki.inputs["input"].tensor_shape[-1] + num_channels = ki.inputs["input"].block_shape[-1] pe = self.get_nodeattr("PE") return num_channels // pe + def get_exp_cycles(self): + """Return expected cycles for thresholding operation. + + Formula: Channels/PE × batch_size × fmdim × fmdim + This is the product of all folded output shape dimensions except the last (PE). + + Returns: + int: Expected number of cycles + """ + import numpy as np + return np.prod(self.get_folded_output_shape()[:-1]) + def get_hw_compatible_threshold_tensor(self, orig_thres_matrix): """Convert threshold matrix to HW-compatible format. @@ -273,7 +290,7 @@ def get_hw_compatible_threshold_tensor(self, orig_thres_matrix): Reshaped threshold tensor (1, PE, TMEM, n_thres_steps) """ ki = self.design_point - num_channels = ki.inputs["input"].tensor_shape[-1] + num_channels = ki.inputs["input"].block_shape[-1] pe = self.get_nodeattr("PE") tmem = num_channels // pe @@ -320,6 +337,9 @@ def execute_node(self, context, graph): Applies multi-threshold activation to input tensor. """ + # Ensure design_space initialized (QONNX executor creates fresh instances) + self._ensure_initialized_for_execution(graph) + node = self.onnx_node inp_values = context[node.input[0]] th_val = context[node.input[1]] @@ -346,20 +366,53 @@ def execute_node(self, context, graph): context[node.output[0]] = y.astype(np.float32) - def make_shape_compatible_op(self, model): - """Create a shape-compatible ONNX node. + def infer_node_datatype(self, model): + """Infer and propagate datatypes (inputs and outputs). - Used during shape inference to create a temporary node - with explicit shape information. + Overrides base class to also propagate threshold datatype to model. + Base class only propagates outputs, but threshold dtype optimization + requires updating the model's input[1] tensor datatype. 
""" - in_shape = self.get_normal_input_shape(0) - out_shape = self.get_normal_output_shape(0) + # Call base class (initializes design space, propagates outputs) + super().infer_node_datatype(model) - return helper.make_node( - "Thresholding", - inputs=self.onnx_node.input, - outputs=self.onnx_node.output, - domain="brainsmith.kernels", - input_shape=list(in_shape), - output_shape=list(out_shape), - ) + # Additionally propagate threshold datatype to model + # This matches FINN's minimize_accumulator_width which updates model tensor dtype + if len(self.onnx_node.input) > 1: + thresh_dtype = self.get_input_datatype(1) + model.set_tensor_datatype(self.onnx_node.input[1], thresh_dtype) + + # ================================================================ + # MLO Loop Body Adaptation + # ================================================================ + + def adapt_for_loop_body(self, loop_signature): + """Adapt Thresholding for use in FINNLoop body. + + Forces threshold memory mode to "dynamic" when weights are streamed from loop level. + Only modifies the attribute if: + 1. Thresholds are marked as weight (attribute exists from InferKernel) + 2. Loop signature indicates input is PARAMETER (streamed per iteration) + + Args: + loop_signature: List of LoopBodyInputType values for each input + """ + from qonnx.util.basic import get_by_name + + # Check if thresholds are marked as weight + attr = get_by_name(self.onnx_node.attribute, "input1MemType") + if attr is None: + return # Not a weight, nothing to adapt + + # Check if loop signature indicates this input is streamed as parameter + if loop_signature and len(loop_signature) > 1: + from finn.transformation.fpgadataflow.loop_rolling import LoopBodyInputType + + if loop_signature[1] == LoopBodyInputType.PARAMETER: + self.set_nodeattr("input1MemType", "dynamic") + logger.debug(f"{self.onnx_node.name}: Forced input1MemType=dynamic for MLO") + + def make_shape_compatible_op(self, model): + oshape = model.get_tensor_shape(self.onnx_node.output[0]) + # implement tensor with correct shape + return super().make_const_shape_op(oshape) diff --git a/brainsmith/kernels/thresholding/thresholding_hls.py b/brainsmith/kernels/thresholding/thresholding_hls.py index fd6a7571..b7707a23 100644 --- a/brainsmith/kernels/thresholding/thresholding_hls.py +++ b/brainsmith/kernels/thresholding/thresholding_hls.py @@ -27,7 +27,6 @@ @backend( - name="ThresholdingHLS", target_kernel="brainsmith:Thresholding", language="hls", description="HLS implementation of Thresholding", @@ -41,20 +40,30 @@ class Thresholding_hls(Thresholding, HLSBackend): Key features: - Extracts shapes from design_point (not nodeattrs) - - Supports two memory modes: - * internal_embedded: Thresholds in thresh.h header - * internal_decoupled: Streaming thresholds via separate interface - - Optional runtime-writable weights (internal_decoupled mode) + - Supports three memory modes (via input1MemType DSE parameter): + * embedded: Thresholds in thresh.h header (compile-time constant) + * decoupled: Thresholds in separate memory, streamed via in1_V + * dynamic: Thresholds streamed from external source (MLO mode) Memory Modes: - - internal_embedded: Thresholds embedded in HLS (static, no AXI-lite) - - internal_decoupled: Thresholds streamed via in1_V interface - (optionally writable via AXI-lite if runtime_writeable_weights=1) + - embedded: Thresholds embedded in HLS code (smallest, fastest) + - decoupled: Thresholds in separate BRAM/LUT, streamed via in1_V + - dynamic: External streaming (MLO), no internal 
storage """ def __init__(self, onnx_node, **kwargs): super().__init__(onnx_node, **kwargs) + def _get_mem_mode(self) -> str: + """Get memory mode from design point interface. + + Returns: + Memory mode string: "embedded", "decoupled", or "dynamic" + """ + thresholds_iface = self.design_point.inputs.get("thresholds") + return (thresholds_iface.mem_mode if thresholds_iface and thresholds_iface.mem_mode + else "embedded") + def get_nodeattr_types(self): """Define nodeattrs for Thresholding_hls backend. @@ -69,14 +78,8 @@ def get_nodeattr_types(self): # Add HLS-specific nodeattrs my_attrs.update( { - # Memory mode for thresholds - "mem_mode": ( - "s", - False, - "internal_decoupled", - {"internal_embedded", "internal_decoupled"}, - ), - # String defining memory type (for internal_embedded) + # Memory type for embedded mode (BRAM vs LUT) + # NOTE: mem_mode now comes from design point interface (input1MemType DSE param) "ram_style": ("s", False, "distributed", {"distributed", "block"}), } ) @@ -120,7 +123,8 @@ def get_ap_int_max_w(self): """Get maximum ap_int width needed.""" ap_int_max_w = HLSBackend.get_ap_int_max_w(self) - if self.get_nodeattr("mem_mode") == "internal_decoupled": + # Decoupled and dynamic modes have streaming threshold interface + if self._get_mem_mode() in ("decoupled", "dynamic"): weightstream = self.get_instream_width(1) ap_int_max_w = max([weightstream, ap_int_max_w]) @@ -130,8 +134,8 @@ def code_generation_ipgen(self, model, fpgapart, clk): """Generates c++ code and tcl script for ip generation.""" super().code_generation_ipgen(model, fpgapart, clk) - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "internal_decoupled": + # Decoupled and dynamic modes need memstream HDL + if self._get_mem_mode() in ("decoupled", "dynamic"): self.generate_hdl_memstream(fpgapart) def get_template_param_values(self): @@ -259,26 +263,33 @@ def generate_params(self, model, path): """Generate parameter files for HLS compilation.""" code_gen_dir = path thresholds = model.get_initializer(self.onnx_node.input[1]) - mem_mode = self.get_nodeattr("mem_mode") + mem_mode = self._get_mem_mode() - if mem_mode == "internal_embedded": + if mem_mode == "embedded": # Save thresholds in thresh.h weight_filename = f"{code_gen_dir}/thresh.h" self.make_weight_file(thresholds, "hls_header", weight_filename) - elif mem_mode == "internal_decoupled": - # Save internal_decoupled weights for cppsim + elif mem_mode == "decoupled": + # Save decoupled weights for cppsim weight_filename_sim = f"{code_gen_dir}/thresholds.npy" self.make_weight_file(thresholds, "decoupled_npy", weight_filename_sim) # Also save weights as Verilog .dat file weight_filename_rtl = f"{code_gen_dir}/memblock.dat" self.make_weight_file(thresholds, "decoupled_verilog_dat", weight_filename_rtl) + elif mem_mode == "dynamic": + # Dynamic mode: thresholds streamed from external source (MLO) + # No weight files needed - thresholds come from loop level + pass else: - raise Exception("Unrecognized mem_mode") + raise Exception(f"Unrecognized mem_mode: {mem_mode}") def execute_node(self, context, graph): """Execute node in cppsim or rtlsim mode.""" + # Ensure design_space initialized (QONNX executor creates fresh instances) + self._ensure_initialized_for_execution(graph) + mode = self.get_nodeattr("exec_mode") node = self.onnx_node @@ -340,7 +351,9 @@ def execute_node(self, context, graph): inp = npy_to_rtlsim_input(f"{code_gen_dir}/input_0.npy", export_idt, nbits) super().reset_rtlsim(sim) - if self.get_nodeattr("mem_mode") == 
"internal_decoupled": + mem_mode = self._get_mem_mode() + # Decoupled and dynamic modes need threshold input + if mem_mode in ("decoupled", "dynamic"): wnbits = self.get_instream_width(1) export_wdt = self.get_input_datatype(1) wei = npy_to_rtlsim_input(f"{code_gen_dir}/thresholds.npy", export_wdt, wnbits) @@ -349,13 +362,13 @@ def execute_node(self, context, graph): "inputs": {"in0": inp, "in1": wei * num_w_reps}, "outputs": {"out0": []}, } - elif self.get_nodeattr("mem_mode") == "internal_embedded": + elif mem_mode == "embedded": io_dict = { "inputs": {"in0": inp}, "outputs": {"out0": []}, } else: - raise Exception("Unrecognized mem_mode") + raise Exception(f"Unrecognized mem_mode: {mem_mode}") self.rtlsim_multi_io(sim, io_dict) super().close_rtlsim(sim) @@ -381,7 +394,8 @@ def global_includes(self): """Generate global include directives.""" self.code_gen_dict["$GLOBALS$"] = ['#include "activations.hpp"'] - if self.get_nodeattr("mem_mode") == "internal_embedded": + # Only embedded mode includes thresh.h header + if self._get_mem_mode() == "embedded": self.code_gen_dict["$GLOBALS$"] += ['#include "thresh.h"'] def defines(self, var): @@ -399,7 +413,8 @@ def defines(self, var): #define ImgDim1 {total_spatial_size}""" ] - if self.get_nodeattr("mem_mode") == "internal_decoupled": + # Decoupled and dynamic modes need these defines for streaming interface + if self._get_mem_mode() in ("decoupled", "dynamic"): self.code_gen_dict["$DEFINES$"].append( f"#define ActVal1 {self.get_nodeattr('act_val')}" ) @@ -428,8 +443,9 @@ def read_npy_data(self): f'npy2apintstream<{packed_hls_type}, {elem_hls_type}, {elem_bits}, {npy_type}>("{npy_in}", in0_V, false);' ) - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "internal_decoupled": + mem_mode = self._get_mem_mode() + # Decoupled and dynamic modes need to read threshold data for cppsim + if mem_mode in ("decoupled", "dynamic"): tdt = self.get_input_datatype(1) elem_bits = tdt.bitwidth() packed_bits = self.get_instream_width(1) @@ -453,8 +469,9 @@ def strm_decl(self): f'hls::stream> out0_V ("out0_V");' ) - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "internal_decoupled": + # Decoupled and dynamic modes have threshold streaming interface + mem_mode = self._get_mem_mode() + if mem_mode in ("decoupled", "dynamic"): self.code_gen_dict["$STREAMDECLARATIONS$"].append( f'hls::stream> in1_V ("in1_V");' ) @@ -462,21 +479,21 @@ def strm_decl(self): def docompute(self): """Generate HLS docompute code.""" tmpl_args = self.get_template_param_values() - mem_mode = self.get_nodeattr("mem_mode") + mem_mode = self._get_mem_mode() - if mem_mode == "internal_embedded": + if mem_mode == "embedded": self.code_gen_dict["$DOCOMPUTE$"] = [ f"""Thresholding_Batch (in0_V, out0_V, threshs, numReps);""" ] - elif mem_mode == "internal_decoupled": + elif mem_mode in ("decoupled", "dynamic"): # Note: numReps is set to 1, repetition comes from threshold stream self.code_gen_dict["$DOCOMPUTE$"] = [ f"""Thresholding_Stream_Batch (in0_V, out0_V, in1_V, numReps);""" ] else: - raise Exception("Unrecognized mem_mode") + raise Exception(f"Unrecognized mem_mode: {mem_mode}") def dataoutstrm(self): """Generate code for output stream.""" @@ -503,13 +520,15 @@ def dataoutstrm(self): def blackboxfunction(self): """Generate black box function signature.""" - if self.get_nodeattr("mem_mode") == "internal_embedded": + mem_mode = self._get_mem_mode() + + if mem_mode == "embedded": self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ f"""void {self.onnx_node.name}(hls::stream> &in0_V, 
hls::stream> &out0_V )""" ] - elif self.get_nodeattr("mem_mode") == "internal_decoupled": + elif mem_mode in ("decoupled", "dynamic"): self.code_gen_dict["$BLACKBOXFUNCTION$"] = [ f"""void {self.onnx_node.name}(hls::stream> &in0_V, hls::stream> &in1_V, @@ -517,7 +536,7 @@ def blackboxfunction(self): )""" ] else: - raise Exception("Unrecognized mem_mode") + raise Exception(f"Unrecognized mem_mode: {mem_mode}") def pragmas(self): """Generate HLS pragmas.""" @@ -525,7 +544,8 @@ def pragmas(self): self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=out0_V") self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") - if self.get_nodeattr("mem_mode") == "internal_embedded": + mem_mode = self._get_mem_mode() + if mem_mode == "embedded": # Threshold tensor is acc_type [PE][TMEM][N_THRES] # Partition for parallel access along PE and N_THRES dimensions (dims 1 and 3) self.code_gen_dict["$PRAGMAS$"].append( @@ -559,20 +579,19 @@ def pragmas(self): f"Invalid ram_style: {ram_style}. Must be 'block' or 'distributed'" ) - elif self.get_nodeattr("mem_mode") == "internal_decoupled": + elif mem_mode in ("decoupled", "dynamic"): self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE axis port=in1_V") - def code_generation_ipi(self): + def code_generation_ipi(self, behavioral=False): """Generate TCL commands for IPI integration.""" source_target = f"./ip/verilog/rtl_ops/{self.onnx_node.name}" cmd = [f"file mkdir {source_target}"] - # Add streamer if needed (internal_decoupled mode) - mem_mode = self.get_nodeattr("mem_mode") + # Add streamer if needed (decoupled/dynamic modes) + mem_mode = self._get_mem_mode() - if mem_mode == "internal_decoupled": + if mem_mode in ("decoupled", "dynamic"): node_name = self.onnx_node.name - runtime_writable = self.get_nodeattr("runtime_writeable_weights") == 1 # Create hierarchy for this layer clk_name = self.get_verilog_top_module_intf_names()["clk"][0] @@ -670,27 +689,15 @@ def code_generation_ipi(self): f"[get_bd_intf_pins {node_name}/{node_name}/{dout_name}]" ) - if runtime_writable: - # Expose AXI lite interface for writable weights - axilite_name = self.get_verilog_top_module_intf_names()["axilite"][0] - cmd.append( - f"create_bd_intf_pin -mode Slave " - f"-vlnv xilinx.com:interface:aximm_rtl:1.0 /{node_name}/{axilite_name}" - ) - cmd.append( - f"connect_bd_intf_net [get_bd_intf_pins {node_name}/{axilite_name}] " - f"[get_bd_intf_pins {node_name}/{strm_inst}/{axilite_name}]" - ) - cmd.append("assign_bd_address") - + # Note: AXI-lite runtime-writeable weights removed for simplicity cmd.append("save_bd_design") - elif mem_mode == "internal_embedded": - # Base class impl sufficient for internal_embedded mode - return super().code_generation_ipi() + elif mem_mode == "embedded": + # Base class impl sufficient for embedded mode + return super().code_generation_ipi(behavioral) else: - raise Exception("Unrecognized mem_mode for Thresholding") + raise Exception(f"Unrecognized mem_mode for Thresholding: {mem_mode}") return cmd @@ -698,13 +705,8 @@ def get_verilog_top_module_intf_names(self): """Get Verilog top module interface names.""" intf_names = super().get_verilog_top_module_intf_names() - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode == "internal_decoupled": - # Only expose axilite interface if runtime_writeable_weights is set - runtime_writable = self.get_nodeattr("runtime_writeable_weights") == 1 - if runtime_writable: - intf_names["axilite"] = ["s_axilite"] - + # Note: AXI-lite support for 
runtime-writeable weights removed for simplicity + # Decoupled and dynamic modes only expose streaming interfaces return intf_names def get_op_and_param_counts(self): @@ -741,8 +743,9 @@ def derive_characteristic_fxns(self, period): "outputs": {"out0": []}, } - mem_mode = self.get_nodeattr("mem_mode") - if mem_mode in ["internal_decoupled", "external"]: + mem_mode = self._get_mem_mode() + # Decoupled and dynamic modes have weight input + if mem_mode in ("decoupled", "dynamic", "external"): n_weight_inps = self.calc_tmem() num_w_reps = np.prod(self.get_nodeattr("num_input_vectors")) io_dict["inputs"]["in1"] = [0 for i in range(num_w_reps * n_weight_inps)] diff --git a/brainsmith/kernels/thresholding/thresholding_rtl.py b/brainsmith/kernels/thresholding/thresholding_rtl.py index 88ef6623..e80d81b1 100644 --- a/brainsmith/kernels/thresholding/thresholding_rtl.py +++ b/brainsmith/kernels/thresholding/thresholding_rtl.py @@ -27,11 +27,10 @@ @backend( - name="ThresholdingRTL", target_kernel="brainsmith:Thresholding", language="rtl", description="RTL implementation of Thresholding", - author="Microsoft Corporation", + author="AMD FINN" ) class Thresholding_rtl(Thresholding, RTLBackend): """RTL backend for Thresholding kernel (KernelOp-based). @@ -90,9 +89,9 @@ def get_pe_mem_geometries(self): odt = self.get_output_datatype() odt_bits = odt.bitwidth() - # Extract NumChannels from design_point (Arete principle) + # Extract NumChannels from design_point block_shape (channels before stream tiling) ki = self.design_point - t_channels = ki.inputs["input"].tensor_shape[-1] + t_channels = ki.inputs["input"].block_shape[-1] cf = t_channels / pe is_uniform = self.get_nodeattr("uniform_thres") @@ -161,8 +160,8 @@ def get_all_meminit_filenames(self, abspath=False): dat_files = [] t_path = self.get_nodeattr("code_gen_dir_ipgen") if abspath else "." pe = self.get_nodeattr("PE") - output_data_type = self.get_nodeattr("output_dtype") - o_bitwidth = DataType[output_data_type].bitwidth() + odt = self.get_output_datatype(0) + o_bitwidth = odt.bitwidth() for stage in range(o_bitwidth): for pe_value in range(pe): @@ -180,17 +179,20 @@ def prepare_codegen_rtl_values(self, model): code_gen_dict = {} t_path = self.get_nodeattr("code_gen_dir_ipgen") - self.generate_params(model, t_path) + # For MLO nodes, thresholds are graph inputs (PARAMETERS), not initializers + # Skip generate_params as there are no initializers to process + if not self.get_nodeattr("mlo_max_iter"): + self.generate_params(model, t_path) bias = self.get_nodeattr("act_val") - output_data_type = self.get_nodeattr("output_dtype") - input_data_type = self.get_nodeattr("input_dtype") - o_bitwidth = DataType[output_data_type].bitwidth() + odt = self.get_output_datatype(0) + idt = self.get_input_datatype(0) + o_bitwidth = odt.bitwidth() pe = self.get_nodeattr("PE") - # Extract NumChannels from design_point (Arete principle) + # Extract NumChannels from design_point block_shape (channels before stream tiling) ki = self.design_point - num_channels = ki.inputs["input"].tensor_shape[-1] + num_channels = ki.inputs["input"].block_shape[-1] # RTL expects 2^N-1 thresholds, but narrow range quantization results in # one less threshold. 
Prepend a dummy threshold (minimal possible value @@ -200,11 +202,11 @@ def prepare_codegen_rtl_values(self, model): wdt = self.get_input_datatype(1) if expected_thresholds != n_thres_steps: - if DataType[output_data_type].signed(): + if odt.signed(): bias = bias - 1 else: max_val = wdt.max() - if max_val <= DataType[input_data_type].max(): + if max_val <= idt.max(): max_val = max_val + 1 # Increase wdt if not wdt.signed(): @@ -212,11 +214,6 @@ def prepare_codegen_rtl_values(self, model): else: wdt = DataType.get_smallest_possible(-max_val - 1) - # If single threshold value found, set num_channels to PE - thresholds = model.get_initializer(self.onnx_node.input[1]) - if thresholds.shape[0] == 1: - num_channels = pe - code_gen_dict["$THRESHOLDS_PATH$"] = [f'"./{self.onnx_node.name}_"'] # Identify module name @@ -226,7 +223,7 @@ def prepare_codegen_rtl_values(self, model): code_gen_dict["$TOP_MODULE$"] = code_gen_dict["$MODULE_NAME_AXI_WRAPPER$"] # Identify module variables - i_bitwidth = DataType[input_data_type].bitwidth() + i_bitwidth = idt.bitwidth() code_gen_dict["$N$"] = [str(2**o_bitwidth - 1)] # Number of needed thresholds code_gen_dict["$WT$"] = [str(wdt.bitwidth())] # Threshold precision @@ -235,6 +232,13 @@ def prepare_codegen_rtl_values(self, model): code_gen_dict["$BIAS$"] = [str(bias)] # Activation bias value code_gen_dict["$PE$"] = [str(pe)] # PE + # MLO support: Set SETS parameter based on mlo_max_iter + mlo_max_iter = self.get_nodeattr("mlo_max_iter") + if mlo_max_iter: + code_gen_dict["$SETS$"] = [str(mlo_max_iter)] + else: + code_gen_dict["$SETS$"] = [str(1)] + # Is input datatype signed or unsigned? # Thresholding core needs to know this when comparing weights to inputs if self.get_input_datatype(0).signed(): @@ -257,9 +261,8 @@ def prepare_codegen_rtl_values(self, model): ) code_gen_dict["$O_BITS$"] = [str(int(o_bits))] - # Runtime-writable weights - rt_weights = self.get_nodeattr("runtime_writeable_weights") - code_gen_dict["$USE_AXILITE$"] = [str(rt_weights)] + # Runtime-writable weights (AXI-lite support removed) + code_gen_dict["$USE_AXILITE$"] = ["0"] # Depth triggers and deep pipeline depth_trigger_uram = self.get_nodeattr("depth_trigger_uram") @@ -419,7 +422,7 @@ def execute_node(self, context, graph): else: raise Exception(f"Invalid exec_mode: {mode}. Must be 'cppsim' or 'rtlsim'") - def code_generation_ipi(self): + def code_generation_ipi(self, behavioral=False): """Constructs and returns TCL commands for node instantiation as RTL block.""" rtl_file_list = self.get_rtl_file_list() code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") @@ -442,12 +445,7 @@ def code_generation_ipi(self): def get_verilog_top_module_intf_names(self): """Get Verilog top module interface names.""" - intf_names = super().get_verilog_top_module_intf_names() - - if self.get_nodeattr("runtime_writeable_weights") == 1: - intf_names["axilite"] = ["s_axilite"] - - return intf_names + return super().get_verilog_top_module_intf_names() def generate_params(self, model, path): """Generate parameter files for RTL compilation. 
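For orientation, the $KEY$ entries assembled above are plain string substitutions applied to the RTL template. A minimal sketch of the mechanism, with hypothetical values and a hypothetical template line:

    # Hypothetical values; the real dict is built in prepare_codegen_rtl_values
    code_gen_dict = {
        "$PE$": ["4"],
        "$SETS$": ["1"],  # >1 only when mlo_max_iter is set (MLO)
    }
    template = "localparam PE = $PE$; localparam SETS = $SETS$;"
    for key, values in code_gen_dict.items():
        template = template.replace(key, values[0])
    print(template)  # localparam PE = 4; localparam SETS = 1;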
@@ -457,12 +455,10 @@ def generate_params(self, model, path): path: Output directory path """ thresholds = model.get_initializer(self.onnx_node.input[1]) - rt_weights = self.get_nodeattr("runtime_writeable_weights") + # Skip if no initializer (will be provided at runtime) + if thresholds is None: + return file_name = f"{path}/memblock.dat" - - if rt_weights: - self.make_weight_file(thresholds, "decoupled_runtime", file_name) - self.make_weight_file(thresholds, "internal_embedded", file_name) def make_weight_file(self, weights, weight_file_mode, weight_file_name): @@ -480,13 +476,13 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): thresholds = weights pe = self.get_nodeattr("PE") - # Extract NumChannels from design_point (Arete principle) + # Extract NumChannels from design_point block_shape (channels before stream tiling) ki = self.design_point - num_channels = ki.inputs["input"].tensor_shape[-1] + num_channels = ki.inputs["input"].block_shape[-1] - output_data_type = self.get_nodeattr("output_dtype") - o_bitwidth = DataType[output_data_type].bitwidth() - input_data_type = self.get_nodeattr("input_dtype") + odt = self.get_output_datatype(0) + idt = self.get_input_datatype(0) + o_bitwidth = odt.bitwidth() # RTL expects 2^N-1 thresholds, but narrow range quantization results in # one less threshold. Prepend/append dummy threshold and increase numSteps. @@ -495,13 +491,13 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): wdt = self.get_input_datatype(1) if expected_thresholds != n_thres_steps: - if DataType[output_data_type].signed(): + if odt.signed(): min_val = wdt.min() thresholds = np.insert(thresholds, 0, min_val, axis=1) else: # Temporary fix for unsigned narrow quantization max_val = wdt.max() - if max_val > DataType[input_data_type].max(): + if max_val > idt.max(): thresholds = np.insert(thresholds, len(thresholds[0]), max_val, axis=1) else: max_val = max_val + 1 @@ -515,10 +511,9 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): n_thres_steps += 1 if weight_file_mode == "decoupled_runtime": - # If single threshold value found, broadcast + # If single threshold value found, tile to all channels (per-tensor quantization) if thresholds.shape[0] == 1: - thresholds = np.broadcast_to(thresholds, (pe, expected_thresholds)) - num_channels = pe + thresholds = np.tile(thresholds, (num_channels, 1)) width_padded = roundup_to_integer_multiple(thresholds.shape[1], 2**o_bitwidth) thresh_padded = np.zeros((thresholds.shape[0], width_padded)) @@ -551,16 +546,15 @@ def make_weight_file(self, weights, weight_file_mode, weight_file_name): f.write(val + "\n") elif weight_file_mode == "internal_embedded": + # If single threshold value found, tile to all channels (per-tensor quantization) + if thresholds.shape[0] == 1: + thresholds = np.tile(thresholds, (num_channels, 1)) + # Add dummy dimension as final dimension (gets packed) t_expand = np.expand_dims(thresholds, axis=-1) bw_hexdigit = roundup_to_integer_multiple(wdt.bitwidth(), 4) t_packed = pack_innermost_dim_as_hex_string(t_expand, wdt, bw_hexdigit, prefix="") - # If single threshold value found, broadcast - if t_packed.shape[0] == 1: - t_packed = np.broadcast_to(t_packed, (pe, expected_thresholds)) - num_channels = pe - channel_fold = int(num_channels / pe) for stage in range(o_bitwidth): diff --git a/brainsmith/primitives/transforms/expand_norms.py b/brainsmith/primitives/transforms/expand_norms.py index 33c6ca57..dba0780c 100644 --- 
a/brainsmith/primitives/transforms/expand_norms.py
+++ b/brainsmith/primitives/transforms/expand_norms.py
@@ -77,17 +77,21 @@ def apply(self, model):
         else:
             last_node = func_ln_node
 
-        # Add bias if present
+        # Add bias if non-trivial (not all zeros)
         if bias is not None:
-            bias_intermediate = oh.make_tensor_value_info(
-                model.make_new_valueinfo_name(), TensorProto.FLOAT, act_shape
-            )
-            graph.value_info.append(bias_intermediate)
-            last_node.output[0] = bias_intermediate.name
-
-            add_node = oh.make_node("Add", [bias_intermediate.name, bias], [act_out])
-            nodes_to_insert.append(add_node)
-            model.set_tensor_datatype(bias_intermediate.name, wdt)
+            bias_data = model.get_initializer(bias)
+            if bias_data is not None and not np.allclose(bias_data, 0.0):
+                bias_intermediate = oh.make_tensor_value_info(
+                    model.make_new_valueinfo_name(),
+                    TensorProto.FLOAT,
+                    act_shape
+                )
+                graph.value_info.append(bias_intermediate)
+                last_node.output[0] = bias_intermediate.name
+
+                add_node = oh.make_node("Add", [bias_intermediate.name, bias], [act_out])
+                nodes_to_insert.append(add_node)
+                model.set_tensor_datatype(bias_intermediate.name, wdt)
 
         replacements.append((node_idx, node, nodes_to_insert))
diff --git a/brainsmith/primitives/transforms/extract_shell_integration_metadata.py b/brainsmith/primitives/transforms/extract_shell_integration_metadata.py
index 96bc8b6d..6c2e24a6 100644
--- a/brainsmith/primitives/transforms/extract_shell_integration_metadata.py
+++ b/brainsmith/primitives/transforms/extract_shell_integration_metadata.py
@@ -4,7 +4,10 @@
 """Shell integration metadata extraction transform."""
 
 import json
+import os
 
+import numpy as np
+from finn.util.mlo_sim import dat_file_to_numpy_array
 import qonnx.custom_op.registry as registry
 from qonnx.transformation.base import Transformation
 
@@ -21,23 +24,70 @@ def __init__(self, metadata_file: str):
     def apply(self, model):
         graph = model.graph
 
+        # Destination dir for copied artifacts
+        dirname = os.path.dirname(self.metadata_file)
+
+        # Search for FINNLoop ops (nested FINNLoops are not currently supported)
+        finn_loops = {}
+        mlo = False
+        for node in model.graph.node:
+            if node.op_type == "FINNLoop":
+                finnloop_op = registry.getCustomOp(node)
+                finnloop_body = finnloop_op.get_nodeattr("body")
+
+                mvau_hbm_weights = {}
+                extern_idx = 0
+                for idx, lb_inp in enumerate(finnloop_body.graph.input):
+                    downstream = finnloop_body.find_consumer(lb_inp.name)
+                    if downstream.op_type.startswith("MVAU"):
+                        mlo = True
+                        mvau_hbm_weights[idx] = {}
+                        mvau_hbm_weights[idx]["name"] = lb_inp.name
+                        datfile = (
+                            f"{finnloop_op.get_nodeattr('code_gen_dir_ipgen')}/memblock_MVAU_rtl_id_{idx}.dat"
+                        )
+
+                        # Convert the .dat weights to numpy and save into the destination dir
+                        np_dat = dat_file_to_numpy_array(datfile)
+                        mvau_hbm_weights[idx]["weight_npy"] = f"memblock_MVAU_rtl_id_{idx}.npy"
+                        np.save(f"{dirname}/{mvau_hbm_weights[idx]['weight_npy']}", np_dat)
+
+                        # Record the external AXI interface and MVAU geometry
+                        mvau_hbm_weights[idx]["extern_idx"] = extern_idx
+                        mvau_hbm_weights[idx]["extern_name"] = f"m_axi_MVAU_id_{idx}"
+                        mlo_mvau = registry.getCustomOp(downstream)
+                        mvau_hbm_weights[idx]["PE"] = mlo_mvau.get_nodeattr("PE")
+                        mvau_hbm_weights[idx]["SIMD"] = mlo_mvau.get_nodeattr("SIMD")
+                        mvau_hbm_weights[idx]["MH"] = mlo_mvau.get_nodeattr("MH")
+                        mvau_hbm_weights[idx]["MW"] = mlo_mvau.get_nodeattr("MW")
+                        mvau_hbm_weights[idx]["weightDataType"] = mlo_mvau.get_nodeattr("weightDataType")
+                        extern_idx += 1
+                finn_loops[node.name] = mvau_hbm_weights
+        self.md["mlo"] = mlo
+        self.md["finn_loops"] = finn_loops
+
+        # Extract instream widths
        instreams = {}
         for input_tensor in graph.input:
             consumer = model.find_consumer(input_tensor.name)
             inst = registry.getCustomOp(consumer)
             instreams[input_tensor.name] = {
+                "datatype": inst.get_input_datatype().name,
                 "width": inst.get_instream_width(),
                 "shape": inst.get_normal_input_shape(),
             }
+
         self.md["instreams"] = instreams
 
         outstreams = {}
         for output_tensor in graph.output:
             producer = model.find_producer(output_tensor.name)
             inst = registry.getCustomOp(producer)
             outstreams[output_tensor.name] = {
+                "datatype": inst.get_output_datatype().name,
                 "width": inst.get_outstream_width(),
                 "shape": inst.get_normal_output_shape(),
             }
         self.md["outstreams"] = outstreams
 
@@ -56,4 +106,4 @@ def apply(self, model):
     with open(self.metadata_file, "w") as fp:
         json.dump(self.md, fp, indent=4)
 
     return (model, False)
diff --git a/brainsmith/primitives/transforms/import_qonnx_quantization.py b/brainsmith/primitives/transforms/import_qonnx_quantization.py
new file mode 100644
index 00000000..2b757089
--- /dev/null
+++ b/brainsmith/primitives/transforms/import_qonnx_quantization.py
@@ -0,0 +1,68 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+"""Import quantization metadata from QONNX format.
+
+This transform prepares QONNX models for Brainsmith hardware compilation by:
+1. Folding quantization into weight initializers
+2. Converting activation quantization nodes (Quant, BipolarQuant) to MultiThreshold
+3. Converting AvgPool+Trunc patterns to QuantAvgPool2d
+
+This transform handles ONLY quantization-specific operations. Topology transformations
+(GemmToMatMul, ExtractBiasFromConv, etc.) belong in finn_topology_cleanup_step.
+
+Similar transforms can be added for other quantization frameworks
+(e.g., ImportTensorRTQuantization, ImportPyTorchQuantization).
+"""
+
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.transformation.base import Transformation
+from qonnx.transformation.infer_datatypes import InferDataTypes
+
+from finn.transformation.qonnx.fold_quant_weights import FoldQuantWeights
+from finn.transformation.qonnx.infer_quant_avg_pool_2d import AvgPoolAndTruncToQuantAvgPool
+from finn.transformation.qonnx.quant_act_to_multithreshold import (
+    ConvertQuantActToMultiThreshold,
+    default_filter_function_generator,
+)
+
+
+class ImportQONNXQuantization(Transformation):
+    """Import QONNX quantization metadata for Brainsmith.
+
+    Handles ONLY quantization-specific transforms:
+    1. FoldQuantWeights - Fold quantization into weight initializers
+    2. ConvertQuantActToMultiThreshold - Convert Quant/BipolarQuant to MultiThreshold
+    3. AvgPoolAndTruncToQuantAvgPool - Convert AvgPool+Trunc pattern to QuantAvgPool2d
+
+    Topology transforms (GemmToMatMul, ExtractBiasFromConv, etc.) belong in
+    finn_topology_cleanup_step, not here.
+
+    Should be run after topology cleanup, before streamlining.
+    """
+
+    def __init__(
+        self,
+        filter_function=default_filter_function_generator(max_multithreshold_bit_width=8),
+    ):
+        super().__init__()
+        self._filter_function = filter_function
+
+    def apply(self, model: ModelWrapper):
+        """Apply QONNX quantization import.
+ + Args: + model: QONNX ModelWrapper (after topology cleanup) + + Returns: + Tuple of (transformed_model, graph_modified) + """ + model = model.transform(InferDataTypes()) + model = model.transform(FoldQuantWeights()) + model = model.transform( + ConvertQuantActToMultiThreshold(filter_function=self._filter_function) + ) + model = model.transform(InferDataTypes()) + model = model.transform(AvgPoolAndTruncToQuantAvgPool()) + + return model, False diff --git a/brainsmith/primitives/transforms/infer_kernel.py b/brainsmith/primitives/transforms/infer_kernel.py index b9e10753..75ecf4e6 100644 --- a/brainsmith/primitives/transforms/infer_kernel.py +++ b/brainsmith/primitives/transforms/infer_kernel.py @@ -108,6 +108,7 @@ def apply(self, model: ModelWrapper): nodes_processed = 0 nodes_converted = 0 nodes_failed = 0 + kernel_index = 0 # Track sequential index for this kernel type # Iterate nodes (copy list since we'll modify it) for node_ind, node in enumerate(list(graph.node)): @@ -121,7 +122,8 @@ def apply(self, model: ModelWrapper): logger.debug(f"Inferring {self.kernel_name} from {node.op_type} node {node.name}") # Delegate to kernel-specific inference (naive node creation) - result = self.kernel_cls.infer_from(node, model, node_ind + 1) + # Pass kernel_index for sequential naming + result = self.kernel_cls.infer_from(node, model, node_ind + 1, kernel_index=kernel_index) # VALIDATE new kernel nodes before applying transformation # Try to create KernelOp instances and validate design space @@ -143,6 +145,16 @@ def apply(self, model: ModelWrapper): raise # Re-raise to outer catch block # All validations passed - apply graph modifications + + # Ensure opset import exists for new kernel domain + for new_node in result.nodes_to_insert: + if new_node.domain: # Only add if node has a domain + existing_domains = {op.domain for op in model.model.opset_import} + if new_node.domain not in existing_domains: + import onnx.helper as oh + model.model.opset_import.append(oh.make_opsetid(new_node.domain, 1)) + logger.debug(f" Added opset import for domain: {new_node.domain}") + for i, new_node in enumerate(result.nodes_to_insert): graph.node.insert(node_ind + 1 + i, new_node) logger.debug(f" Inserted {new_node.op_type} node {new_node.name}") @@ -156,6 +168,7 @@ def apply(self, model: ModelWrapper): logger.debug(f" Metadata: {result.metadata}") nodes_converted += 1 + kernel_index += 1 # Increment index after successful conversion graph_modified = True except Exception as e: diff --git a/brainsmith/primitives/transforms/insert_duplicate_streams.py b/brainsmith/primitives/transforms/insert_duplicate_streams.py index 5bdb707e..b9dc7b82 100644 --- a/brainsmith/primitives/transforms/insert_duplicate_streams.py +++ b/brainsmith/primitives/transforms/insert_duplicate_streams.py @@ -5,13 +5,19 @@ """Insert DuplicateStreams layers for tensor fanout.""" + +import logging + from onnx import TensorProto, helper +from onnx.onnx_pb import StringStringEntryProto from qonnx.core.modelwrapper import ModelWrapper from qonnx.transformation.base import Transformation from qonnx.transformation.general import SortGraph from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes +logger = logging.getLogger(__name__) + class InsertDuplicateStreams(Transformation): """Insert DuplicateStreams HW layer for any tensor with fanout >= 2. 
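The _copy_hierarchy_metadata helper added in the next hunk reduces the consumers' name scopes to their longest common prefix. A minimal sketch of that step, with hypothetical scope lists:

    # Hypothetical scopes of two consumers in the same transformer layer
    a = ["encoder", "encoder.layer.0", "encoder.layer.0.attention"]
    b = ["encoder", "encoder.layer.0", "encoder.layer.0.output"]

    common_prefix = []
    for x, y in zip(a, b):
        if x != y:
            break
        common_prefix.append(x)

    print(common_prefix)  # ['encoder', 'encoder.layer.0'] -> shared loop iteration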
@@ -121,6 +127,18 @@
         # Required by FINN's SpecializeKernel transform (line 68-76)
         dup_node.attribute.append(helper.make_attribute("backend", "fpgadataflow"))
 
+        # Copy PyTorch hierarchy metadata for MLO loop rolling
+        # Infrastructure kernels must inherit hierarchy from consumers (they exist to serve them)
+        metadata_copied = self._copy_hierarchy_metadata(
+            dup_node, successors, model, output_tensor
+        )
+
+        if not metadata_copied:
+            logger.debug(
+                f"DuplicateStreams for {output_tensor}: no hierarchy metadata found "
+                f"(may be excluded from FINNLoop)"
+            )
+
         # Insert node into graph
         graph.node.insert(insert_index, dup_node)
 
@@ -133,3 +151,116 @@ def _insert_duplicator(
             clone_idx += 1
             # Break inner loop - one clone per consumer connection
             break
+
+    def _copy_hierarchy_metadata(
+        self,
+        dup_node,
+        successors,
+        model: ModelWrapper,
+        output_tensor: str
+    ) -> bool:
+        """Copy PyTorch hierarchy metadata from consumers to DuplicateStreams node.
+
+        For MLO loop rolling, nodes need pkg.torch.onnx.name_scopes and
+        pkg.torch.onnx.class_hierarchy metadata to be included in FINNLoop bodies.
+
+        Infrastructure kernels inherit from consumers (not producers) because:
+        - Consumers define where the duplicated data is needed
+        - Validates all consumers in same hierarchy (no cross-loop fanout)
+        - More robust than producer (which may be optimized away)
+
+        Args:
+            dup_node: DuplicateStreams ONNX node to annotate
+            successors: Consumer nodes
+            model: ModelWrapper
+            output_tensor: Tensor being duplicated
+
+        Returns:
+            True if metadata was copied, False otherwise
+        """
+        METADATA_KEYS = ["pkg.torch.onnx.name_scopes", "pkg.torch.onnx.class_hierarchy"]
+
+        # Collect metadata from all consumers
+        consumer_metadata = []
+        for consumer in successors:
+            consumer_meta = {}
+            for prop in consumer.metadata_props:
+                if prop.key in METADATA_KEYS:
+                    consumer_meta[prop.key] = prop.value
+            if consumer_meta:
+                consumer_metadata.append(consumer_meta)
+
+        # No metadata found in any consumer
+        if not consumer_metadata:
+            # Fall back to producer
+            producer = model.find_producer(output_tensor)
+            if producer:
+                for prop in producer.metadata_props:
+                    if prop.key in METADATA_KEYS:
+                        # Use StringStringEntryProto for metadata_props
+                        new_prop = StringStringEntryProto(key=prop.key, value=prop.value)
+                        dup_node.metadata_props.append(new_prop)
+                return len([p for p in producer.metadata_props if p.key in METADATA_KEYS]) > 0
+            return False
+
+        # For loop rolling, what matters is the common prefix, not exact match
+        # E.g., "encoder.layer.0.attention.self.query" and "encoder.layer.0.attention.self.key"
+        # both belong to the same loop iteration (encoder.layer.0)
+
+        # Find longest common prefix for name_scopes
+        import ast
+
+        name_scopes_list = []
+        for meta in consumer_metadata:
+            scope_str = meta.get("pkg.torch.onnx.name_scopes", "")
+            # Parse as list (format: ['encoder', 'encoder.layer.0', ...])
+            try:
+                scope_list = ast.literal_eval(scope_str)
+                name_scopes_list.append(scope_list)
+            except (ValueError, SyntaxError):
+                # If parsing fails, treat as incompatible
+                name_scopes_list.append([])
+
+        # Find common prefix across all consumers
+        common_prefix = []
+        if name_scopes_list and all(name_scopes_list):
+            common_prefix = name_scopes_list[0]
+            for scopes in name_scopes_list[1:]:
+                # Find longest common prefix
+                common_prefix = [
+                    common_prefix[i]
+                    for i in range(min(len(common_prefix), len(scopes)))
+                    if i < len(scopes) and common_prefix[i] == scopes[i]
+                ]
+
+        # Use common prefix as the hierarchy for DuplicateStreams
+        if common_prefix:
+            #
Reconstruct metadata using common prefix + common_hierarchy_str = str(common_prefix) + + # Get class hierarchy from first consumer (should be same at prefix level) + class_hierarchy = consumer_metadata[0].get("pkg.torch.onnx.class_hierarchy", "") + + new_prop = StringStringEntryProto( + key="pkg.torch.onnx.name_scopes", + value=common_hierarchy_str + ) + dup_node.metadata_props.append(new_prop) + + if class_hierarchy: + new_prop = StringStringEntryProto( + key="pkg.torch.onnx.class_hierarchy", + value=class_hierarchy + ) + dup_node.metadata_props.append(new_prop) + + logger.debug( + f"DuplicateStreams for {output_tensor}: using common prefix {common_prefix}" + ) + return True + + # Fallback: use first consumer's full metadata + reference_metadata = consumer_metadata[0] + for key, value in reference_metadata.items(): + new_prop = StringStringEntryProto(key=key, value=value) + dup_node.metadata_props.append(new_prop) + + return True diff --git a/brainsmith/primitives/transforms/parallelization.py b/brainsmith/primitives/transforms/parallelization.py index 6139522f..1f51f76f 100644 --- a/brainsmith/primitives/transforms/parallelization.py +++ b/brainsmith/primitives/transforms/parallelization.py @@ -864,21 +864,36 @@ def apply(self, model): model = model.transform(AnnotateCycles()) # Two-pass relaxation: run again with achievable target if needed + # Skip if there are unspecialized nodes (e.g., Shuffle) that will be decomposed later if self.two_pass_relaxation: - perf_dict = model.analysis(dataflow_performance) - if perf_dict["max_cycles"] > self.target_cycles_per_frame: - # Target not achievable, run second pass with achievable target + # Check for abstract/unspecialized nodes that aren't HLS/RTL yet + has_unspecialized = any( + node.op_type == "Shuffle" or + (hasattr(node, 'domain') and node.domain == "finn.custom_op.fpgadataflow.fpgadataflow") + for node in model.graph.node + ) + + if has_unspecialized: warnings.warn( - f"Node {perf_dict['max_cycles_node_name']} is bottleneck with " - f"{perf_dict['max_cycles']} cycles, running second pass" + "Skipping two-pass relaxation: model contains unspecialized nodes (e.g., Shuffle) " + "that will be decomposed by transpose_decomposition. Parallelization will be " + "refined after decomposition if needed." 
) - model = model.transform( - SetParallelization( - target_cycles_per_frame=perf_dict["max_cycles"], - mvau_wwidth_max=self.mvau_wwidth_max, - two_pass_relaxation=False, # Prevent infinite recursion + else: + perf_dict = model.analysis(dataflow_performance) + if perf_dict["max_cycles"] > self.target_cycles_per_frame: + # Target not achievable, run second pass with achievable target + warnings.warn( + f"Node {perf_dict['max_cycles_node_name']} is bottleneck with " + f"{perf_dict['max_cycles']} cycles, running second pass" + ) + model = model.transform( + SetParallelization( + target_cycles_per_frame=perf_dict["max_cycles"], + mvau_wwidth_max=self.mvau_wwidth_max, + two_pass_relaxation=False, # Prevent infinite recursion + ) ) - ) return (model, False) diff --git a/brainsmith/primitives/transforms/refresh_design_points.py b/brainsmith/primitives/transforms/refresh_design_points.py index 5f802c1b..9260b3c7 100644 --- a/brainsmith/primitives/transforms/refresh_design_points.py +++ b/brainsmith/primitives/transforms/refresh_design_points.py @@ -16,7 +16,6 @@ from qonnx.core.modelwrapper import ModelWrapper from qonnx.custom_op.registry import getCustomOp from qonnx.transformation.base import Transformation -from qonnx.transformation.general import ApplyConfig from brainsmith.dataflow import KernelOp @@ -187,6 +186,7 @@ def make_brainsmith_cleanup_pipeline(): 2. Refreshes all kernel instances 3. Cleans up the graph """ + from finn.transformation.general import ApplyConfig from qonnx.transformation.fold_constants import FoldConstants from qonnx.transformation.general import RemoveStaticGraphInputs, RemoveUnusedTensors diff --git a/brainsmith/primitives/transforms/specialize_kernels.py b/brainsmith/primitives/transforms/specialize_kernels.py index d0b821df..1b606ed4 100644 --- a/brainsmith/primitives/transforms/specialize_kernels.py +++ b/brainsmith/primitives/transforms/specialize_kernels.py @@ -332,4 +332,8 @@ def _create_specialized_node(self, node, backend_name): # Set backend attribute new_node.attribute.append(helper.make_attribute("backend", language)) + # Copy metadata_props (PyTorch hierarchy metadata for MLO loop rolling) + for prop in node.metadata_props: + new_node.metadata_props.append(prop) + return new_node diff --git a/brainsmith/steps/__init__.py b/brainsmith/steps/__init__.py index 5c15aa09..75efba22 100644 --- a/brainsmith/steps/__init__.py +++ b/brainsmith/steps/__init__.py @@ -11,48 +11,64 @@ step_fn = get_step("qonnx_to_finn_step") """ -# Core FINN-compatible steps -# BERT-specific steps -from brainsmith.steps.bert_custom_steps import ( - bert_cleanup_step, - bert_streamlining_step, - shell_metadata_handover_step, +# Topology cleanup steps +from brainsmith.steps.topology_cleanup_steps import ( + finn_topology_cleanup_step, + import_qonnx_quantization_step, ) -# Dataflow graph construction -from brainsmith.steps.build_dataflow_graph import build_dataflow_graph +# Topology optimization steps +from brainsmith.steps.topology_optimization_steps import ( + normalize_dataflow_layouts_step, +) -# Specialization to HW backends -from brainsmith.steps.build_hw_graph import build_hw_graph +# Core dataflow compilation steps from brainsmith.steps.core_steps import ( - constrain_folding_and_set_pumped_compute_step, - qonnx_to_finn_step, - specialize_layers_step, + build_dataflow_graph, + insert_infrastructure_kernels_step, + infer_computational_kernels_step, + specialize_kernel_backends, + build_hw_graph, # Deprecated alias ) -# Layout normalization -from brainsmith.steps.normalize_layouts 
import normalize_dataflow_layouts_step - -# Parallelization -from brainsmith.steps.parallelization import ( +# Hardware optimization steps +from brainsmith.steps.hardware_optimization_steps import ( + constrain_folding_and_set_pumped_compute_step, apply_parallelization_config_step, target_fps_parallelization_step, + explore_kernel_params_step, + minimize_bit_width_step, ) -# Parameter exploration -from brainsmith.steps.parameter_exploration import explore_kernel_params_step +# BERT-specific steps +from brainsmith.steps.bert_steps import ( + bert_topology_cleanup_step, + bert_cleanup_step, + bert_streamlining_step, + shell_metadata_handover_step, +) __all__ = [ - "qonnx_to_finn_step", - "specialize_layers_step", - "constrain_folding_and_set_pumped_compute_step", - "shell_metadata_handover_step", - "bert_cleanup_step", - "bert_streamlining_step", - "build_dataflow_graph", - "build_hw_graph", - "normalize_dataflow_layouts_step", - "explore_kernel_params_step", - "apply_parallelization_config_step", - "target_fps_parallelization_step", + # Topology cleanup + 'finn_topology_cleanup_step', + 'import_qonnx_quantization_step', + # Topology optimization + 'normalize_dataflow_layouts_step', + # Core dataflow compilation + 'build_dataflow_graph', + 'insert_infrastructure_kernels_step', + 'infer_computational_kernels_step', + 'specialize_kernel_backends', + 'build_hw_graph', # Deprecated + # Hardware optimization + 'constrain_folding_and_set_pumped_compute_step', + 'apply_parallelization_config_step', + 'target_fps_parallelization_step', + 'explore_kernel_params_step', + 'minimize_bit_width_step', + # BERT-specific + 'bert_topology_cleanup_step', + 'bert_cleanup_step', + 'bert_streamlining_step', + 'shell_metadata_handover_step', ] diff --git a/brainsmith/steps/bert_custom_steps.py b/brainsmith/steps/bert_steps.py similarity index 68% rename from brainsmith/steps/bert_custom_steps.py rename to brainsmith/steps/bert_steps.py index 5392c644..dc8190f8 100644 --- a/brainsmith/steps/bert_custom_steps.py +++ b/brainsmith/steps/bert_steps.py @@ -5,6 +5,7 @@ BERT-Specific Custom Build Steps Custom steps specifically for BERT model processing, including: +- Model-specific topology preprocessing - Head and tail removal for model decomposition - Metadata extraction for shell integration - Reference I/O generation for validation @@ -13,43 +14,53 @@ are not general-purpose FINN dataflow compilation steps. 
""" -import logging import os import shutil +import logging from typing import Any +# Import decorator for registration +from brainsmith.registry import step +from brainsmith.primitives.transforms.expand_norms import ExpandNorms +from brainsmith.primitives.transforms.extract_shell_integration_metadata import ExtractShellIntegrationMetadata +from qonnx.transformation.general import SortCommutativeInputsInitializerLast, GiveUniqueNodeNames +from qonnx.transformation.remove import RemoveIdentityOps +from qonnx.transformation.infer_datatypes import InferDataTypes from finn.transformation.streamline.absorb import ( - AbsorbAddIntoMultiThreshold, - AbsorbMulIntoMultiThreshold, AbsorbSignBiasIntoMultiThreshold, + AbsorbAddIntoMultiThreshold, + AbsorbMulIntoMultiThreshold ) +from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds from finn.transformation.streamline.reorder import ( MoveOpPastFork, - MoveScalarLinearPastInvariants, MoveScalarMulPastMatMul, + MoveScalarLinearPastInvariants ) -from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds -from qonnx.transformation.general import GiveUniqueNodeNames, SortCommutativeInputsInitializerLast -from qonnx.transformation.infer_datatypes import InferDataTypes -from qonnx.transformation.remove import RemoveIdentityOps - -from brainsmith.primitives.transforms.extract_shell_integration_metadata import ( - ExtractShellIntegrationMetadata, -) +from finn.transformation.streamline.collapse_repeated import CollapseRepeatedMul logger = logging.getLogger(__name__) -# Import decorator for registration -from brainsmith.registry import step # noqa: E402 - # === Pre-Processing === +@step(name="bert_topology_cleanup") +def bert_topology_cleanup_step(model: Any, cfg: Any) -> Any: + """Model-specific topology preprocessing. -@step(name="bert_cleanup") + Decomposes transformer-specific operations into functional primitives + before quantization import and FINN topology cleanup. + """ + model = model.transform(ExpandNorms()) + return model + +@step(name='bert_cleanup') def bert_cleanup_step(model: Any, cfg: Any) -> Any: """Basic cleanup with identity removal and input sorting.""" - for transform in [SortCommutativeInputsInitializerLast(), RemoveIdentityOps()]: + for transform in [ + SortCommutativeInputsInitializerLast(), + RemoveIdentityOps() + ]: model = model.transform(transform) return model @@ -57,8 +68,7 @@ def bert_cleanup_step(model: Any, cfg: Any) -> Any: # === Streamlining Steps === - -@step(name="bert_streamlining") +@step(name='bert_streamlining') def bert_streamlining_step(model: Any, cfg: Any) -> Any: """BERT-specific streamlining with SoftMax Mul node handling. @@ -79,7 +89,7 @@ def bert_streamlining_step(model: Any, cfg: Any) -> Any: AbsorbSignBiasIntoMultiThreshold(), AbsorbAddIntoMultiThreshold(), AbsorbMulIntoMultiThreshold(), - RoundAndClipThresholds(), + RoundAndClipThresholds() ]: model = model.transform(transform) @@ -91,6 +101,7 @@ def bert_streamlining_step(model: Any, cfg: Any) -> Any: MoveScalarLinearPastInvariants(), AbsorbMulIntoMultiThreshold(), AbsorbAddIntoMultiThreshold(), + CollapseRepeatedMul() ]: model = model.transform(transform) @@ -103,8 +114,7 @@ def bert_streamlining_step(model: Any, cfg: Any) -> Any: # === Metadata Steps === - -@step(name="shell_metadata_handover") +@step(name='shell_metadata_handover') def shell_metadata_handover_step(model, cfg): """ Extract metadata for shell integration process. 
@@ -115,17 +125,14 @@ def shell_metadata_handover_step(model, cfg):
     from finn.builder.build_dataflow_config import DataflowOutputType
 
     if DataflowOutputType.STITCHED_IP in cfg.generate_outputs:
         if os.path.isdir(cfg.output_dir + "/stitched_ip"):
-            model = model.transform(
-                ExtractShellIntegrationMetadata(cfg.output_dir + "/stitched_ip/shell_handover.json")
-            )
+            model = model.transform(ExtractShellIntegrationMetadata(
+                cfg.output_dir + "/stitched_ip/shell_handover.json"
+            ))
             # copy over the ref IO *.npy files into the stitched_ip for handover
             shutil.copy(cfg.verify_input_npy, cfg.output_dir + "/stitched_ip")
             shutil.copy(cfg.verify_expected_output_npy, cfg.output_dir + "/stitched_ip")
             return model
         else:
-            raise RuntimeError(
-                "Stitched IP directory not found. "
-                "Ensure shell_metadata_handover runs after create_stitched_ip step."
-            )
+            raise RuntimeError(
+                "Could not find the stitched IP directory, so shell handover metadata "
+                "cannot be created. Ensure this step runs after create_stitched_ip."
+            )
     return model
diff --git a/brainsmith/steps/build_dataflow_graph.py b/brainsmith/steps/build_dataflow_graph.py
deleted file mode 100644
index 251b7041..00000000
--- a/brainsmith/steps/build_dataflow_graph.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright (c) Microsoft Corporation.
-# Licensed under the MIT License.
-
-"""Build dataflow graph step for hardware mapping.
-
-This step orchestrates the complete dataflow graph construction through two phases:
-1. Infrastructure kernels: Inserted via topology analysis (InsertInfrastructureKernels)
-2. Computational kernels: Inferred via pattern matching (InferKernels)
-
-The step automatically splits kernel_selections into these two categories based
-on the is_infrastructure metadata flag, then dispatches to the appropriate transform.
-"""
-import logging
-from typing import Any
-
-from qonnx.transformation.general import GiveUniqueNodeNames
-
-from brainsmith.primitives.transforms import InferKernels, InsertInfrastructureKernels
-from brainsmith.registry import get_component_metadata, get_kernel, step
-
-logger = logging.getLogger(__name__)
-
-
-@step(name="build_dataflow_graph")
-def build_dataflow_graph(model: Any, cfg: Any) -> Any:
-    """Build complete dataflow graph from kernel selections (two-phase workflow).
-
-    Extracts kernel classes from cfg.kernel_selections and splits them into:
-    1. Infrastructure kernels (is_infrastructure=True) → InsertInfrastructureKernels
-    2. Computational kernels (is_infrastructure=False) → InferKernels
-
-    This two-phase approach ensures infrastructure nodes (DuplicateStreams, FIFO, etc.)
-    are inserted first via topology analysis, then computational nodes are pattern-matched.
- - Args: - model: ONNX model to transform - cfg: Build configuration with kernel_selections attribute - - Returns: - Transformed model with complete dataflow graph (infrastructure + computational kernels) - """ - kernel_selections = getattr(cfg, "kernel_selections", None) - if not kernel_selections: - logger.debug("No kernel selections configured, skipping inference") - return model - - logger.debug(f"Processing {len(kernel_selections)} kernel(s)...") - - # Split kernel classes into infrastructure and computational - infrastructure_kernels = [] - computational_kernels = [] - - for kernel_name, _ in kernel_selections: - try: - kernel_class = get_kernel(kernel_name) - metadata = get_component_metadata(kernel_name, "kernel") - - if metadata.is_infrastructure: - infrastructure_kernels.append(kernel_class) - logger.debug(f" {kernel_name} (infrastructure)") - else: - computational_kernels.append(kernel_class) - logger.debug(f" {kernel_name} (computational)") - except KeyError: - logger.error(f" Kernel not found in registry: {kernel_name}") - - # Phase 1: Insert infrastructure kernels via topology analysis - if infrastructure_kernels: - logger.debug(f"Inserting {len(infrastructure_kernels)} infrastructure kernel(s)...") - model = model.transform(InsertInfrastructureKernels(infrastructure_kernels)) - - # Phase 2: Infer computational kernels via pattern matching - if computational_kernels: - logger.debug(f"Inferring {len(computational_kernels)} computational kernel(s)...") - model = model.transform(InferKernels(computational_kernels)) - - # Ensure all nodes have unique names after graph construction - # Some legacy FINN transforms (e.g., InferElementwiseBinaryOperation) create - # nodes without names, which causes issues in downstream steps like partitioning - model = model.transform(GiveUniqueNodeNames()) - logger.debug("Assigned unique names to all nodes after dataflow graph construction") - - return model diff --git a/brainsmith/steps/build_hw_graph.py b/brainsmith/steps/build_hw_graph.py deleted file mode 100644 index adc24470..00000000 --- a/brainsmith/steps/build_hw_graph.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Build hardware graph step combining partitioning and specialization. - -This step combines two critical phases of the dataflow compilation pipeline: -1. Dataflow partitioning: Separates hardware-accelerated nodes into isolated subgraphs -2. Backend specialization: Converts generic kernel nodes to HLS/RTL implementations - -The combined step simplifies the blueprint configuration and ensures proper -sequencing of these tightly-coupled transformations. -""" - -import logging -import os -from typing import Any - -from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition -from finn.util.basic import getHWCustomOp -from qonnx.core.modelwrapper import ModelWrapper -from qonnx.transformation.general import ( - ApplyConfig, - GiveUniqueNodeNames, -) -from qonnx.transformation.infer_datatypes import InferDataTypes -from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.config import extract_model_config_to_json - -from brainsmith.primitives.transforms.specialize_kernels import SpecializeKernels -from brainsmith.registry import step - -logger = logging.getLogger(__name__) - - -@step(name="build_hw_graph") -def build_hw_graph(model: Any, cfg: Any) -> Any: - """Build complete hardware dataflow graph via partitioning + specialization. 
- - This step combines create_dataflow_partition and specialize_layers into a - unified transformation that: - - 1. **Partitioning Phase**: Separates consecutive groups of HWCustomOp nodes - into StreamingDataflowPartition nodes, which point to separate ONNX files. - Only dataflow accelerator synthesis can be performed on these HW subgraphs. - - 2. **Specialization Phase**: Converts generic hardware kernel nodes to - specialized backend implementations (HLS or RTL) based on kernel_selections - config and constraint checking. - - The step handles both Brainsmith KernelOp nodes and legacy FINN HWCustomOp nodes, - ensuring compatibility with mixed graphs. - - Args: - model: ModelWrapper containing the ONNX model with hardware kernel nodes - cfg: Build configuration with: - - output_dir: Output directory for intermediate models and configs - - kernel_selections: Backend priority lists for specialization - - specialize_layers_config_file: Optional user config for manual overrides - - Returns: - ModelWrapper containing the specialized dataflow partition model - - Blueprint usage: - steps: - - build_dataflow_graph # Infer kernels first - - build_hw_graph # Combined partitioning + specialization - - apply_folding_config # Then apply parallelization - - Implementation notes: - - Creates template_specialize_layers_config.json for user reference - - Supports single StreamingDataflowPartition only (FINN limitation) - - Returns the dataflow partition model, not the parent model - - Saves parent model to intermediate_models/dataflow_parent.onnx if enabled - """ - logger.debug("Building hardware dataflow graph (partitioning + specialization)...") - - # ======================================================================== - # Phase 1: Create Dataflow Partition - # ======================================================================== - - logger.debug("Phase 1: Creating dataflow partition...") - - partition_dir = os.path.join(cfg.output_dir, "intermediate_models", "supported_op_partitions") - - # Use FINN's CreateDataflowPartition to separate HW nodes - parent_model = model.transform(CreateDataflowPartition(partition_model_dir=partition_dir)) - - # Extract the dataflow partition model - sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition") - - if len(sdp_nodes) == 0: - logger.error("No StreamingDataflowPartition nodes found after partitioning") - logger.error("") - logger.error("This typically means one or more nodes failed to be converted to hardware:") - logger.error(" 1. Kernel inference failed - ONNX nodes were not matched to any kernel") - logger.error(" → Check that kernels are listed in blueprint design_space.kernels") - logger.error(" → Verify nodes are supported by the selected kernels") - logger.error( - " 2. Backend specialization failed - kernels lack viable backend implementations" - ) - logger.error(" → Check that backends are configured in kernel_selections") - logger.error(" → Verify RTL backend constraints are satisfied (see SpecializeKernels)") - logger.error("") - logger.error("Debug steps:") - logger.error(" - Inspect intermediate_models/ to see which nodes remain") - logger.error(" - Check logs for kernel inference warnings") - logger.error(" - Verify all ONNX ops have corresponding kernel transforms") - raise RuntimeError( - "No hardware dataflow partition created. " - "One or more nodes failed kernel inference or backend specialization. " - "See logs above for details." 
- ) - - if len(sdp_nodes) > 1: - logger.warning( - f"Found {len(sdp_nodes)} StreamingDataflowPartition nodes. " - "Only single partition is officially supported by FINN." - ) - - # Get the dataflow partition model file - sdp_node = sdp_nodes[0] - sdp_node_inst = getHWCustomOp(sdp_node, parent_model) - dataflow_model_filename = sdp_node_inst.get_nodeattr("model") - - logger.debug(f"Dataflow partition extracted: {dataflow_model_filename}") - - # Save parent model if requested - if cfg.save_intermediate_models: - parent_model_path = os.path.join( - cfg.output_dir, "intermediate_models", "dataflow_parent.onnx" - ) - parent_model.save(parent_model_path) - logger.debug(f"Saved parent model: {parent_model_path}") - - # Load the dataflow partition for specialization - model = ModelWrapper(dataflow_model_filename) - - # Create template config for user reference - template_config_path = os.path.join(cfg.output_dir, "template_specialize_layers_config.json") - extract_model_config_to_json(model, template_config_path, ["preferred_impl_style"]) - logger.debug(f"Created template config: {template_config_path}") - - # ======================================================================== - # Phase 2: Specialize Layers - # ======================================================================== - - logger.debug("Phase 2: Specializing hardware layers...") - - # Apply user config if provided (manual overrides) - if cfg.specialize_layers_config_file is not None: - logger.debug(f"Applying user config: {cfg.specialize_layers_config_file}") - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(ApplyConfig(cfg.specialize_layers_config_file)) - - # Run registry-based backend specialization - logger.debug("Running registry-based backend specialization...") - model = model.transform(SpecializeKernels(cfg)) - - # Clean up and infer properties - logger.debug("Running cleanup transformations...") - for transform in [GiveUniqueNodeNames(), InferShapes(), InferDataTypes()]: - model = model.transform(transform) - - return model diff --git a/brainsmith/steps/core_steps.py b/brainsmith/steps/core_steps.py index b9bd1904..4646dd9b 100644 --- a/brainsmith/steps/core_steps.py +++ b/brainsmith/steps/core_steps.py @@ -2,78 +2,395 @@ # Licensed under the MIT License. """ -Core FINN-compatible Build Steps +Core Dataflow Compilation Steps -Brainsmith implementations of core FINN dataflow compiler steps. -These steps use the comprehensive component registry to access -transforms from QONNX, FINN, and Brainsmith. +Core steps for building and specializing the hardware dataflow graph: +- Dataflow graph construction (infrastructure + computational kernel inference) +- Backend specialization (HLS/RTL selection and dataflow partitioning) + +These steps form the central compilation pipeline for dataflow accelerators. 
""" import logging +import os from typing import Any -from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN -from qonnx.transformation.fold_constants import FoldConstants -from qonnx.transformation.general import ( - ApplyConfig, - ConvertDivToMul, - GiveUniqueNodeNames, -) +from finn.transformation.fpgadataflow.create_dataflow_partition import CreateDataflowPartition +from finn.transformation.general import ApplyConfig +from finn.util.basic import getHWCustomOp +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.general import GiveUniqueNodeNames from qonnx.transformation.infer_datatypes import InferDataTypes from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.config import extract_model_config_to_json -from brainsmith.primitives.transforms.expand_norms import ExpandNorms -from brainsmith.primitives.transforms.set_pumped_compute import SetPumpedCompute +from brainsmith.primitives.transforms import InferKernels, InsertInfrastructureKernels from brainsmith.primitives.transforms.specialize_kernels import SpecializeKernels -from brainsmith.primitives.transforms.temp_shuffle_fixer import TempShuffleFixer +from brainsmith.registry import get_component_metadata, get_kernel, step logger = logging.getLogger(__name__) -# Import decorator for registration -from brainsmith.registry import step # noqa: E402 -# === Conversion Steps === +# === Dataflow Graph Construction === + + +@step(name="build_dataflow_graph") +def build_dataflow_graph(model: Any, cfg: Any) -> Any: + """Build complete dataflow graph from kernel selections (two-phase workflow). + + Extracts kernel classes from cfg.kernel_selections and splits them into: + 1. Infrastructure kernels (is_infrastructure=True) → InsertInfrastructureKernels + 2. Computational kernels (is_infrastructure=False) → InferKernels + + This two-phase approach ensures infrastructure nodes (DuplicateStreams, FIFO, etc.) + are inserted first via topology analysis, then computational nodes are pattern-matched. 
+ + Args: + model: ONNX model to transform + cfg: Build configuration with kernel_selections attribute + + Returns: + Transformed model with complete dataflow graph (infrastructure + computational kernels) + """ + kernel_selections = getattr(cfg, "kernel_selections", None) + if not kernel_selections: + logger.debug("No kernel selections configured, skipping inference") + return model + + logger.debug(f"Processing {len(kernel_selections)} kernel(s)...") + + # Split kernel classes into infrastructure and computational + infrastructure_kernels = [] + computational_kernels = [] + + for kernel_name, _ in kernel_selections: + try: + kernel_class = get_kernel(kernel_name) + metadata = get_component_metadata(kernel_name, "kernel") + + if metadata.is_infrastructure: + infrastructure_kernels.append(kernel_class) + logger.debug(f" {kernel_name} (infrastructure)") + else: + computational_kernels.append(kernel_class) + logger.debug(f" {kernel_name} (computational)") + except KeyError: + logger.error(f" Kernel not found in registry: {kernel_name}") + + # Phase 1: Insert infrastructure kernels via topology analysis + if infrastructure_kernels: + logger.debug(f"Inserting {len(infrastructure_kernels)} infrastructure kernel(s)...") + model = model.transform(InsertInfrastructureKernels(infrastructure_kernels)) + + # Phase 2: Infer computational kernels via pattern matching + if computational_kernels: + logger.debug(f"Inferring {len(computational_kernels)} computational kernel(s)...") + model = model.transform(InferKernels(computational_kernels)) + + # Ensure all nodes have unique names after graph construction + # Some legacy FINN transforms (e.g., InferElementwiseBinaryOperation) create + # nodes without names, which causes issues in downstream steps like partitioning + model = model.transform(GiveUniqueNodeNames()) + logger.debug("Assigned unique names to all nodes after dataflow graph construction") + + return model + + +@step(name='insert_infrastructure_kernels') +def insert_infrastructure_kernels_step(model: Any, cfg: Any) -> Any: + """Insert infrastructure kernels via topology analysis (Phase 1 of dataflow graph build). + + Infrastructure kernels are inserted based on graph topology and connectivity patterns, + rather than pattern matching. Examples include: + - DuplicateStreams (for fan-out) + - FIFOs (for buffering) + - AddStreams (for fan-in) + + This step extracts infrastructure kernels from cfg.kernel_selections (those with + is_infrastructure=True metadata) and applies InsertInfrastructureKernels transform. + + Use this step when you want finer control over the build pipeline, running + infrastructure insertion separately from computational kernel inference. 
+ + Args: + model: ONNX model to transform + cfg: Build configuration with kernel_selections attribute + + Returns: + Transformed model with infrastructure kernels inserted + + Blueprint usage: + steps: + - insert_infrastructure_kernels # Phase 1: topology-based insertion + - infer_computational_kernels # Phase 2: pattern-based inference + + See also: + - build_dataflow_graph: Combined step that runs both phases + - infer_computational_kernels: Phase 2 only + """ + kernel_selections = getattr(cfg, 'kernel_selections', None) + if not kernel_selections: + logger.debug("No kernel selections configured, skipping infrastructure insertion") + return model + + logger.debug(f"Processing {len(kernel_selections)} kernel selection(s)...") + + # Extract only infrastructure kernels + infrastructure_kernels = [] + + for kernel_name, _ in kernel_selections: + try: + kernel_class = get_kernel(kernel_name) + metadata = get_component_metadata(kernel_name, 'kernel') + + if metadata.is_infrastructure: + infrastructure_kernels.append(kernel_class) + logger.debug(f" {kernel_name} (infrastructure)") + except KeyError: + logger.error(f" Kernel not found in registry: {kernel_name}") + + # Insert infrastructure kernels via topology analysis + if infrastructure_kernels: + logger.debug(f"Inserting {len(infrastructure_kernels)} infrastructure kernel(s)...") + model = model.transform(InsertInfrastructureKernels(infrastructure_kernels)) + else: + logger.debug("No infrastructure kernels selected, skipping insertion") + + return model + + +@step(name='infer_computational_kernels') +def infer_computational_kernels_step(model: Any, cfg: Any) -> Any: + """Infer computational kernels via pattern matching (Phase 2 of dataflow graph build). + + Computational kernels are inferred by matching ONNX node patterns against kernel + transform patterns. Examples include: + - MatMul → MVAU + - LayerNorm → LayerNorm_hls + - Transpose → Shuffle + - Add/Mul → ElementwiseBinaryOp + This step extracts computational kernels from cfg.kernel_selections (those with + is_infrastructure=False metadata) and applies InferKernels transform. -@step(name="qonnx_to_finn") -def qonnx_to_finn_step(model: Any, cfg: Any) -> Any: - """Convert QONNX to FINN opset.""" + Use this step when you want finer control over the build pipeline, running + computational inference separately from infrastructure insertion. 
- for transform in [ExpandNorms(), FoldConstants(), ConvertDivToMul(), ConvertQONNXtoFINN()]: - model = model.transform(transform) + Args: + model: ONNX model to transform + cfg: Build configuration with kernel_selections attribute + + Returns: + Transformed model with computational kernels inferred and unique node names + + Blueprint usage: + steps: + - insert_infrastructure_kernels # Phase 1: topology-based insertion + - infer_computational_kernels # Phase 2: pattern-based inference + + Implementation notes: + - Applies GiveUniqueNodeNames after inference to fix legacy FINN transforms + - Some FINN transforms (e.g., InferElementwiseBinaryOperation) create nodes + without names, which causes issues in downstream partitioning + + See also: + - build_dataflow_graph: Combined step that runs both phases + - insert_infrastructure_kernels: Phase 1 only + """ + kernel_selections = getattr(cfg, 'kernel_selections', None) + if not kernel_selections: + logger.debug("No kernel selections configured, skipping kernel inference") + return model + + logger.debug(f"Processing {len(kernel_selections)} kernel selection(s)...") + + # Extract only computational kernels + computational_kernels = [] + + for kernel_name, _ in kernel_selections: + try: + kernel_class = get_kernel(kernel_name) + metadata = get_component_metadata(kernel_name, 'kernel') + + if not metadata.is_infrastructure: + computational_kernels.append(kernel_class) + logger.debug(f" {kernel_name} (computational)") + except KeyError: + logger.error(f" Kernel not found in registry: {kernel_name}") + + # Infer computational kernels via pattern matching + if computational_kernels: + logger.debug(f"Inferring {len(computational_kernels)} computational kernel(s)...") + model = model.transform(InferKernels(computational_kernels)) + else: + logger.debug("No computational kernels selected, skipping inference") + + # Ensure all nodes have unique names after graph construction + # Some legacy FINN transforms (e.g., InferElementwiseBinaryOperation) create + # nodes without names, which causes issues in downstream steps like partitioning + model = model.transform(GiveUniqueNodeNames()) + logger.debug("Assigned unique names to all nodes after computational kernel inference") return model -# === Hardware Steps === +# === Backend Specialization === + + +@step(name='specialize_kernel_backends') +def specialize_kernel_backends(model: Any, cfg: Any) -> Any: + """Specialize kernel backends via partitioning + backend selection. + + This step combines create_dataflow_partition and specialize_layers into a + unified transformation that: + + 1. **Partitioning Phase**: Separates consecutive groups of HWCustomOp nodes + into StreamingDataflowPartition nodes, which point to separate ONNX files. + Only dataflow accelerator synthesis can be performed on these HW subgraphs. + + 2. **Specialization Phase**: Converts generic hardware kernel nodes to + specialized backend implementations (HLS or RTL) based on kernel_selections + config and constraint checking. + + The step handles both Brainsmith KernelOp nodes and legacy FINN HWCustomOp nodes, + ensuring compatibility with mixed graphs. 
+ + Args: + model: ModelWrapper containing the ONNX model with hardware kernel nodes + cfg: Build configuration with: + - output_dir: Output directory for intermediate models and configs + - kernel_selections: Backend priority lists for specialization + - specialize_layers_config_file: Optional user config for manual overrides + + Returns: + ModelWrapper containing the specialized dataflow partition model + Blueprint usage: + steps: + - build_dataflow_graph # Infer kernels first + - specialize_kernel_backends # Combined partitioning + specialization + - apply_folding_config # Then apply parallelization -@step(name="specialize_layers") -def specialize_layers_step(model: Any, cfg: Any) -> Any: - """Specialize hardware layers using registry-based backend discovery.""" + Implementation notes: + - Creates template_specialize_layers_config.json for user reference + - Supports single StreamingDataflowPartition only (FINN limitation) + - Returns the dataflow partition model, not the parent model + - Saves parent model to intermediate_models/dataflow_parent.onnx if enabled + """ + logger.debug("Building hardware dataflow graph (partitioning + specialization)...") + # ======================================================================== + # Phase 1: Create Dataflow Partition + # ======================================================================== + + logger.debug("Phase 1: Creating dataflow partition...") + + partition_dir = os.path.join(cfg.output_dir, "intermediate_models", "supported_op_partitions") + + # Use FINN's CreateDataflowPartition to separate HW nodes + parent_model = model.transform(CreateDataflowPartition(partition_model_dir=partition_dir)) + + # Extract the dataflow partition model + sdp_nodes = parent_model.get_nodes_by_op_type("StreamingDataflowPartition") + + if len(sdp_nodes) == 0: + logger.error("No StreamingDataflowPartition nodes found after partitioning") + logger.error("") + logger.error("This typically means one or more nodes failed to be converted to hardware:") + logger.error(" 1. Kernel inference failed - ONNX nodes were not matched to any kernel") + logger.error(" → Check that kernels are listed in blueprint design_space.kernels") + logger.error(" → Verify nodes are supported by the selected kernels") + logger.error( + " 2. Backend specialization failed - kernels lack viable backend implementations" + ) + logger.error(" → Check that backends are configured in kernel_selections") + logger.error(" → Verify RTL backend constraints are satisfied (see SpecializeKernels)") + logger.error("") + logger.error("Debug steps:") + logger.error(" - Inspect intermediate_models/ to see which nodes remain") + logger.error(" - Check logs for kernel inference warnings") + logger.error(" - Verify all ONNX ops have corresponding kernel transforms") + raise RuntimeError( + "No hardware dataflow partition created. " + "One or more nodes failed kernel inference or backend specialization. " + "See logs above for details." + ) + + if len(sdp_nodes) > 1: + logger.warning( + f"Found {len(sdp_nodes)} StreamingDataflowPartition nodes. " + "Only single partition is officially supported by FINN." 
+ ) + + # Get the dataflow partition model file + sdp_node = sdp_nodes[0] + sdp_node_inst = getHWCustomOp(sdp_node, parent_model) + dataflow_model_filename = sdp_node_inst.get_nodeattr("model") + + logger.debug(f"Dataflow partition extracted: {dataflow_model_filename}") + + # Save parent model if requested + if cfg.save_intermediate_models: + parent_model_path = os.path.join( + cfg.output_dir, "intermediate_models", "dataflow_parent.onnx" + ) + parent_model.save(parent_model_path) + logger.debug(f"Saved parent model: {parent_model_path}") + + # Load the dataflow partition for specialization + model = ModelWrapper(dataflow_model_filename) + + # Create template config for user reference + template_config_path = os.path.join(cfg.output_dir, "template_specialize_layers_config.json") + extract_model_config_to_json(model, template_config_path, ["preferred_impl_style"]) + logger.debug(f"Created template config: {template_config_path}") + + # ======================================================================== + # Phase 2: Specialize Layers + # ======================================================================== + + logger.debug("Phase 2: Specializing hardware layers...") + + # Apply user config if provided (manual overrides) if cfg.specialize_layers_config_file is not None: + logger.debug(f"Applying user config: {cfg.specialize_layers_config_file}") model = model.transform(GiveUniqueNodeNames()) model = model.transform(ApplyConfig(cfg.specialize_layers_config_file)) - # Run Brainsmith registry-based specialization first - model = model.transform(SpecializeKernels(cfg)) - - # Run FINN's step_specialize_layers as catch-all for any remaining ops - # model = step_specialize_layers(model, cfg) + # Run registry-based backend specialization + logger.debug("Running registry-based backend specialization...") + model = model.transform( + SpecializeKernels(cfg), + apply_to_subgraphs=True # Support MLO: specialize kernels in FINNLoop bodies + ) - for transform in [GiveUniqueNodeNames(), InferShapes(), InferDataTypes()]: - model = model.transform(transform) + # Clean up and infer properties + logger.debug("Running cleanup transformations...") + for transform in [ + GiveUniqueNodeNames(), + InferShapes(), + InferDataTypes() + ]: + model = model.transform(transform, apply_to_subgraphs=True) return model -# === Optimization Steps === +# Backward compatibility alias +@step(name='build_hw_graph') +def build_hw_graph(model: Any, cfg: Any) -> Any: + """Legacy alias for specialize_kernel_backends (backward compatibility). + DEPRECATED: Use 'specialize_kernel_backends' instead. -@step(name="constrain_folding_and_set_pumped_compute") -def constrain_folding_and_set_pumped_compute_step(model, cfg): - """Apply optimizations including folding constraints and pumped compute.""" - for transform in [TempShuffleFixer(), SetPumpedCompute()]: - model = model.transform(transform) - return model + This alias maintains compatibility with existing blueprints that use + the old 'build_hw_graph' step name. New blueprints should use the + clearer 'specialize_kernel_backends' name. + + See specialize_kernel_backends() for full documentation. + """ + logger.warning( + "Step 'build_hw_graph' is deprecated. " + "Use 'specialize_kernel_backends' instead for clarity." 
+ ) + return specialize_kernel_backends(model, cfg) diff --git a/brainsmith/steps/hardware_optimization_steps.py b/brainsmith/steps/hardware_optimization_steps.py new file mode 100644 index 00000000..40df46da --- /dev/null +++ b/brainsmith/steps/hardware_optimization_steps.py @@ -0,0 +1,199 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Hardware Optimization Steps + +Hardware-specific optimizations including parallelization configuration, +FPS-based auto-parallelization, folding constraints, and parameter exploration. +""" + +import logging +from typing import Any + +from finn.transformation.fpgadataflow.minimize_accumulator_width import MinimizeAccumulatorWidth +from finn.transformation.fpgadataflow.minimize_weight_bit_width import MinimizeWeightBitWidth +from finn.util.basic import getHWCustomOp +from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.transformation.infer_datatypes import InferDataTypes + +from brainsmith.primitives.transforms.parallelization import ( + ApplyParallelizationConfig, + SetParallelization, +) +from brainsmith.primitives.transforms.set_pumped_compute import SetPumpedCompute +from brainsmith.primitives.transforms.temp_shuffle_fixer import TempShuffleFixer +from brainsmith.registry import step + +logger = logging.getLogger(__name__) + + +@step(name="constrain_folding_and_set_pumped_compute") +def constrain_folding_and_set_pumped_compute_step(model, cfg): + """Apply optimizations including folding constraints and pumped compute.""" + for transform in [TempShuffleFixer(), SetPumpedCompute()]: + model = model.transform(transform) + return model + + +@step(name="apply_parallelization_config") +def apply_parallelization_config_step(model: Any, cfg: Any) -> Any: + """Apply parallelization config from JSON file. + + Drop-in replacement for FINN's ApplyConfig for parallelization. + Works with both FINN HWCustomOp and Brainsmith KernelOp nodes. + + Config file path is read from cfg.folding_config_file (FINN convention). + """ + config_file = getattr(cfg, "folding_config_file", None) + + if config_file is None: + logger.warning("No folding_config_file specified in config, skipping parallelization") + return model + + # Handle FINNLoop node naming before applying config + model = model.transform(GiveUniqueNodeNames()) + + loop_nodes = model.get_nodes_by_op_type("FINNLoop") + for node in loop_nodes: + node_inst = getHWCustomOp(node, model) + loop_body = node_inst.get_nodeattr("body") + loop_body = loop_body.transform( + GiveUniqueNodeNames(prefix=node.name + "_") + ) + node_inst.set_nodeattr("body", loop_body.graph) + + logger.debug(f"Applying parallelization config from: {config_file}") + + # Apply to both top-level and FINNLoop subgraphs + model = model.transform( + ApplyParallelizationConfig(config_file), + apply_to_subgraphs=True + ) + + return model + + +@step(name="target_fps_parallelization") +def target_fps_parallelization_step(model: Any, cfg: Any) -> Any: + """Auto-generate parallelization from target FPS. + + Drop-in replacement for FINN's SetFolding/target_fps_parallelization. + Works with both FINN HWCustomOp and Brainsmith KernelOp nodes. 
+ + Target cycles are calculated from cfg.target_fps and cfg.synth_clk_period_ns: + target_cycles = (1 / target_fps) / (clock_period_ns * 1e-9) + """ + target_fps = getattr(cfg, "target_fps", None) + + if target_fps is None: + logger.warning("No target_fps specified in config, skipping auto-parallelization") + return model + + # Get clock period (default to 5ns if not specified) + clock_period_ns = getattr(cfg, "synth_clk_period_ns", 5.0) + + # Calculate target cycles from FPS + target_cycles = int(1e9 / (target_fps * clock_period_ns)) + + logger.debug( + f"Auto-generating parallelization for target_fps={target_fps}, " + f"clock={clock_period_ns}ns, target_cycles={target_cycles}" + ) + + # Get optional MVAU weight stream width constraint (default 36 bits) + mvau_wwidth_max = getattr(cfg, "mvau_wwidth_max", 36) + + # Get optional two-pass relaxation flag (default True) + two_pass_relaxation = getattr(cfg, "two_pass_relaxation", True) + + # Apply to both top-level and FINNLoop subgraphs + model = model.transform( + SetParallelization( + target_cycles_per_frame=target_cycles, + mvau_wwidth_max=mvau_wwidth_max, + two_pass_relaxation=two_pass_relaxation, + ), + apply_to_subgraphs=True, + use_preorder_traversal=False, + ) + + # Post-process FINNLoop bodies to ensure unique names and persist changes + model = model.transform(GiveUniqueNodeNames()) + loop_nodes = model.get_nodes_by_op_type("FINNLoop") + for node in loop_nodes: + node_inst = getHWCustomOp(node, model) + loop_body = node_inst.get_nodeattr("body") + loop_body = loop_body.transform(GiveUniqueNodeNames(prefix=node.name + "_")) + node_inst.set_nodeattr("body", loop_body.graph) + + return model + + +@step(name="explore_kernel_params") +def explore_kernel_params_step(model, cfg): + """Parameter exploration for design space exploration (DSE). + + Explores different parallelization configurations to find optimal + hardware resource utilization and performance trade-offs. + """ + # Import here to avoid circular dependency + from brainsmith.primitives.transforms.parameter_exploration import ExploreKernelParams + + if not hasattr(cfg, 'param_exploration_config'): + logger.warning("No param_exploration_config specified, skipping parameter exploration") + return model + + logger.debug("Running parameter exploration...") + model = model.transform(ExploreKernelParams(cfg.param_exploration_config)) + + return model + + +@step(name="minimize_bit_width") +def minimize_bit_width_step(model: Any, cfg: Any) -> Any: + """Tighten weight and accumulator bit widths for each layer. + + Brainsmith version that skips RoundAndClipThresholds since it's + applied elsewhere in the Brainsmith build flow. + + This step: + 1. Minimizes weight bit widths (MinimizeWeightBitWidth) + 2. Minimizes accumulator bit widths (MinimizeAccumulatorWidth) + 3. Propagates datatype changes (InferDataTypes) + + All transforms are applied to subgraphs (FINNLoop bodies) as well. + + Args: + model: ModelWrapper containing the dataflow graph + cfg: Build configuration object + + Returns: + Transformed model with minimized bit widths + """ + # Check if bit width minimization is enabled in config + minimize_enabled = getattr(cfg, "minimize_bit_width", True) + + if not minimize_enabled: + logger.info("Bit width minimization disabled in config") + return model + + logger.info("Minimizing bit widths (weights, accumulators)") + + # 1. 
Minimize weight bit widths + logger.debug("Running MinimizeWeightBitWidth...") + model = model.transform(MinimizeWeightBitWidth(), apply_to_subgraphs=True) + + # 2. Minimize accumulator bit widths + logger.debug("Running MinimizeAccumulatorWidth...") + model = model.transform(MinimizeAccumulatorWidth(), apply_to_subgraphs=True) + + # NOTE: RoundAndClipThresholds is applied elsewhere in Brainsmith flow + + # 3. Propagate datatype changes through the network + logger.debug("Running InferDataTypes to propagate changes...") + model = model.transform(InferDataTypes(), apply_to_subgraphs=True) + + logger.info("Bit width minimization complete") + + return model diff --git a/brainsmith/steps/parallelization.py b/brainsmith/steps/parallelization.py deleted file mode 100644 index 53739f3d..00000000 --- a/brainsmith/steps/parallelization.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Parallelization transformation steps. - -These steps provide drop-in replacements for FINN's parallelization pipeline, -working with both legacy FINN HWCustomOp nodes and modern Brainsmith KernelOp nodes. - -The step layer focuses on extracting configuration from the build system (cfg), -while ApplyParallelizationConfig and SetParallelization handle the actual logic. -""" -import logging -from typing import Any - -from brainsmith.primitives.transforms.parallelization import ( - ApplyParallelizationConfig, - SetParallelization, -) -from brainsmith.registry import step - -logger = logging.getLogger(__name__) - - -@step(name="apply_parallelization_config") -def apply_parallelization_config_step(model: Any, cfg: Any) -> Any: - """Apply parallelization config from JSON file. - - Drop-in replacement for FINN's ApplyConfig for parallelization. - Works with both FINN HWCustomOp and Brainsmith KernelOp nodes. - - Config file path is read from cfg.folding_config_file (FINN convention). - - Args: - model: ModelWrapper to transform - cfg: Build configuration with folding_config_file attribute - - Returns: - ModelWrapper with parallelization applied - - Example config format: - { - "Defaults": { - "PE": [1, ["all"]] - }, - "MVAU_0": {"PE": 8, "SIMD": 4}, - "LayerNorm_0": {"PE": 16} - } - """ - config_file = getattr(cfg, "folding_config_file", None) - - if config_file is None: - logger.warning("No folding_config_file specified in config, skipping parallelization") - return model - - logger.debug(f"Applying parallelization config from: {config_file}") - model = model.transform(ApplyParallelizationConfig(config_file)) - - return model - - -@step(name="target_fps_parallelization") -def target_fps_parallelization_step(model: Any, cfg: Any) -> Any: - """Auto-generate parallelization from target FPS. - - Drop-in replacement for FINN's SetFolding/target_fps_parallelization. - Works with both FINN HWCustomOp and Brainsmith KernelOp nodes. 
- - Target cycles are calculated from cfg.target_fps and cfg.synth_clk_period_ns: - target_cycles = (1 / target_fps) / (clock_period_ns * 1e-9) - - Args: - model: ModelWrapper to transform - cfg: Build configuration with target_fps and synth_clk_period_ns attributes - - Returns: - ModelWrapper with parallelization optimized for target FPS - - Example: - target_fps = 100 (frames per second) - synth_clk_period_ns = 5.0 (5ns clock = 200MHz) - target_cycles = 1e9 / (100 * 5.0) = 2,000,000 cycles per frame - """ - target_fps = getattr(cfg, "target_fps", None) - - if target_fps is None: - logger.warning("No target_fps specified in config, skipping auto-parallelization") - return model - - # Get clock period (default to 5ns if not specified) - clock_period_ns = getattr(cfg, "synth_clk_period_ns", 5.0) - - # Calculate target cycles from FPS - # Cycles = (1 second / target_fps) / clock_period - # Convert to integer cycles - target_cycles = int(1e9 / (target_fps * clock_period_ns)) - - logger.debug( - f"Auto-generating parallelization for target_fps={target_fps}, " - f"clock={clock_period_ns}ns, target_cycles={target_cycles}" - ) - - # Get optional MVAU weight stream width constraint (default 36 bits) - mvau_wwidth_max = getattr(cfg, "mvau_wwidth_max", 36) - - # Get optional two-pass relaxation flag (default True) - two_pass_relaxation = getattr(cfg, "two_pass_relaxation", True) - - model = model.transform( - SetParallelization( - target_cycles_per_frame=target_cycles, - mvau_wwidth_max=mvau_wwidth_max, - two_pass_relaxation=two_pass_relaxation, - ) - ) - - return model diff --git a/brainsmith/steps/parameter_exploration.py b/brainsmith/steps/parameter_exploration.py deleted file mode 100644 index a607886b..00000000 --- a/brainsmith/steps/parameter_exploration.py +++ /dev/null @@ -1,262 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -"""Parameter exploration step for DSE (Phase 7).""" -import json -import logging -import time -from pathlib import Path -from typing import Any - -from qonnx.custom_op.registry import getCustomOp - -from brainsmith.dataflow.kernel_op import KernelOp -from brainsmith.dataflow.utils import iter_valid_configurations -from brainsmith.registry import step - -logger = logging.getLogger(__name__) - - -@step(name="explore_kernel_params") -def explore_kernel_params_step(model, cfg): - """Explore parallelization parameters for all KernelOp nodes. - - This step systematically explores all valid parallelization parameter - configurations (SIMD, PE, etc.) for each KernelOp in the model. It uses - the two-phase kernel construction system to efficiently validate configurations. - - The step: - 1. Finds all KernelOp nodes (domain="finn.custom_op.fpgadataflow") - 2. For each KernelOp, gets valid parameter ranges via get_valid_ranges() - 3. Explores all configurations using iter_valid_configurations() - 4. Validates each configuration with get_design_point() (returns KernelDesignPoint) - 5. 
Logs results and optionally saves to JSON - - This is useful for: - - Verifying all configs work before full DSE - - Understanding the design space size - - Debugging configuration issues - - Collecting baseline metrics - - Args: - model: ModelWrapper containing the ONNX model - cfg: FINN config object with output_dir - - Returns: - ModelWrapper (unchanged - exploration only) - - Blueprint usage: - steps: - - infer_kernels - - explore_kernel_params # Add after kernel inference - - create_dataflow_partition - """ - logger.debug("=" * 80) - logger.debug("Exploring kernel parallelization parameters...") - logger.debug("=" * 80) - - # Find all KernelOp nodes - kernel_nodes = [] - for node in model.graph.node: - # Check if this is a custom op (any domain) - # Skip standard ONNX ops (they have empty domain) - if not node.domain or node.domain == "": - continue - - try: - custom_op = getCustomOp(node) - # Check if it's a KernelOp (has get_valid_ranges method) - if isinstance(custom_op, KernelOp): - kernel_nodes.append((node, custom_op)) - except Exception as e: - # Not a registered custom op or not a KernelOp, skip silently - logger.debug(f"Skipping {node.name} ({node.op_type}): {e}") - continue - - if not kernel_nodes: - logger.warning("No KernelOp nodes found in model") - logger.info("Skipping parameter exploration") - return model - - logger.debug(f"Found {len(kernel_nodes)} KernelOp nodes to explore:") - for node, _ in kernel_nodes: - logger.debug(f" - {node.name} ({node.op_type})") - - # Explore each kernel - all_results = [] - total_start = time.time() - - for node, kernel_op in kernel_nodes: - logger.debug("-" * 80) - logger.debug(f"Exploring {node.name} ({node.op_type})...") - - # Get valid ranges - try: - valid_ranges = kernel_op.get_valid_ranges(model) - except Exception as e: - logger.error(f"Failed to get valid ranges for {node.name}: {e}") - continue - - if not valid_ranges: - logger.warning(f" No parallelization parameters for {node.name}") - continue - - # Log parameter space - logger.debug(f" Parameters: {list(valid_ranges.keys())}") - for param_name, param_values in valid_ranges.items(): - logger.debug( - f" {param_name}: {len(param_values)} values " - f"(range: {min(param_values)}-{max(param_values)})" - ) - - # Calculate total configs - total_configs = 1 - for param_values in valid_ranges.values(): - total_configs *= len(param_values) - logger.debug(f" Total configurations: {total_configs:,}") - - # Explore configurations - results = _explore_kernel_configs(node.name, kernel_op, model, total_configs) - all_results.append(results) - - total_elapsed = time.time() - total_start - - # Log summary - logger.debug("=" * 80) - logger.debug("Parameter Exploration Summary") - logger.debug("=" * 80) - - total_kernels = len(all_results) - total_configs_explored = sum(r["configs_explored"] for r in all_results) - total_successful = sum(r["configs_successful"] for r in all_results) - total_failed = sum(r["configs_failed"] for r in all_results) - - logger.info(f"Kernels explored: {total_kernels}") - logger.info(f"Total configurations: {total_configs_explored:,}") - logger.info(f"Successful: {total_successful:,}") - logger.info(f"Failed: {total_failed:,}") - logger.info(f"Total time: {total_elapsed:.2f}s") - - if total_configs_explored > 0: - avg_time_per_config = (total_elapsed / total_configs_explored) * 1000 - logger.info(f"Average time per config: {avg_time_per_config:.2f}ms") - - # Save results to JSON - if hasattr(cfg, "output_dir"): - output_path = Path(cfg.output_dir) / 
"parameter_exploration_results.json" - _save_results(output_path, all_results, total_elapsed) - logger.info(f"Results saved to: {output_path}") - - logger.debug("=" * 80) - - return model - - -def _explore_kernel_configs( - node_name: str, kernel_op: KernelOp, model, expected_count: int -) -> dict[str, Any]: - """Explore all configurations for a single kernel. - - Args: - node_name: Name of the ONNX node - kernel_op: KernelOp instance - model: ModelWrapper - expected_count: Expected number of configurations - - Returns: - Dict with exploration results - """ - start_time = time.time() - successful = 0 - failed = 0 - config_details = [] - - logger.debug(f" Exploring {expected_count:,} configurations...") - - for i, config in enumerate(iter_valid_configurations(kernel_op, model)): - config_start = time.time() - - try: - # Set parameters - for param_name, param_value in config.items(): - kernel_op.set_nodeattr(param_name, param_value) - - # Validate configuration - design_point = kernel_op.get_design_point(model) - - # Verify parameters match - for param_name, param_value in config.items(): - actual_value = design_point.params.get(param_name) - if actual_value != param_value: - raise ValueError( - f"Parameter mismatch: {param_name}={actual_value}, " - f"expected {param_value}" - ) - - config_time = time.time() - config_start - successful += 1 - - config_details.append( - {"config": config, "status": "success", "time_ms": config_time * 1000} - ) - - except Exception as e: - config_time = time.time() - config_start - failed += 1 - logger.warning(f" Config {config} failed: {e}") - - config_details.append( - { - "config": config, - "status": "failed", - "error": str(e), - "time_ms": config_time * 1000, - } - ) - - # Log progress every 10 configs - if (i + 1) % 10 == 0 or (i + 1) == expected_count: - logger.debug( - f" Progress: {i+1}/{expected_count} configs " - f"({successful} successful, {failed} failed)" - ) - - elapsed = time.time() - start_time - - logger.debug(f" Completed in {elapsed:.2f}s") - logger.debug( - f" Success rate: {successful}/{expected_count} " - f"({100*successful/max(expected_count,1):.1f}%)" - ) - - return { - "node_name": node_name, - "configs_explored": expected_count, - "configs_successful": successful, - "configs_failed": failed, - "time_seconds": elapsed, - "config_details": config_details, - } - - -def _save_results(output_path: Path, results: list[dict[str, Any]], total_time: float): - """Save exploration results to JSON file. - - Args: - output_path: Path to save results - results: List of per-kernel results - total_time: Total exploration time - """ - output_data = { - "summary": { - "total_kernels": len(results), - "total_configs": sum(r["configs_explored"] for r in results), - "total_successful": sum(r["configs_successful"] for r in results), - "total_failed": sum(r["configs_failed"] for r in results), - "total_time_seconds": total_time, - }, - "kernels": results, - } - - with open(output_path, "w") as f: - json.dump(output_data, f, indent=2) diff --git a/brainsmith/steps/topology_cleanup_steps.py b/brainsmith/steps/topology_cleanup_steps.py new file mode 100644 index 00000000..3f135c1c --- /dev/null +++ b/brainsmith/steps/topology_cleanup_steps.py @@ -0,0 +1,61 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Topology Cleanup Steps + +Initial graph topology transformations that prepare models for quantization import. +These steps normalize graph structure before quantization metadata is processed. 
+""" + +import logging +from typing import Any + +from qonnx.transformation.extract_conv_bias import ExtractBiasFromConv +from qonnx.transformation.fold_constants import FoldConstants +from qonnx.transformation.gemm_to_matmul import GemmToMatMul +from qonnx.transformation.general import ConvertDivToMul +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit +from qonnx.transformation.remove import RemoveIdentityOps + +from brainsmith.primitives.transforms.import_qonnx_quantization import ImportQONNXQuantization +from brainsmith.registry import step + +logger = logging.getLogger(__name__) + + +@step(name="finn_topology_cleanup") +def finn_topology_cleanup_step(model: Any, cfg: Any) -> Any: + """Generic graph topology cleanup for FINN compatibility. + + Applies structural transformations to normalize the graph: + - ExtractBiasFromConv: Decompose Conv with bias into Conv + Add + - GemmToMatMul: Convert Gemm to MatMul (FINN doesn't support Gemm) + - FoldTransposeIntoQuantInit: Fold Transpose into weight initializers + - FoldConstants: Constant propagation and folding + - ConvertDivToMul: Normalize division to multiplication + - RemoveIdentityOps: Remove no-op nodes + """ + for transform in [ + ExtractBiasFromConv(), + GemmToMatMul(), + FoldTransposeIntoQuantInit(), + FoldConstants(), + ConvertDivToMul(), + RemoveIdentityOps(), + ]: + model = model.transform(transform) + return model + + +@step(name="import_qonnx_quantization") +def import_qonnx_quantization_step(model: Any, cfg: Any) -> Any: + """Import QONNX quantization metadata for hardware compilation. + + Converts QONNX quantization nodes (Quant, BipolarQuant, Trunc) to FINN + quantization representation (MultiThreshold, QuantAvgPool2d) and prepares + threshold values for integer hardware. + """ + model = model.transform(ImportQONNXQuantization()) + return model diff --git a/brainsmith/steps/normalize_layouts.py b/brainsmith/steps/topology_optimization_steps.py similarity index 82% rename from brainsmith/steps/normalize_layouts.py rename to brainsmith/steps/topology_optimization_steps.py index 47f9f23b..a26c142e 100644 --- a/brainsmith/steps/normalize_layouts.py +++ b/brainsmith/steps/topology_optimization_steps.py @@ -2,12 +2,10 @@ # Licensed under the MIT License. """ -Layout Normalization Build Step +Topology Optimization Steps -Provides a preprocessing step that normalizes all tensor layouts to NHWC -(channel-last) format for dataflow acceleration. This eliminates the need -for per-kernel layout checking and ensures consistent channel-last layout -throughout the dataflow region. +Graph optimization transformations applied after quantization import but before +kernel inference. These optimize the graph topology for dataflow execution. """ import logging @@ -42,7 +40,6 @@ def normalize_dataflow_layouts_step(model: Any, cfg: Any) -> Any: Usage in blueprint: steps: - "normalize_dataflow_layouts" # Add before kernel inference - - "build_dataflow_graph" - ... 
""" logger.debug("Normalizing dataflow layouts to NHWC (channel-last)") diff --git a/docker/fetch-repos.sh b/docker/fetch-repos.sh index 1069ffcb..81d6fb15 100755 --- a/docker/fetch-repos.sh +++ b/docker/fetch-repos.sh @@ -78,8 +78,8 @@ fi # Define our Git dependencies - URLs and revisions declare -A GIT_DEPS=( ["brevitas"]="https://github.com/Xilinx/brevitas.git@c10ef8764967e9cacc60347ce185be14e4ad97c4" - ["qonnx"]="https://github.com/fastmachinelearning/qonnx.git@f2c4ccd3e71795c9f116ee5a0c87a7dfd590c6d0" - ["finn"]="https://github.com/tafk7/finn.git@feature/logging-integration-transformer" + ["qonnx"]="https://github.com/fastmachinelearning/qonnx.git@custom/brainsmith" + ["finn"]="https://github.com/tafk7/finn.git@feature/mlo-merge" ["finn-experimental"]="https://github.com/Xilinx/finn-experimental.git@0724be21111a21f0d81a072fccc1c446e053f851" ["dataset-loading"]="https://github.com/fbcotter/dataset_loading.git@0.0.4" ) @@ -129,12 +129,15 @@ resolve_ref_to_commit() { # Fetch latest from remote to ensure we have up-to-date refs git fetch origin --quiet 2>/dev/null || true - # First try to resolve as-is (works for local branches, tags, and hashes) - local resolved_commit=$(git rev-parse "$ref" 2>/dev/null || echo "") + local resolved_commit="" - # If that fails, try as a remote branch on origin + # For branch refs, always prefer origin's version to detect when local is behind + # Try origin/$ref first (remote branch) + resolved_commit=$(git rev-parse "origin/$ref" 2>/dev/null || echo "") + + # If that fails, try as-is (works for tags and commit hashes) if [ -z "$resolved_commit" ]; then - resolved_commit=$(git rev-parse "origin/$ref" 2>/dev/null || echo "") + resolved_commit=$(git rev-parse "$ref" 2>/dev/null || echo "") fi # If still no luck, return "unknown" @@ -275,7 +278,8 @@ update_repo() { echo -e " ${current_commit:0:8} → $rev (${expected_commit:0:8})" cd "$name" git fetch --all --quiet - git -c advice.detachedHead=false checkout "$rev" --quiet + # Checkout the resolved commit hash to ensure we get the remote version + git -c advice.detachedHead=false checkout "$expected_commit" --quiet echo -e " ${GREEN}✓${NC} Updated to $rev" cd .. return 0 diff --git a/docs/developer-guide/experimental/3-reference/blueprints.md b/docs/developer-guide/experimental/3-reference/blueprints.md index 5f1eaf60..f9979cf7 100644 --- a/docs/developer-guide/experimental/3-reference/blueprints.md +++ b/docs/developer-guide/experimental/3-reference/blueprints.md @@ -116,8 +116,8 @@ Kernels define the available hardware implementations for the dataflow graph. Th **Two kernel types:** -- **Computational kernels** are pattern-matched from ONNX operations during the `build_dataflow_graph` step (e.g., ONNX MatMul → MVAU, ONNX Softmax → Softmax) -- **Infrastructure kernels** are inserted by topology transforms that analyze graph structure (e.g., DuplicateStreams for tensor fanout, FIFO for buffering) +- **Computational kernels** are pattern-matched from ONNX operations during the `infer_computational_kernels` step (e.g., ONNX MatMul → MVAU, ONNX Softmax → Softmax) +- **Infrastructure kernels** are inserted by topology transforms during the `insert_infrastructure_kernels` step (e.g., DuplicateStreams for tensor fanout, FIFO for buffering) Both types use the backends you specify in this section. @@ -139,7 +139,7 @@ class LayerNorm_hls(LayerNorm, HLSBackend): ``` Then use `LayerNorm_hls` in the blueprint, not just `hls`. 
-**Common computational kernels** (pattern-matched during `build_dataflow_graph`):
+**Common computational kernels** (pattern-matched during `infer_computational_kernels`):
 - `MVAU` - Matrix-Vector-Activation Unit (dense/linear layers)
 - `Thresholding` - Quantized activation functions
 - `LayerNorm` - Layer normalization
@@ -171,20 +171,18 @@ steps:
   # Optional steps - creates paths with and without
   - ["minimize_bit_width", ~]  # ~ means skip this step
 
-  # Dataflow graph construction (two-phase: infrastructure + computational)
-  - "build_dataflow_graph"  # Auto-splits kernels, inserts infrastructure + patterns
+  # Dataflow graph construction - Option 1: Combined (backward compatible)
+  - "build_dataflow_graph"  # Auto-splits kernels, runs both phases
 
-  # Advanced: Manual control (if not using build_dataflow_graph)
-  # - "insert_duplicate_streams"  # Insert DuplicateStreams only
-  # - "infer_kernels_manual"  # Pattern-match computational only
+  # Dataflow graph construction - Option 2: Split (finer control)
+  # - "insert_infrastructure_kernels"  # Phase 1: topology-based (DuplicateStreams, etc.)
+  # - "infer_computational_kernels"  # Phase 2: pattern-matching (MVAU, LayerNorm, etc.)
 
-  # Post-inference infrastructure (optional, run after build_dataflow_graph)
-  - "insert_fifo"  # Insert FIFOs for buffering
-  - "insert_dwc"  # Insert data width converters
+  # Backend specialization
+  - "specialize_kernel_backends"  # Partition + select HLS/RTL backends
+  # Legacy name (deprecated): "build_hw_graph"
 
-  # Common FINN pipeline steps
-  - "create_dataflow_partition"  # Partition into dataflow regions
-  - "specialize_layers"  # Specialize to hardware
+  # Parallelization
   - "apply_folding_config"  # Apply parallelization
   - "generate_estimate_reports"  # Generate resource estimates
 ```
diff --git a/docs/developer-guide/multi-layer-offload.md b/docs/developer-guide/multi-layer-offload.md
new file mode 100644
index 00000000..969c153c
--- /dev/null
+++ b/docs/developer-guide/multi-layer-offload.md
@@ -0,0 +1,605 @@
+# Multilayer Offload (MLO)
+
+Multilayer Offload (MLO) is a powerful feature recently added to FINN that enables much larger neural networks to be deployed by implementing a repeating slice of the model (such as a single transformer encoder layer) in hardware and cycling the model weights through external memory (DRAM/HBM). This technique makes it possible to map models that would otherwise be too large to fit on the FPGA.
+
+## Overview
+
+Large deep learning models such as transformers and SLMs (and LLMs, for that matter) often have millions or billions of parameters processed by several identical repeating layers. One solution would be to map these layers across multiple FPGAs, but the sheer number of layers (e.g. 32 in Phi-4-mini) makes it impractical to spread the design over so many devices. MLO overcomes this limitation by:
+
+1. **Implementing a single repeating layer** (e.g., one transformer encoder) in hardware
+2. **Storing weights off-chip** in high-bandwidth memory (HBM/DRAM)
+3. **Streaming weights** into the accelerator as needed for each layer
+4. **Reusing the same hardware** to process multiple layers sequentially
+
+This approach trades some throughput for the ability to handle much larger models, making it ideal for large transformer models such as SLMs, vision transformers, and other deep architectures.
+
+## How It Works
+
+### Loop Body Hierarchy
+
+MLO works by identifying a repeating structure in the neural network and implementing only that structure in hardware.
**Currently, loop body discovery is not automated** - users must manually identify one iteration of the repeating pattern and specify it using the `loop_body_hierarchy` parameter: + +```yaml +finn_config: + loop_body_hierarchy: [['encoder', 'encoder.layer.0']] +``` + +**Manual Loop Body Identification:** +The `loop_body_hierarchy` configuration must match the hierarchical naming structure in your ONNX model, which corresponds to the `pkg.torch.onnx.name_scopes` field used during model export. The loop rolling transformation uses these name scopes to determine which levels of hierarchy to include in the loop body. + +> **⚠️ Important:** You must use `dynamo=True` when exporting your PyTorch model to ONNX. Exporting with `dynamo=True` generates the metadata (name scopes) that MLO requires to identify repeating structures. Without this flag, the ONNX model will lack the hierarchical metadata needed for loop body discovery, and the MLO transformation will fail to locate the repeating patterns. + +**Technical Implementation:** +The node extraction mechanism is implemented in FINN's loop rolling transformations: + +- **Step Location**: `deps/finn/src/finn/builder/build_dataflow_steps.py` +- **Extraction Process**: `deps/finn/src/finn/transformation/fpgadataflow/loop_rolling.py` (LoopExtraction class) +- **Hierarchy Matching**: `deps/finn/src/finn/util/onnxscript_helpers.py` (PytorchHierarchyNode class) + +The extraction works by: +1. Creating a hierarchy parser from PyTorch metadata (`pkg.torch.onnx.name_scopes`) +2. Adding each ONNX node to the parser based on its hierarchy path +3. Using prefix matching to find all nodes under the specified hierarchy paths +4. Extracting matching nodes to create loop templates and removing originals from the main graph + +This process requires the PyTorch exporter metadata generated by `dynamo=True`, which contains the module instance hierarchies that map ONNX nodes back to their originating PyTorch modules. + +This configuration tells Brainsmith: +- Look for a repeating pattern called 'encoder' (top-level hierarchy) +- The repeating unit is 'encoder.layer.0' (one complete encoder layer) +- All encoder layers (layer.0, layer.1, layer.2, etc.) will be processed using the same hardware +- The name scopes must exactly match the ONNX node names for proper identification + +#### Multiple Hierarchy Groups + +For models with multiple independent repeating structures, you can specify multiple hierarchy groups in the `loop_body_hierarchy` configuration: + +```yaml +finn_config: + loop_body_hierarchy: [ + ['encoder', 'encoder.layer.0'], + ['encoder', 'encoder.layer.1'] + ] +``` + +This advanced configuration enables the following: +- **Multiple Loop Iterations in a Single Body** - Include nodes from consecutive layers (e.g., layer.0 and layer.1) to unroll multiple iterations into the hardware implementation +- **Fine-tuning Node Selection** - Adjust which nodes are included in the loop body when metadata is lost or inexact during ONNX export + +**Multiple Group Behavior:** +- The loop body will include **all** of the nodes belonging to each hierarchy region within the loop body. 
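+
+Before committing to a grouping, it can help to check which nodes each hierarchy group would actually capture. The following is a hedged sketch, not part of the Brainsmith or FINN API: it assumes the ONNX node names follow the scope-prefixed convention described above, and the model path is hypothetical:
+
+```python
+import onnx
+from collections import defaultdict
+
+# Hypothetical path to a dynamo-exported model
+model = onnx.load("model_with_metadata.onnx")
+groups = [["encoder", "encoder.layer.0"], ["encoder", "encoder.layer.1"]]
+
+# Bucket node names by the final prefix of each hierarchy group
+nodes_by_group = defaultdict(list)
+for node in model.graph.node:
+    for group in groups:
+        if node.name.startswith(group[-1]):
+            nodes_by_group[group[-1]].append(node.name)
+
+for prefix, names in nodes_by_group.items():
+    print(f"{prefix}: {len(names)} node(s)")
+```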
+
+#### Hierarchy Level Specification
+
+The `loop_body_hierarchy` can specify multiple levels of hierarchy to precisely control what gets included in the loop body:
+
+**Two-level hierarchy (simple case):**
+```yaml
+loop_body_hierarchy: [['encoder', 'encoder.layer.0']]
+```
+- Includes all nodes under `encoder.layer.0.*`
+- Good for simple transformer architectures
+
+**Three-level hierarchy (precise control):**
+```yaml
+loop_body_hierarchy: [
+  ['bert', 'bert.encoder', 'bert.encoder.layer.0']
+]
+```
+- Specifies the full path: model → encoder stack → specific layer
+- Provides more precise control over node selection
+- Useful for complex models with nested structures
+
+The FINN loop rolling step will find all ONNX nodes whose names start with the final hierarchy level (e.g., `bert.encoder.layer.0`) and extract them as the loop body.
+
+### Loop Rolling Process
+
+The loop rolling transformation (`step_loop_rolling` in FINN) performs these key operations:
+
+1. **Parses the `loop_body_hierarchy`** to identify which nodes belong to the repeating structure
+2. **Extracts nodes by name scope matching** - finds all ONNX nodes whose names match the specified hierarchy pattern (e.g., nodes starting with 'bert.encoder.layer.0')
+3. **Generates loop iteration logic** - creates control structures to iterate through all layers using the same hardware
+4. **Sets up weight streaming infrastructure** - configures memory interfaces to stream different weights for each iteration
+5. **Updates folding configuration** - modifies parallelization parameters to account for the loop structure
+
+#### Loop Body Extraction Details
+
+The specific extraction logic is implemented in the FINN library (`finn.builder.build_dataflow_steps.step_loop_rolling`). While the exact source code is not visible in this repository, the process performs these operations based on observable behavior:
+
+**Node Selection Process:**
+```python
+# Conceptual extraction logic (actual implementation in FINN)
+def extract_loop_body_nodes(model, loop_body_hierarchy):
+    """Extract nodes matching the loop body hierarchy pattern."""
+    extracted_nodes = []
+
+    # Get the target pattern from the hierarchy (e.g., 'bert.encoder.layer.0')
+    target_pattern = loop_body_hierarchy[0][-1]  # Final level
+
+    # Find all nodes whose names start with the target pattern
+    for node in model.graph.node:
+        if node.name.startswith(target_pattern):
+            extracted_nodes.append(node)
+
+    return extracted_nodes
+```
+
+The metadata fields exported by PyTorch Dynamo are not always reliable and can in some cases be removed by optimization passes. When encountered, these issues are reported to the onnxscript team and are often resolved. However, we have tried to make the loop body extraction process as robust as possible in the presence of missing metadata.
+
+In some cases the extraction process can still recover nodes whose metadata fields are missing: when a node lacks its metadata field, loop extraction attempts to infer the missing hierarchy information from the metadata of the node's input and output nodes, as sketched below.
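+
+As an illustration only (the actual recovery logic lives in FINN's LoopExtraction class), inferring a missing scope from neighboring nodes could look roughly like this; the dict-based bookkeeping is an assumption made for the sketch:
+
+```python
+def infer_missing_scope(node, scope_by_tensor):
+    """Guess a node's hierarchy scope from its producers and consumers.
+
+    scope_by_tensor maps a tensor name to the scope of the node that
+    produces or consumes it (simplified bookkeeping for this sketch).
+    """
+    candidates = [scope_by_tensor[t]
+                  for t in list(node.input) + list(node.output)
+                  if t in scope_by_tensor]
+    # Adopt the most common scope among neighbors, if any is known
+    if candidates:
+        return max(set(candidates), key=candidates.count)
+    return None
+```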
+
+
+## Configuration
+
+### Basic MLO Setup
+
+To enable MLO in your blueprint, add the `loop_body_hierarchy` configuration:
+
+```yaml
+name: "BERT with MLO"
+description: "BERT model with Multilayer Offload"
+
+finn_config:
+  loop_body_hierarchy: [['encoder', 'encoder.layer.0']]
+  split_large_fifos: true
+  fifosim_n_inferences: 2  # Speed up FIFO simulation
+
+design_space:
+  steps:
+    - "qonnx_to_finn"
+    - "bert_streamlining"
+    - "infer_kernels"
+    - "create_dataflow_partition"
+    - "specialize_layers"
+    - "loop_rolling"  # This step implements MLO
+    - "target_fps_parallelization"
+    - "apply_folding_config"
+    # ... rest of pipeline
+```
+
+The easiest way to identify the proper loop body hierarchy is to open the model in Netron and inspect the `pkg.torch.onnx.name_scopes` metadata on the nodes you'd like to include in the loop body.
+
+
+### BERT MLO Example
+
+For BERT models, a typical MLO configuration looks like:
+
+```yaml
+# bert_mlo_demo.yaml
+name: "BERT Demo"
+description: "Hugging face BERT model with MLO"
+
+extends: "../../brainsmith/blueprints/bert.yaml"
+
+finn_config:
+  loop_body_hierarchy: [['encoder', 'encoder.layer.0']]
+  split_large_fifos: true
+  fifosim_n_inferences: 2
+  verify_steps: ['folded_hls_cppsim', 'stitched_ip_rtlsim']
+
+design_space:
+  steps:
+    - at_start:
+        insert:
+          - "bert_cleanup"
+          - "remove_head"
+          - "remove_tail"
+          - "generate_reference_io"
+    - at_end:
+        insert: "shell_metadata_handover"
+```
+
+
+## Example: BERT MLO Demo
+
+The `examples/bert/bert_mlo_demo.sh` demonstrates a complete MLO workflow:
+
+```bash
+#!/bin/bash
+# BERT MLO Demo
+
+# Generate folding configuration
+python gen_folding_config.py \
+    --simd 4 \
+    --pe 4 \
+    --num_layers 2 \
+    -t 1 \
+    -o ./configs/bert_mlo_demo.json
+
+# Run BERT demo with MLO:
+#   -n 4    4 attention heads
+#   -l 2    2 layers total
+#   -z 64   hidden size 64
+#   -i 256  intermediate size 256
+#   -b 8    8-bit quantization
+#   -q 32   sequence length 32
+python bert_demo.py \
+    -o bert_mlo_demo \
+    -n 4 \
+    -l 2 \
+    -z 64 \
+    -i 256 \
+    -b 8 \
+    -q 32 \
+    --blueprint ./bert_mlo_demo.yaml
+```
+
+This creates a BERT model with 2 encoder layers where only the first layer is implemented in hardware, and the second layer reuses the same hardware with different weights.
+
+**CRITICAL: ONNX Export Requirements**
+```python
+# When exporting your model to ONNX, you MUST use dynamo=True.
+# This generates the metadata (name scopes) that MLO requires for loop body discovery.
+import brevitas.onnx as bo
+
+bo.export_qonnx(
+    model,
+    inputs,
+    output_path,
+    dynamo=True,  # Generates name scope metadata for MLO
+    input_names=['input_ids'],
+    opset_version=18,
+    do_constant_folding=True
+)
+```
+
+**Alternative: Custom Loop Rolling for Non-Dynamo Export**
+
+If you cannot use `dynamo=True` (due to compatibility issues, model complexity, or other constraints), you can either add the metadata manually or implement a custom loop rolling step.
+
+**Adding Metadata Manually**
+
+If your ONNX model was exported without `dynamo=True` or the metadata was lost during optimization, you can manually add the required `pkg.torch.onnx.name_scopes` metadata to enable MLO. This approach requires modifying the ONNX model's metadata properties directly.
+
+**Step 1: Understanding the Metadata Structure**
+
+The `pkg.torch.onnx.name_scopes` metadata field contains hierarchical naming information that maps each ONNX node back to its originating PyTorch module. The metadata is stored as a list of strings representing the hierarchy path from the root module to the specific operation.
+
+For example, in a BERT model:
+```python
+# Layer 0 attention query node
+['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.self.query']
+
+# Layer 0 attention key node
+['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.self.key']
+
+# Layer 1 attention query node
+['bert', 'bert.encoder', 'bert.encoder.layer.1', 'bert.encoder.layer.1.attention.self.query']
+```
+
+**Step 2: Identify Your Model's Hierarchy**
+
+First, determine the hierarchical structure of your model:
+
+```python
+import torch
+
+# Example: Print your PyTorch model structure
+model = YourModel()
+for name, module in model.named_modules():
+    print(name)
+
+# Output might look like:
+# encoder
+# encoder.layer.0
+# encoder.layer.0.attention
+# encoder.layer.0.attention.self
+# encoder.layer.1
+# encoder.layer.1.attention
+# encoder.layer.1.attention.self
+```
+
+**Step 3: Add Metadata to ONNX Nodes**
+
+Use the following script to add metadata to your ONNX model. Note that the name scopes live in each node's `metadata_props` (key/value pairs), not in its attributes:
+
+```python
+import onnx
+from onnx.onnx_pb import StringStringEntryProto
+
+def add_name_scope_metadata(model_path, output_path, node_hierarchy_map):
+    """
+    Add pkg.torch.onnx.name_scopes metadata to ONNX nodes.
+
+    Args:
+        model_path: Path to input ONNX model
+        output_path: Path to save modified ONNX model
+        node_hierarchy_map: Dict mapping node names to hierarchy paths (as list of strings)
+            e.g., {'MatMul_0': ['encoder', 'encoder.layer.0', 'encoder.layer.0.attention']}
+    """
+    model = onnx.load(model_path)
+
+    for node in model.graph.node:
+        if node.name in node_hierarchy_map:
+            hierarchy_list = node_hierarchy_map[node.name]
+            # The exporter stores the hierarchy as a serialized list of strings
+            hierarchy_str = str(hierarchy_list)
+
+            # Update an existing metadata entry, or append a new one
+            for prop in node.metadata_props:
+                if prop.key == "pkg.torch.onnx.name_scopes":
+                    prop.value = hierarchy_str
+                    break
+            else:
+                node.metadata_props.append(
+                    StringStringEntryProto(
+                        key="pkg.torch.onnx.name_scopes",
+                        value=hierarchy_str,
+                    )
+                )
+
+    onnx.save(model, output_path)
+    print(f"Model with metadata saved to {output_path}")
+
+# Example usage for a BERT model
+node_hierarchy_map = {
+    # Attention layer nodes
+    'MatMul_0': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.self.query'],
+    'MatMul_1': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.self.key'],
+    'MatMul_2': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.self.value'],
+    'MatMul_3': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.output.dense'],
+
+    # Intermediate layer nodes
+    'MatMul_4': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.intermediate.dense'],
+    'MatMul_5': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.output.dense'],
+
+    # LayerNorm nodes
+    'LayerNormalization_0': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.attention.output.LayerNorm'],
+    'LayerNormalization_1': ['bert', 'bert.encoder', 'bert.encoder.layer.0', 'bert.encoder.layer.0.output.LayerNorm'],
+
+    # You only need to add metadata for the nodes used in the loop body template
+}
+
+add_name_scope_metadata(
+    'model_without_metadata.onnx',
+    'model_with_metadata.onnx',
+    node_hierarchy_map
+)
+```
+
+**Step 4: Verify Metadata with Netron**
+
+After adding metadata, open the modified model in Netron and inspect node properties to verify the `pkg.torch.onnx.name_scopes` field appears correctly.
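+
+If you prefer a programmatic check, a short sketch like the following (assuming the metadata was written to `metadata_props` as in Step 3) lists every annotated node:
+
+```python
+import onnx
+
+model = onnx.load('model_with_metadata.onnx')
+for node in model.graph.node:
+    props = {p.key: p.value for p in node.metadata_props}
+    if 'pkg.torch.onnx.name_scopes' in props:
+        print(f"{node.name}: {props['pkg.torch.onnx.name_scopes']}")
+```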
+
+**Step 5: Use in MLO Configuration**
+
+Once metadata is added, configure your blueprint with the appropriate `loop_body_hierarchy`:
+
+```yaml
+finn_config:
+  loop_body_hierarchy: [['encoder', 'encoder.layer.0']]  # Must match your hierarchy paths
+```
+
+**Important Notes:**
+- Metadata must accurately reflect the repeating structure of your model
+- All nodes within a layer should have consistent hierarchy prefixes
+- Test with a small model (2-3 layers) before applying to larger models
+- Incorrect metadata will cause loop body extraction to fail or extract the wrong nodes
+
+
+**Custom Loop Rolling Step**
+
+If you cannot export via PyTorch Dynamo, you can write your own *Loop Extraction* transform and then leverage the existing *Loop Rolling* transform to create the FINNLoop ONNX node. At present, you'll need to copy the *Loop Rolling* step in FINN and replace the *Loop Extraction* functionality. In the future, we plan to update the Loop Rolling step to accept a custom *Loop Extraction* function.
+
+The standard Loop Rolling build step consists of two transformations: *Loop Body Extraction* and *Loop Rolling*. *Loop Body Extraction* returns a *LoopBodyTemplate* object, which the *LoopRolling* transformation uses as a pattern to identify individual instances of each loop body. The *LoopBodyTemplate* object is created from an ONNX file that contains one copy of the loop body you'd like to roll.
+
+If you have a graph of the loop body or can easily create one, then you can simply create a custom Loop Rolling step in Brainsmith that creates the LoopBodyTemplate object from the ONNX file and passes it to the LoopRolling transformation, as shown in the example code below.
+
+**Example: Custom Loop Rolling Step with Pre-built Loop Body Template**
+
+```python
+from brainsmith.core.plugins import step
+from finn.transformation.fpgadataflow.loop_rolling import LoopBodyTemplate, LoopRolling
+
+@step(name="custom_loop_rolling_with_template")
+def custom_loop_rolling_with_template(model, cfg):
+    """
+    Custom loop rolling step that uses a pre-created loop body ONNX file.
+
+    Use this approach when you have manually created or extracted the loop body
+    graph and saved it to an ONNX file.
+    """
+    # Load the loop body template from a pre-created ONNX file.
+    # This file should contain one complete iteration of your loop body.
+    loop_body_template_path = "path/to/your/loop_body_template.onnx"
+    loop_body_template = LoopBodyTemplate(loop_body_template_path)
+
+    # Apply the loop rolling transformation using your custom template
+    model = model.transform(LoopRolling(loop_body_template))
+
+    return model
+```
+
+In this approach, you need to manually create `loop_body_template.onnx` containing one instance of your repeating layer structure. You can create this file by:
+1. Extracting a subgraph from your full model using ONNX tools
+2. Building it programmatically using ONNX IR or onnxscript
+3. Exporting a single-layer model from PyTorch
+
+Otherwise, you can create a custom LoopBodyExtraction transform. One approach is to build a Python list of the ONNX nodes within the model that fully comprise an iteration of the loop body. You can then use that list to create a SubGraphView object, which can in turn be saved to an ONNX file and used to create the LoopBodyTemplate, as shown in the example code below.
+ +**Example: Custom Loop Extraction and Rolling** + +```python +from brainsmith.core.plugins import step +from finn.transformation.fpgadataflow.loop_rolling import LoopBodyTemplate, LoopRolling +from finn.util import onnxscript_helpers as osh +import onnxscript +from onnxscript import ir +import onnx + +class CustomLoopExtraction: + """ + Custom loop body extraction that identifies loop body nodes + without relying on PyTorch metadata. + """ + + def __init__(self, loop_body_hierarchy): + self.loop_body_hierarchy = loop_body_hierarchy + self.loop_body_template = None + + def extract_loop_body_nodes(self, graph, target_pattern): + """ + Identify nodes that belong to the loop body. + + This is where you implement your custom logic to find the nodes. + You can use pattern matching, graph analysis, or any other method. + """ + extracted_nodes = [] + + # Strategy 1: Simple name prefix matching + for node in graph._nodes: + if node.name.startswith(target_pattern): + extracted_nodes.append(node) + + # Strategy 2: If prefix matching fails, try pattern in node name + if not extracted_nodes: + layer_id = target_pattern.split('.')[-1] + for node in graph._nodes: + if f".{layer_id}." in node.name or f"_{layer_id}_" in node.name: + extracted_nodes.append(node) + + return extracted_nodes + + def apply(self, model): + """Extract loop body and create template file.""" + # Deserialize the model to ONNX IR + model_ir = onnxscript.ir.serde.deserialize_model(model.model) + graph = model_ir.graph + + # Get the target pattern from hierarchy + target_pattern = self.loop_body_hierarchy[0][-1] + + # Extract nodes belonging to the loop body + nodes = self.extract_loop_body_nodes(graph, target_pattern) + + if not nodes: + raise ValueError(f"No nodes found matching pattern: {target_pattern}") + + print(f"Extracted {len(nodes)} nodes for loop body") + + # Create a SubGraphView containing only the loop body nodes + loop_body_graph_view = osh.SubGraphView(graph, "loop-body", nodes) + + # Create an ONNX model from the subgraph + loop_body_model = onnxscript.ir.Model( + loop_body_graph_view, + ir_version=model.model.ir_version + ) + + # Serialize and save the loop body template + proto = onnxscript.ir.serde.serialize_model(loop_body_model) + template_path = "loop-body-template.onnx" + onnx.save(proto, template_path) + + print(f"Loop body template saved to: {template_path}") + + # Create the LoopBodyTemplate object + self.loop_body_template = LoopBodyTemplate(template_path) + + return model + +@step(name="custom_loop_rolling_full") +def custom_loop_rolling_full(model, cfg): + """ + Complete custom loop rolling step with custom extraction. + + This approach: + 1. Uses custom logic to identify loop body nodes + 2. Creates a loop body template from those nodes + 3. Applies FINN's LoopRolling transformation + """ + # Get loop body hierarchy from config + hierarchy = cfg.loop_body_hierarchy if hasattr(cfg, 'loop_body_hierarchy') \ + else [['encoder', 'encoder.layer.0']] + + # Step 1: Custom extraction to create loop body template + extractor = CustomLoopExtraction(hierarchy) + model = extractor.apply(model) + + # Step 2: Apply FINN's loop rolling with the custom template + if extractor.loop_body_template is None: + raise ValueError("Loop body extraction failed - no template created") + + model = model.transform(LoopRolling(extractor.loop_body_template)) + + print("Custom loop rolling completed successfully") + + return model +``` + +**Key Points:** + +1. 
**CustomLoopExtraction.extract_loop_body_nodes()**: This is where you implement your custom logic to identify which nodes belong to the loop body. The example shows simple name matching, but you can implement more sophisticated graph analysis.
+
+2. **SubGraphView**: This FINN utility class creates a view of a subgraph given a list of nodes. It automatically handles:
+   - Finding all necessary inputs/outputs
+   - Maintaining graph connectivity
+   - Preserving node attributes and metadata
+
+3. **LoopBodyTemplate**: This class (from FINN) wraps the loop body ONNX file and provides the pattern matching infrastructure that LoopRolling needs.
+
+4. **LoopRolling transformation**: This is FINN's standard transformation that:
+   - Finds all instances of the loop body pattern in your model
+   - Replaces them with a single FINNLoop node
+   - Sets up weight streaming infrastructure
+   - Handles I/O normalization and type checking
+
+**Usage in Blueprint:**
+
+```yaml
+design_space:
+  steps:
+    - "qonnx_to_finn"
+    - "bert_streamlining"
+    - "infer_kernels"
+    - "create_dataflow_partition"
+    - "specialize_layers"
+    - "custom_loop_rolling_full"  # Your custom step
+    - "target_fps_parallelization"
+    - "apply_folding_config"
+```
+
+
+## Debugging MLO Issues
+
+### Common Problems
+
+**Missing or incorrect metadata (most common):**
+- Ensure the ONNX export used `dynamo=True` to generate name scope metadata
+- Verify the ONNX model contains proper hierarchical node names
+- If unable to use dynamo export, implement a custom loop rolling step (see Loop Body Identification section)
+
+**Missing Loop Body Nodes**
+
+If a node that should be in the loop body is not included during *Loop Extraction*, this can appear in `loopbody_template.onnx` as unexpected inputs and outputs to the loop body graph. It can also cause loop rolling to fail, or produce errors in subsequent build steps like `step_create_dataflow_partition`.
+
+Sometimes a node in the middle of the loop body is excluded during extraction. This can result in a self-referencing loop error in `step_create_dataflow_partition`, where the partitioning process detects invalid circular dependencies.
+
+**Debugging Steps:**
+1. Open `loopbody_template.onnx` in your build directory using Netron
+2. Check for unexpected graph inputs/outputs that should be internal to the loop body
+3. Identify which nodes are missing by comparing against your expected layer structure
+4. Adjust the `loop_body_hierarchy` configuration to include missing nodes:
+   - Try adding an additional hierarchy group for the missing node's namespace
+   - Use a broader hierarchy prefix to capture more nodes
+   - If using custom loop extraction, verify your node matching patterns
+5. Verify metadata on the missing nodes (check the `pkg.torch.onnx.name_scopes` field in Netron)
+6. Rebuild and verify that `loopbody_template.onnx` contains all expected nodes
+
+
+**Incorrect loop body identification:**
+- Check that `loop_body_hierarchy` matches your model structure
+- Verify layer naming conventions in the ONNX graph
+
+
+### Debug Tools
+
+1. **Save intermediate models** - Use `save_intermediate_models: true`
+2. **Enable verification** - Use RTL simulation to check correctness
+3. **Memory tracing** - Monitor weight loading patterns
+4. 
**Performance counters** - Track cycles, bandwidth utilization + +## See Also + +- [Design Space Exploration](design_space_exploration.md) - Understanding execution trees +- [Blueprint Schema](blueprint_schema.md) - Configuration syntax +- [Hardware Kernels](hardware_kernels.md) - Building custom accelerators +- [BERT Examples](../examples/bert/) - Complete MLO implementations diff --git a/examples/bert/README.md b/examples/bert/README.md index 82caf5bb..2b4da253 100644 --- a/examples/bert/README.md +++ b/examples/bert/README.md @@ -87,7 +87,7 @@ blueprint YAML. - **remove_tail**: Removes classification head to focus on encoder - **generate_reference_io**: Creates test vectors for RTL verification -**Core brainsmith steps used from brainsmith.steps.bert_custom_steps:** +**Core brainsmith steps used from brainsmith.steps.bert_steps:** - **bert_cleanup**: BERT-specific model cleanup and normalization - **bert_streamlining**: Streamline BERT model structure - **shell_metadata_handover**: Extract metadata for shell integration diff --git a/examples/bert/bert_demo.py b/examples/bert/bert_demo.py index fdf461a6..ade65fe4 100644 --- a/examples/bert/bert_demo.py +++ b/examples/bert/bert_demo.py @@ -19,36 +19,39 @@ import warnings from pathlib import Path -import brevitas.nn as qnn -import brevitas.onnx as bo - -# Import local custom steps to register them for use in blueprint YAML. -# These steps are referenced in bert_demo.yaml: remove_head, remove_tail, generate_reference_io -import custom_steps # noqa: F401 - Registers custom steps via @step decorator +import numpy as np import onnx import torch +# Import brainsmith early to set up paths +import brainsmith +from brainsmith.settings import get_config # Note: Config export to environment (FINN_ROOT, etc.) happens automatically + from brevitas.graph.calibrate import calibration_mode from brevitas.graph.quantize import layerwise_quantize from brevitas.quant import Int8ActPerTensorFloat, Int8WeightPerTensorFloat, Uint8ActPerTensorFloat -from brevitas_examples.llm.llm_quant.prepare_for_quantize import ( - replace_sdpa_with_quantizable_layers, -) +from brevitas_examples.llm.llm_quant.prepare_for_quantize import replace_sdpa_with_quantizable_layers +from onnx.onnx_pb import StringStringEntryProto from onnxsim import simplify +from qonnx.core.datatype import DataType +from qonnx.util.basic import gen_finn_dt_tensor from qonnx.util.cleanup import cleanup from torch import nn from transformers import BertConfig, BertModel from transformers.utils.fx import symbolic_trace +import brevitas.nn as qnn +import brevitas.onnx as bo -# Import brainsmith early to set up paths -from brainsmith.settings import get_config +# Import local custom steps to register them for use in blueprint YAML. 
+# These steps are referenced in bert_demo.yaml: remove_head, remove_tail, generate_reference_io +import custom_steps # Add parent directory to path for imports sys.path.append(str(Path(__file__).parent.parent.parent)) -from brainsmith import explore_design_space # noqa: E402 -from brainsmith.dse.types import SegmentStatus # noqa: E402 +from brainsmith import explore_design_space +from brainsmith.dse.types import SegmentStatus warnings.simplefilter("ignore") @@ -71,19 +74,17 @@ def generate_bert_model(args): attn_implementation="sdpa", hidden_act="relu", ) - # Initialize model model = BertModel(config=config) model.to(dtype=dtype) model.eval() - # Prepare inputs vocab_size = model.config.vocab_size seq_len = args.seqlen batch_size = 1 input_ids = torch.randint(vocab_size, (batch_size, seq_len), dtype=torch.int64) - inp = {"input_ids": input_ids} + inp = {'input_ids': input_ids} # Symbolic tracing input_names = inp.keys() @@ -93,52 +94,50 @@ def generate_bert_model(args): model = replace_sdpa_with_quantizable_layers(model) # Configure quantization - unsigned_hidden_act = config.hidden_act == "relu" + unsigned_hidden_act = config.hidden_act == 'relu' layerwise_compute_layer_map = {} # Linear layer quantization layerwise_compute_layer_map[nn.Linear] = ( qnn.QuantLinear, { - "input_quant": lambda module: Uint8ActPerTensorFloat - if module.in_features == config.intermediate_size and unsigned_hidden_act - else Int8ActPerTensorFloat, - "weight_quant": Int8WeightPerTensorFloat, - "weight_bit_width": args.bitwidth, - "output_quant": None, - "bias_quant": None, - "return_quant_tensor": False, - }, + 'input_quant': lambda module: Uint8ActPerTensorFloat + if module.in_features == config.intermediate_size and unsigned_hidden_act + else Int8ActPerTensorFloat, + 'weight_quant': Int8WeightPerTensorFloat, + 'weight_bit_width': args.bitwidth, + 'output_quant': None, + 'bias_quant': None, + 'return_quant_tensor': False + } ) - # Attention quantization layerwise_compute_layer_map[qnn.ScaledDotProductAttention] = ( qnn.QuantScaledDotProductAttention, { - "softmax_input_quant": Int8ActPerTensorFloat, - "softmax_input_bit_width": args.bitwidth, - "attn_output_weights_quant": Uint8ActPerTensorFloat, - "attn_output_weights_bit_width": args.bitwidth, - "q_scaled_quant": Int8ActPerTensorFloat, - "q_scaled_bit_width": args.bitwidth, - "k_transposed_quant": Int8ActPerTensorFloat, - "k_transposed_bit_width": args.bitwidth, - "v_quant": Int8ActPerTensorFloat, - "v_bit_width": args.bitwidth, - "attn_output_quant": None, - "return_quant_tensor": False, - }, + 'softmax_input_quant': Int8ActPerTensorFloat, + 'softmax_input_bit_width': args.bitwidth, + 'attn_output_weights_quant': Uint8ActPerTensorFloat, + 'attn_output_weights_bit_width': args.bitwidth, + 'q_scaled_quant': Int8ActPerTensorFloat, + 'q_scaled_bit_width': args.bitwidth, + 'k_transposed_quant': Int8ActPerTensorFloat, + 'k_transposed_bit_width': args.bitwidth, + 'v_quant': Int8ActPerTensorFloat, + 'v_bit_width': args.bitwidth, + 'attn_output_quant': None, + 'return_quant_tensor': False + } ) - # Tanh quantization layerwise_compute_layer_map[nn.Tanh] = ( qnn.QuantTanh, { - "input_quant": None, - "act_quant": Int8ActPerTensorFloat, - "act_bit_width": args.bitwidth, - "return_quant_tensor": False, - }, + 'input_quant': None, + 'act_quant': Int8ActPerTensorFloat, + 'act_bit_width': args.bitwidth, + 'return_quant_tensor': False + } ) # Apply quantization @@ -150,7 +149,7 @@ def generate_bert_model(args): quant_model(**inp) # Export to ONNX - with 
tempfile.NamedTemporaryFile(suffix=".onnx", delete=False) as tmp: + with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as tmp: tmp_path = tmp.name with torch.no_grad(): @@ -159,8 +158,10 @@ def generate_bert_model(args): (input_ids), tmp_path, do_constant_folding=True, - input_names=["input_ids"], - opset_version=17, + input_names=['input_ids'], + opset_version=18, + dynamo=True, + optimize=True ) # Load and return model @@ -174,7 +175,6 @@ def generate_bert_model(args): print(f" - Model inputs: {len(model.graph.input)} tensors") print(f" - Model outputs: {len(model.graph.output)} tensors") print(f" - Number of nodes: {len(model.graph.node)}") - return model @@ -185,11 +185,29 @@ def run_brainsmith_dse(model, args): model_dir = os.path.join(args.output_dir, "intermediate_models") os.makedirs(model_dir, exist_ok=True) + # Extract metadata from the original model + metadata = {} + for node in model.graph.node: + md = {} + for prop in node.metadata_props: + md[prop.key] = prop.value + metadata[node.name] = md + # Simplify model (matches old hw_compiler.py) - model, check = simplify(model) + simp_model_no_md, check = simplify(model) if not check: raise RuntimeError("Unable to simplify the Brevitas BERT model") + # Add the metadata back to the simplified model + simp_model_with_md = simp_model_no_md + for node in simp_model_no_md.graph.node: + if node.name in metadata: + md_props = metadata[node.name] + for key,value in md_props.items(): + new_md = StringStringEntryProto(key=key,value=value) + node.metadata_props.append(new_md) + + model = simp_model_with_md # Save simplified model onnx.save(model, os.path.join(model_dir, "simp.onnx")) # Also save to debug directory for comparison @@ -199,17 +217,19 @@ def run_brainsmith_dse(model, args): # Run cleanup cleanup( in_file=os.path.join(model_dir, "simp.onnx"), - out_file=os.path.join(args.output_dir, "df_input.onnx"), + out_file=os.path.join(args.output_dir, "df_input.onnx") ) - # Save a copy of the cleaned model for visualization - import shutil + # Clean up temporary artifacts (simp.onnx is already saved to debug_models) + os.remove(os.path.join(model_dir, "simp.onnx")) + shutil.rmtree(model_dir) + # Save a copy of the cleaned model for visualization debug_dir = os.path.join(args.output_dir, "debug_models") os.makedirs(debug_dir, exist_ok=True) shutil.copy( os.path.join(args.output_dir, "df_input.onnx"), - os.path.join(debug_dir, "02_after_qonnx_cleanup.onnx"), + os.path.join(debug_dir, "02_after_qonnx_cleanup.onnx") ) # Get blueprint path from args @@ -219,14 +239,14 @@ def run_brainsmith_dse(model, args): results = explore_design_space( model_path=os.path.join(args.output_dir, "df_input.onnx"), blueprint_path=str(blueprint_path), - output_dir=args.output_dir, + output_dir=args.output_dir ) # Results are automatically logged by explore_design_space() # Just check if we succeeded stats = results.compute_stats() - if stats["successful"] == 0: - raise RuntimeError("No successful builds") + if stats['successful'] == 0: + raise RuntimeError(f"No successful builds") # The new execution tree handles output automatically final_model_dst = os.path.join(args.output_dir, "output.onnx") @@ -236,59 +256,44 @@ def run_brainsmith_dse(model, args): if result.status == SegmentStatus.COMPLETED and result.output_model: shutil.copy2(result.output_model, final_model_dst) break - # Handle shell metadata (matches old hw_compiler.py) handover_file = os.path.join(args.output_dir, "stitched_ip", "shell_handover.json") if os.path.exists(handover_file): - with 
open(handover_file) as fp: + with open(handover_file, "r") as fp: handover = json.load(fp) handover["num_layers"] = args.num_hidden_layers with open(handover_file, "w") as fp: json.dump(handover, fp, indent=4) - return results def main(): parser = argparse.ArgumentParser( - description="Modern BERT FINN demo - Exact parity with old system using Brainsmith DFC" + description='Modern BERT FINN demo - Exact parity with old system using Brainsmith DFC' ) # Model configuration - parser.add_argument("-o", "--output", help="Output build directory name", required=True) - parser.add_argument( - "-z", "--hidden_size", type=int, default=384, help="BERT hidden_size parameter" - ) - parser.add_argument( - "-n", - "--num_attention_heads", - type=int, - default=12, - help="BERT num_attention_heads parameter", - ) - parser.add_argument( - "-l", "--num_hidden_layers", type=int, default=1, help="Number of hidden layers" - ) - parser.add_argument( - "-i", "--intermediate_size", type=int, default=1536, help="BERT intermediate_size parameter" - ) - parser.add_argument( - "-b", "--bitwidth", type=int, default=8, help="Quantization bitwidth (4 or 8)" - ) - parser.add_argument("-q", "--seqlen", type=int, default=128, help="Sequence length parameter") + parser.add_argument('-o', '--output', help='Output build directory name', required=True) + parser.add_argument('-z', '--hidden_size', type=int, default=384, + help='BERT hidden_size parameter') + parser.add_argument('-n', '--num_attention_heads', type=int, default=12, + help='BERT num_attention_heads parameter') + parser.add_argument('-l', '--num_hidden_layers', type=int, default=1, + help='Number of hidden layers') + parser.add_argument('-i', '--intermediate_size', type=int, default=1536, + help='BERT intermediate_size parameter') + parser.add_argument('-b', '--bitwidth', type=int, default=8, + help='Quantization bitwidth (4 or 8)') + parser.add_argument('-q', '--seqlen', type=int, default=128, + help='Sequence length parameter') # Blueprint configuration - parser.add_argument( - "--blueprint", - type=str, - default="bert_demo.yaml", - help="Blueprint YAML file to use (default: bert_demo.yaml)", - ) + parser.add_argument('--blueprint', type=str, default='bert_demo.yaml', + help='Blueprint YAML file to use (default: bert_demo.yaml)') # Force flag - parser.add_argument( - "--force", action="store_true", help="Remove existing output directory before building" - ) + parser.add_argument('--force', action='store_true', + help='Remove existing output directory before building') args = parser.parse_args() @@ -304,9 +309,7 @@ def main(): print("=" * 60) print("BERT Demo - Brainsmith Dataflow Core") print("=" * 60) - print( - f"Model: {args.num_hidden_layers} layers, hidden={args.hidden_size}, heads={args.num_attention_heads}, intermediate={args.intermediate_size}" - ) + print(f"Model: {args.num_hidden_layers} layers, hidden={args.hidden_size}, heads={args.num_attention_heads}, intermediate={args.intermediate_size}") print(f"Quantization: {args.bitwidth}-bit, sequence length={args.seqlen}") print(f"Blueprint: {args.blueprint}") print(f"Output: {args.output_dir}") @@ -314,18 +317,17 @@ def main(): try: # Step 1: Generate BERT model - print("\nStep 1: Generating quantized BERT model...") + print("\nStep 1: Generating dummy quantized BERT model...") model = generate_bert_model(args) # Step 2: Create dataflow core accelerator print("\nStep 2: Creating dataflow core accelerator...") - run_brainsmith_dse(model, args) + result = run_brainsmith_dse(model, args) print("\n" + "=" * 
70) print("BUILD COMPLETED SUCCESSFULLY") print("=" * 70) print(f"Output directory: {args.output_dir}") - except Exception as e: print(f"\nERROR: Build failed with error: {e}") raise diff --git a/examples/bert/bert_demo.yaml b/examples/bert/bert_demo.yaml index a55d05c4..26b62152 100644 --- a/examples/bert/bert_demo.yaml +++ b/examples/bert/bert_demo.yaml @@ -25,7 +25,7 @@ design_space: - at_start: insert: # Core brainsmith step: - - "bert_cleanup" # brainsmith.steps.bert_custom_steps + - "bert_cleanup" # brainsmith.steps.bert_steps # Local example steps (from custom_steps.py): - "remove_head" # Remove model head up to first LayerNorm - "remove_tail" # Remove model tail after second output @@ -33,4 +33,4 @@ design_space: - at_end: # Core brainsmith step: - insert: "shell_metadata_handover" # brainsmith.steps.bert_custom_steps + insert: "shell_metadata_handover" # brainsmith.steps.bert_steps diff --git a/examples/bert/bert_mlo_demo.sh b/examples/bert/bert_mlo_demo.sh new file mode 100755 index 00000000..04341143 --- /dev/null +++ b/examples/bert/bert_mlo_demo.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Quick test script - matches functionality of old quicktest.sh + +set -e + +# Set longer timeout for RTL simulation (BERT models can take longer) +export LIVENESS_THRESHOLD=10000000 + +echo "Running BERT Modern Demo with Loop Rolling Test" +echo "===============================================" + +# Change to demo directory +cd "$(dirname "$0")" + +# Clean up any existing bert_mlo_demo build directory +if [ -d "${BSMITH_BUILD_DIR}/bert_mlo_demo" ]; then + echo "Removing existing bert_mlo_demo build directory..." + rm -rf "${BSMITH_BUILD_DIR}/bert_mlo_demo" +fi + +# Generate folding config +echo "Generating folding configuration..." +python gen_folding_config.py \ + --simd 4 \ + --pe 4 \ + --num_layers 2 \ + -t 1 \ + -o ./configs/bert_mlo_demo.json + +# Run BERT demo +echo "Running BERT demo with 2 layers..." +python bert_demo.py \ + -o bert_mlo_demo \ + -n 4 \ + -l 2 \ + -z 64 \ + -i 256 \ + -b 8 \ + -q 32 \ + --blueprint ./bert_mlo_demo.yaml + +echo "Bert MLO test completed!" 
diff --git a/examples/bert/bert_mlo_demo.yaml b/examples/bert/bert_mlo_demo.yaml new file mode 100644 index 00000000..7122a3ce --- /dev/null +++ b/examples/bert/bert_mlo_demo.yaml @@ -0,0 +1,35 @@ + +name: "BERT Demo" +description: "Hugging face BERT model" + +extends: "${BSMITH_DIR}/examples/blueprints/bert.yaml" + +# Configuration overrides +clock_ns: 5.0 # Target clock period in nanoseconds +output: "bitfile" # estimates | rtl | bitfile +board: "V80" # Target FPGA board +save_intermediate_models: true # Save intermediate ONNX models + +finn_config: + loop_body_hierarchy: [['encoder', 'encoder.layer.0']] + split_large_fifos: true + fifosim_n_inferences: 2 # Speed up FIFO + verify_steps: ['folded_hls_cppsim', 'stitched_ip_rtlsim'] + #verify_save_rtlsim_waveforms: true + + +design_space: + # Inherit kernels from parent blueprint (don't override with empty list) + # kernels are defined in parent bert.yaml + + # Add pre/post-processing steps to standard BERT blueprint + steps: + - at_start: + insert: + - "bert_cleanup" + - "remove_head" + - "remove_tail" + - "generate_reference_io" + + - at_end: + insert: "shell_metadata_handover" diff --git a/examples/bert/custom_steps.py b/examples/bert/custom_steps.py index ab9231de..5d136abb 100644 --- a/examples/bert/custom_steps.py +++ b/examples/bert/custom_steps.py @@ -23,8 +23,8 @@ - generate_reference_io: Generate reference inputs/outputs for validation Core brainsmith steps also used in bert_demo.yaml: -- bert_cleanup, bert_streamlining: from brainsmith.steps.bert_custom_steps -- shell_metadata_handover: from brainsmith.steps.bert_custom_steps +- bert_cleanup, bert_streamlining: from brainsmith.steps.bert_steps +- shell_metadata_handover: from brainsmith.steps.bert_steps These steps are highly specific to BERT model architecture and demonstrate how to create example-specific steps using the @step decorator without diff --git a/examples/bert_training/Layers1_config.json b/examples/bert_training/Layers1_config.json new file mode 100644 index 00000000..c352ebd2 --- /dev/null +++ b/examples/bert_training/Layers1_config.json @@ -0,0 +1,1113 @@ +{ + "Defaults": {}, + "StreamingFIFO_rtl_0": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_0": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_1": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_1": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 378 + ] + }, + "StreamingFIFO_rtl_2": { + "depth": 378, + "impl_style": "vivado", + "ram_style": "auto" + }, + "LayerNorm_hls_0": { + "SIMD": 1, + "inFIFODepths": [ + 378 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_3": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_0": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_4": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_2": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_5": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "DuplicateStreams_hls_0": { + "PE": 1, + "outFIFODepths": [ + 2, + 98301 + ], + "inFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_6": { + "depth": 98301, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_7": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_0": { + "PE": 1, + 
"inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_8": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "DuplicateStreams_hls_1": { + "PE": 1, + "outFIFODepths": [ + 2, + 2, + 2 + ], + "inFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_9": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_10": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_11": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_0": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 32 + ] + }, + "StreamingDataWidthConverter_rtl_1": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 32 + ] + }, + "StreamingDataWidthConverter_rtl_2": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 32 + ] + }, + "StreamingFIFO_rtl_12": { + "depth": 32, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_13": { + "depth": 32, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_14": { + "depth": 32, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_0": { + "PE": 96, + "SIMD": 4, + "inFIFODepths": [ + 32 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "MVAU_rtl_1": { + "PE": 96, + "SIMD": 4, + "inFIFODepths": [ + 32 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "MVAU_rtl_2": { + "PE": 96, + "SIMD": 4, + "inFIFODepths": [ + 32 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_15": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_16": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_17": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_3": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingDataWidthConverter_rtl_4": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingDataWidthConverter_rtl_5": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_18": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_19": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_20": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_1": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "ElementwiseMul_hls_2": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "ElementwiseMul_hls_3": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_21": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_22": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_23": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_3": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 59943 + ] + }, + "ElementwiseAdd_hls_4": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 59943 + ] + }, + "ElementwiseAdd_hls_5": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 59943 + ] + }, + "StreamingFIFO_rtl_24": { + "depth": 59943, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_25": { + "depth": 59943, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_26": { + "depth": 59943, + "impl_style": "vivado", + 
"ram_style": "auto" + }, + "Shuffle_hls_0": { + "SIMD": 1, + "inFIFODepths": [ + 59943 + ], + "outFIFODepths": [ + 2 + ] + }, + "Shuffle_hls_1": { + "SIMD": 1, + "inFIFODepths": [ + 59943 + ], + "outFIFODepths": [ + 2 + ] + }, + "Shuffle_hls_2": { + "SIMD": 1, + "inFIFODepths": [ + 59943 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_27": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_28": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_29": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_1": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "Thresholding_rtl_2": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "Thresholding_rtl_3": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_30": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_31": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_32": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_6": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 850 + ] + }, + "StreamingDataWidthConverter_rtl_7": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2001 + ] + }, + "StreamingDataWidthConverter_rtl_8": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_33": { + "depth": 850, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_34": { + "depth": 2001, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_35": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_3": { + "PE": 32, + "SIMD": 4, + "inFIFODepths": [ + 2001, + 2 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_36": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_9": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_37": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_4": { + "PE": 4, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_38": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_4": { + "PE": 4, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 26623 + ] + }, + "StreamingFIFO_rtl_39": { + "depth": 26623, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_10": { + "inFIFODepths": [ + 26623 + ], + "outFIFODepths": [ + 6049 + ] + }, + "StreamingFIFO_rtl_40": { + "depth": 6049, + "impl_style": "vivado", + "ram_style": "auto" + }, + "HWSoftmax_hls_0": { + "SIMD": 1, + "inFIFODepths": [ + 6049 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_41": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_11": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_42": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_5": { + "PE": 4, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_43": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_4": { + "PE": 32, + "SIMD": 4, + "inFIFODepths": [ + 2, + 850 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + 
"StreamingFIFO_rtl_44": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_12": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 32 + ] + }, + "StreamingFIFO_rtl_45": { + "depth": 32, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Shuffle_hls_3": { + "SIMD": 1, + "inFIFODepths": [ + 32 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_46": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_6": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_47": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_13": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_48": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_5": { + "PE": 96, + "SIMD": 4, + "inFIFODepths": [ + 2 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_49": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_14": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_50": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_5": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_51": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_6": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_52": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_7": { + "PE": 1, + "inFIFODepths": [ + 2, + 98301 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_53": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "LayerNorm_hls_1": { + "SIMD": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_54": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_6": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_55": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_8": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_56": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "DuplicateStreams_hls_2": { + "PE": 1, + "outFIFODepths": [ + 2, + 381 + ], + "inFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_57": { + "depth": 381, + "impl_style": "vivado", + "ram_style": "auto" + }, + "StreamingFIFO_rtl_58": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_7": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_59": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_15": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_60": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_6": { + "PE": 384, + "SIMD": 4, + "inFIFODepths": [ + 2 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_61": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_16": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_62": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" 
+ }, + "Thresholding_rtl_8": { + "PE": 3, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_63": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_hls_0": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_64": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_7": { + "PE": 384, + "SIMD": 4, + "inFIFODepths": [ + 2 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_65": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_17": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_66": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_7": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_67": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_9": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_68": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_10": { + "PE": 1, + "inFIFODepths": [ + 2, + 381 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_69": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "LayerNorm_hls_2": { + "SIMD": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_70": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_8": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_71": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseAdd_hls_11": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_72": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Crop_hls_0": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_73": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_9": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_74": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "StreamingDataWidthConverter_rtl_18": { + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_75": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_8": { + "PE": 1, + "SIMD": 3, + "inFIFODepths": [ + 2 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_76": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_10": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_77": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "Thresholding_rtl_11": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_78": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "MVAU_rtl_9": { + "PE": 1, + "SIMD": 1, + "inFIFODepths": [ + 2 + ], + "resType": "auto", + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_79": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + }, + "ElementwiseMul_hls_9": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_80": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + 
}, + "ElementwiseAdd_hls_12": { + "PE": 1, + "inFIFODepths": [ + 2 + ], + "outFIFODepths": [ + 2 + ] + }, + "StreamingFIFO_rtl_81": { + "depth": 2, + "impl_style": "rtl", + "ram_style": "auto" + } +} \ No newline at end of file diff --git a/examples/bert_training/bert_demo.py b/examples/bert_training/bert_demo.py new file mode 100644 index 00000000..9de81583 --- /dev/null +++ b/examples/bert_training/bert_demo.py @@ -0,0 +1,161 @@ +############################################################################ +# Copyright (C) 2025, Advanced Micro Devices, Inc. +# All rights reserved. +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +# SPDX-License-Identifier: MIT +# +# @author Shane T. Fleming +# @author Thomas Keller +############################################################################ + +import argparse +import json +import os +import shutil +import sys +import tempfile +import warnings +from pathlib import Path + +import numpy as np +import onnx +from onnxsim import simplify +from qonnx.core.datatype import DataType +from qonnx.util.basic import gen_finn_dt_tensor +from qonnx.util.cleanup import cleanup + +import custom_steps # Import custom steps to trigger registration + +# Add parent directory to path for imports +sys.path.append(str(Path(__file__).parent.parent.parent)) + +from brainsmith import forge + +warnings.simplefilter("ignore") + + +def generate_bert_model(args): + """Load BERT model from specified ONNX file.""" + if not os.path.exists(args.model_path): + raise FileNotFoundError(f"Model file not found: {args.model_path}") + + model = onnx.load(args.model_path) + return model + + +def run_brainsmith_dse(model, args): + """Run Brainsmith with new execution tree architecture.""" + # Create output directory + os.makedirs(args.output_dir, exist_ok=True) + model_dir = os.path.join(args.output_dir, "intermediate_models") + os.makedirs(model_dir, exist_ok=True) + + onnx.save(model, os.path.join(args.output_dir, "input.onnx")) + + # Get blueprint path from args + blueprint_path = Path(__file__).parent / args.blueprint + + # Forge the FPGA accelerator + print("Forging FPGA accelerator...") + results = forge( + model_path=os.path.join(args.output_dir, "input.onnx"), + blueprint_path=str(blueprint_path), + output_dir=args.output_dir + ) + + # Results are automatically logged by forge() + # Just check if we succeeded + stats = results.stats + if stats['successful'] == 0: + raise RuntimeError(f"No successful builds") + + # The new execution tree handles output automatically + final_model_dst = os.path.join(args.output_dir, "output.onnx") + + # Find the output from the successful execution + for segment_id, result in results.segment_results.items(): + if result.success and result.output_model: + shutil.copy2(result.output_model, final_model_dst) + break + + # Handle shell metadata (matches old hw_compiler.py) + handover_file = os.path.join(args.output_dir, "stitched_ip", "shell_handover.json") + if os.path.exists(handover_file): + with open(handover_file, "r") as fp: + handover = json.load(fp) + handover["num_layers"] = args.num_hidden_layers + with open(handover_file, "w") as fp: + json.dump(handover, fp, indent=4) + + return results + + +def main(): + parser = argparse.ArgumentParser( + description='BERT FINN demo using pre-trained ONNX model' + ) + + # Model configuration + parser.add_argument('-o', '--output', help='Output build directory name', required=True) + parser.add_argument('-m', '--model', dest='model_path', help='Path to ONNX model file', 
required=True) + parser.add_argument('-z', '--hidden_size', type=int, default=384, + help='BERT hidden_size parameter') + parser.add_argument('-n', '--num_attention_heads', type=int, default=12, + help='BERT num_attention_heads parameter') + parser.add_argument('-l', '--num_hidden_layers', type=int, default=1, + help='Number of hidden layers') + parser.add_argument('-i', '--intermediate_size', type=int, default=1536, + help='BERT intermediate_size parameter') + parser.add_argument('-b', '--bitwidth', type=int, default=8, + help='Quantization bitwidth (4 or 8)') + parser.add_argument('-q', '--seqlen', type=int, default=128, + help='Sequence length parameter') + + # Blueprint configuration + parser.add_argument('--blueprint', type=str, default='bert_demo.yaml', + help='Blueprint YAML file to use (default: bert_demo.yaml)') + + args = parser.parse_args() + + # Determine output directory + build_dir = os.environ.get("BSMITH_BUILD_DIR", "./build") + print(build_dir) + args.output_dir = os.path.join(build_dir, args.output) + + print("=" * 70) + print("BERT Demo Using Brainsmith DSE") + print("=" * 70) + print(f"Configuration:") + print(f" Hidden layers: {args.num_hidden_layers}") + print(f" Hidden size: {args.hidden_size}") + print(f" Attention heads: {args.num_attention_heads}") + print(f" Intermediate size: {args.intermediate_size}") + print(f" Bitwidth: {args.bitwidth}") + print(f" Sequence length: {args.seqlen}") + print(f" Blueprint: {args.blueprint}") + print(f" Output directory: {args.output_dir}") + print("=" * 70) + + try: + # Step 1: Generate BERT model + print("\nStep 1: Generating quantized BERT model...") + model = generate_bert_model(args) + + # Step 2: Run Brainsmith DSE + print("\nStep 2: Running Brainsmith DSE pipeline...") + result = run_brainsmith_dse(model, args) + + print("\n" + "=" * 70) + print("BUILD COMPLETED SUCCESSFULLY") + print("=" * 70) + print(f"Output directory: {args.output_dir}") + + except Exception as e: + print(f"\nERROR: Build failed with error: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/examples/bert_training/bert_demo.yaml b/examples/bert_training/bert_demo.yaml new file mode 100644 index 00000000..59500ce5 --- /dev/null +++ b/examples/bert_training/bert_demo.yaml @@ -0,0 +1,43 @@ + +name: "BERT Demo" +description: "Hugging face BERT model" + +extends: "${BSMITH_DIR}/brainsmith/blueprints/bert.yaml" + +# Configuration overrides +clock_ns: 5.0 # Target clock period in nanoseconds +output: "bitfile" # estimates | rtl | bitfile +board: "V80" # Target FPGA board +save_intermediate_models: true # Save intermediate ONNX models + +# Direct override FINN configuration options +finn_config: + loop_body_hierarchy: [ + ['bert', 'bert.encoder', 'bert.encoder.layer.0'] + ] + standalone_thresholds: true + folding_config_file: "${BSMITH_DIR}/examples/bert_training/initial_folding.json" + split_large_fifos: true + auto_fifo_depths: true + fifosim_n_inferences: 2 # Speed up FIFO sizing + stitched_ip_gen_dcp: true + verify_steps: + - "stitched_ip_rtlsim" + #verify_save_rtlsim_waveforms: true #This is really big + verify_save_full_context: true + verification_atol: 0.1 + +design_space: + # Inherit kernels from parent blueprint + + # Add pre/post-processing steps to standard BERT blueprint + steps: + - at_start: + insert: + - "bert_cleanup" + - "remove_head" + #- "remove_tail" + - "generate_reference_io" + + - at_end: + insert: "shell_metadata_handover" diff --git a/examples/bert_training/custom_steps.py b/examples/bert_training/custom_steps.py new 
file mode 100644 index 00000000..e9978319 --- /dev/null +++ b/examples/bert_training/custom_steps.py @@ -0,0 +1,145 @@ +############################################################################ +# Copyright (C) 2025, Advanced Micro Devices, Inc. +# All rights reserved. +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# +# SPDX-License-Identifier: MIT +# +# @author Shane T. Fleming +# @author Thomas Keller +############################################################################ + +""" +BERT-Specific Custom Build Steps + +Custom steps specifically for BERT model processing, including: +- Head and tail removal for model decomposition +- Metadata extraction for shell integration +- Reference I/O generation for validation + +These steps are highly specific to the BERT model architecture and +are not general-purpose FINN dataflow compilation steps. +""" + +import logging + +import numpy as np + +import finn.core.onnx_exec as oxe +from brainsmith.core.plugins import step +from brainsmith.utils import apply_transforms + +logger = logging.getLogger(__name__) + +
+@step( + name="remove_head", + category="bert", + description="BERT-specific head removal for models" +) +def remove_head_step(model, cfg): + """Remove all nodes up to the first LayerNormalization node and rewire the input.""" + + assert len(model.graph.input) == 1, "Error: the graph has more than one input" + + to_remove = [] + + current_tensor = model.graph.input[0].name + current_node = model.find_consumer(current_tensor) + while current_node.op_type != "LayerNormalization": + to_remove.append(current_node) + assert len(current_node.output) == 1, "Error: expected a linear path to the first LayerNorm" + current_tensor = current_node.output[0] + current_node = model.find_consumer(current_tensor) + + # Send the global input to the consumers of the LayerNorm output + LN_output = current_node.output[0] + consumers = model.find_consumers(LN_output) + + # Remove nodes + to_remove.append(current_node) + for node in to_remove: + model.graph.node.remove(node) + + in_vi = model.get_tensor_valueinfo(LN_output) + model.graph.input.pop() + model.graph.input.append(in_vi) + model.graph.value_info.remove(in_vi) + + # Reconnect input + for con in consumers: + for i, ip in enumerate(con.input): + if ip == LN_output: + con.input[i] = model.graph.input[0].name + + # Clean up after head removal + model = apply_transforms(model, [ + 'RemoveUnusedTensors', + 'GiveReadableTensorNames' + ]) + + return model + +
+def _recurse_model_tail_removal(model, to_remove, node): + """Recursively walk the BERT graph backwards from the given node, collecting + nodes for removal until a LayerNormalization is reached""" + if node is not None: + if node.op_type != "LayerNormalization": + to_remove.append(node) + for tensor in node.input: + _recurse_model_tail_removal(model, to_remove, model.find_producer(tensor)) + return + +
+@step( + name="remove_tail", + category="bert", + description="BERT-specific tail removal for models" +) +def remove_tail_step(model, cfg): + """Remove every node between the 'global_out' output and the preceding LayerNorm.""" + # Direct implementation from old custom_step_remove_tail + out_names = [x.name for x in model.graph.output] + assert "global_out" in out_names, "Error: expected one of the outputs to be called 'global_out'; we may need better pattern matching logic here" + + to_remove = [] + current_node = model.find_producer('global_out') + _recurse_model_tail_removal(model, to_remove, current_node) + + for node in to_remove: + model.graph.node.remove(node) + del model.graph.output[out_names.index('global_out')] + + return model + +
+@step( + name="generate_reference_io", + category="bert", + description="Reference IO generation for BERT demo" +) +def generate_reference_io_step(model, cfg): + """Generate a reference input/output pair for the ONNX model after the + head and tail have been removed.""" + input_m = model.graph.input[0] + in_shape = [dim.dim_value for dim in input_m.type.tensor_type.shape.dim] + in_tensor = np.random.uniform(0, 1000, size=in_shape).astype(np.float32) + np.save(cfg.output_dir + "/input.npy", in_tensor) + + input_t = {input_m.name: in_tensor} + out_name = model.graph.output[0].name + + # True -> return the full execution context so every intermediate tensor is saved + y_ref = oxe.execute_onnx(model, input_t, True) + np.save(cfg.output_dir + "/expected_output.npy", y_ref[out_name]) + np.savez(cfg.output_dir + "/expected_context.npz", **y_ref) + return model
diff --git a/examples/bert_training/evaluate_onnx_accuracy.py b/examples/bert_training/evaluate_onnx_accuracy.py new file mode 100755 index 00000000..8cb4a8bb --- /dev/null +++ b/examples/bert_training/evaluate_onnx_accuracy.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 +""" +Evaluate ONNX Model Accuracy on Validation Set +""" + +import onnxruntime as ort +import numpy as np +from transformers import BertTokenizer +from datasets import load_dataset +import argparse +import os +import time +from tqdm import tqdm + +
+def load_onnx_model(model_path): + """Load ONNX model with appropriate runtime""" + print(f"Loading ONNX model from {model_path}...") + + is_qonnx = False + try: + with open(model_path, 'rb') as f: + content = f.read(50000) # Inspect the file header for QONNX markers + if (b'qonnx.custom_op' in content or + b'Quant(-1)' in content or + b'brevitas' in content or + b'QuantLinear' in content or + b'qonnx:Quant' in content): + is_qonnx = True + except Exception: + pass + + if not is_qonnx: + try: + test_session = ort.InferenceSession(model_path) + test_session = None # Clean up + except Exception as e: + if 'qonnx.custom_op' in str(e) or 'Quant(-1)' in str(e): + is_qonnx = True + + if is_qonnx: + print("Detected QONNX model, using QONNX runtime...") + try: + from qonnx.core.modelwrapper import ModelWrapper + from qonnx.transformation.infer_shapes import InferShapes + from qonnx.transformation.infer_datatypes import InferDataTypes + + model = ModelWrapper(model_path) + + try: + model = model.transform(InferDataTypes()) + model = model.transform(InferShapes()) + except Exception as e: + print(f" - Some transformations failed: {e}") + + return model, 'qonnx' + + except ImportError: + print("QONNX not available, falling back to ONNX Runtime...") + return None, None + else: + print("Using standard ONNX Runtime...") + try: + session = ort.InferenceSession(model_path) + return session, 'onnx' + except Exception as e: + print(f"Error loading ONNX model: {e}") + return None, None + +
+def predict_batch(model, model_type, input_ids_batch): + """Predict on a batch of input_ids""" + if model_type == 'onnx': + input_name = model.get_inputs()[0].name + output_name = model.get_outputs()[0].name + result = model.run([output_name], {input_name: input_ids_batch}) + logits = result[0] + + elif model_type == 'qonnx': + from qonnx.core.onnx_exec import execute_onnx + + batch_logits = []
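+ # qonnx's executor runs one sample at a time here; the per-sample logits + # are stacked into a batch after the loop + for i in 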
range(input_ids_batch.shape[0]): + single_input = input_ids_batch[i:i+1] # Keep batch dimension + input_dict = {"input_ids": single_input} + + try: + output_dict = execute_onnx(model, input_dict) + + output_key = list(output_dict.keys())[-1] + logits = output_dict[output_key] + + if len(logits.shape) == 1: + logits = logits.reshape(1, -1) + + batch_logits.append(logits) + + except Exception as e: + print(f"Error processing sample {i}: {e}") + # Fall back to dummy logits so batch shapes stay consistent + batch_logits.append(np.array([[0.0, 0.0]])) + + logits = np.vstack(batch_logits) + + else: + raise ValueError(f"Unsupported model_type: {model_type}") + + return logits + +
+def evaluate_model_accuracy(model, model_type, tokenizer, max_length=128, + num_samples=None, batch_size=32): + """Evaluate model accuracy on SST-2 validation set""" + print("Loading SST-2 validation dataset...") + dataset = load_dataset("glue", "sst2") + val_dataset = dataset['validation'] + + if model_type == 'qonnx' and batch_size > 8: + batch_size = 8 + print(f"Using batch size {batch_size} for QONNX model") + + if num_samples: + val_dataset = val_dataset.select(range(min(num_samples, len(val_dataset)))) + print(f"Evaluating on {len(val_dataset)} samples") + else: + print(f"Evaluating on full validation set ({len(val_dataset)} samples)") + + correct = 0 + total = 0 + + for i in tqdm(range(0, len(val_dataset), batch_size), desc="Evaluating"): + batch_end = min(i + batch_size, len(val_dataset)) + batch_samples = val_dataset[i:batch_end] + + texts = batch_samples['sentence'] + labels = batch_samples['label'] + + inputs = tokenizer( + texts, + truncation=True, + padding='max_length', + max_length=max_length, + return_tensors='np' + ) + + input_ids = inputs['input_ids'].astype(np.int64) + + try: + logits = predict_batch(model, model_type, input_ids) + predictions = np.argmax(logits, axis=-1) + + for pred, true_label in zip(predictions, labels): + if pred == true_label: + correct += 1 + total += 1 + + except Exception as e: + print(f"Error processing batch {i//batch_size}: {e}") + continue + + if total == 0: + print("No samples were successfully processed!") + return 0.0 + + accuracy = correct / total + return accuracy + +
+def main(): + parser = argparse.ArgumentParser(description='Evaluate ONNX model accuracy') + parser.add_argument('--model', default='quantized_int8_model.onnx', + help='Path to ONNX model') + parser.add_argument('--max_length', type=int, default=128, + help='Maximum sequence length') + parser.add_argument('--num_samples', type=int, default=None, + help='Number of validation samples to use (default: all)') + parser.add_argument('--batch_size', type=int, default=32, + help='Batch size for evaluation') + + args = parser.parse_args() + + if not os.path.exists(args.model): + print(f"Error: Model not found at {args.model}") + return + + model, model_type = load_onnx_model(args.model) + if model is None: + print("Failed to load model") + return + + print("Loading tokenizer...") + tokenizer = BertTokenizer.from_pretrained('prajjwal1/bert-tiny') + + print("\nStarting accuracy evaluation...") + start_time = time.time() + + accuracy = evaluate_model_accuracy( + model, model_type, tokenizer, + args.max_length, args.num_samples, args.batch_size + ) + + eval_time = time.time() - start_time + + print("\n=== Evaluation Results ===") + print(f"Model: {args.model}") + print(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)") + print(f"Evaluation time: {eval_time:.2f} seconds") + + model_size = os.path.getsize(args.model) / (1024 * 1024) + print(f"Model size: {model_size:.2f} MB") + + +if __name__ == "__main__": + main()
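The evaluator is also importable as a module (argparse only runs inside main()). A minimal programmatic sketch, assuming the script's directory is on sys.path and a hypothetical model filename:

    from transformers import BertTokenizer
    from evaluate_onnx_accuracy import load_onnx_model, evaluate_model_accuracy

    # load_onnx_model picks ORT or the qonnx executor automatically
    model, model_type = load_onnx_model("fp32_model.onnx")  # hypothetical path
    tokenizer = BertTokenizer.from_pretrained('prajjwal1/bert-tiny')
    accuracy = evaluate_model_accuracy(model, model_type, tokenizer, num_samples=200)
    print(f"Accuracy: {accuracy:.4f}")

diff --git 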
a/examples/bert_training/initial_folding.json b/examples/bert_training/initial_folding.json new file mode 100644 index 00000000..a5fec5a8 --- /dev/null +++ b/examples/bert_training/initial_folding.json @@ -0,0 +1,175 @@ +{ + "Defaults": {}, + "ElementwiseAdd_hls_0": { + "PE": 1 + }, + "ElementwiseAdd_hls_1": { + "PE": 1 + }, + "LayerNorm_hls_0": { + "SIMD": 1 + }, + "ElementwiseMul_hls_0": { + "PE": 1 + }, + "ElementwiseMul_Add_2": { + "PE": 1 + }, + "FINNLoop_0_DuplicateStreams_hls_0": { + "PE": 2 + }, + "FINNLoop_0_Thresholding_rtl_0": { + "PE": 4 + }, + "FINNLoop_0_DuplicateStreams_hls_1": { + "PE": 4 + }, + "FINNLoop_0_MVAU_rtl_0": { + "PE": 32, + "SIMD": 4 + }, + "FINNLoop_0_MVAU_rtl_1": { + "PE": 32, + "SIMD": 4 + }, + "FINNLoop_0_MVAU_rtl_2": { + "PE": 32, + "SIMD": 4 + }, + "FINNLoop_0_ElementwiseMul_hls_0": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseMul_hls_1": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseMul_hls_2": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_0": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_1": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_2": { + "PE": 2 + }, + "FINNLoop_0_Shuffle_hls_0": { + "SIMD": 1 + }, + "FINNLoop_0_Shuffle_hls_1": { + "SIMD": 1 + }, + "FINNLoop_0_Shuffle_hls_2": { + "SIMD": 1 + }, + "FINNLoop_0_Thresholding_rtl_1": { + "PE": 4 + }, + "FINNLoop_0_Thresholding_rtl_2": { + "PE": 4 + }, + "FINNLoop_0_Thresholding_rtl_3": { + "PE": 4 + }, + "FINNLoop_0_MVAU_rtl_3": { + "PE": 16, + "SIMD": 4 + }, + "FINNLoop_0_Thresholding_rtl_4": { + "PE": 16 + }, + "FINNLoop_0_ElementwiseMul_hls_3": { + "PE": 2 + }, + "FINNLoop_0_HWSoftmax_hls_0": { + "SIMD": 1 + }, + "FINNLoop_0_Thresholding_rtl_5": { + "PE": 4 + }, + "FINNLoop_0_MVAU_rtl_4": { + "PE": 16, + "SIMD": 4 + }, + "FINNLoop_0_Shuffle_hls_3": { + "SIMD": 1 + }, + "FINNLoop_0_Thresholding_rtl_6": { + "PE": 8 + }, + "FINNLoop_0_MVAU_rtl_5": { + "PE": 32, + "SIMD": 4 + }, + "FINNLoop_0_ElementwiseMul_hls_4": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_3": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_4": { + "PE": 2 + }, + "FINNLoop_0_LayerNorm_hls_0": { + "SIMD": 8 + }, + "FINNLoop_0_ElementwiseMul_hls_5": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_5": { + "PE": 2 + }, + "FINNLoop_0_DuplicateStreams_hls_2": { + "PE": 4 + }, + "FINNLoop_0_Thresholding_rtl_7": { + "PE": 16 + }, + "FINNLoop_0_MVAU_rtl_6": { + "PE": 128, + "SIMD": 4 + }, + "FINNLoop_0_Thresholding_rtl_8": { + "PE": 128 + }, + "FINNLoop_0_MVAU_rtl_7": { + "PE": 128, + "SIMD": 4 + }, + "FINNLoop_0_ElementwiseMul_hls_6": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_6": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_7": { + "PE": 2 + }, + "FINNLoop_0_LayerNorm_hls_1": { + "SIMD": 8 + }, + "FINNLoop_0_ElementwiseMul_hls_7": { + "PE": 2 + }, + "FINNLoop_0_ElementwiseAdd_hls_8": { + "PE": 2 + }, + "Thresholding_rtl_0": { + "PE": 16 + }, + "MVAU_rtl_0": { + "PE": 16, + "SIMD": 16 + }, + "Thresholding_rtl_1": { + "PE": 4 + }, + "Thresholding_rtl_2": { + "PE": 2 + }, + "MVAU_rtl_1": { + "PE": 2, + "SIMD": 2 + } +} diff --git a/examples/bert_training/quantize_to_int8.py b/examples/bert_training/quantize_to_int8.py new file mode 100755 index 00000000..050ed168 --- /dev/null +++ b/examples/bert_training/quantize_to_int8.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +""" +Apply PTQ Quantization using Brevitas to FP32 Model and Export to Clean ONNX +""" + +import torch +import torch.nn as nn +from transformers import BertTokenizer, BertConfig, BertForSequenceClassification +from datasets import load_dataset +import 
brevitas.nn as qnn +from brevitas.quant import Int8ActPerTensorFloat, Uint8ActPerTensorFloat, Int8WeightPerTensorFloat +from brevitas.graph.calibrate import calibration_mode +from brevitas.graph.quantize import layerwise_quantize +# from brevitas_examples.llm.llm_quant.prepare_for_quantize import replace_sdpa_with_quantizable_layers +from brevitas.graph import TorchFunctionalToModule +from brevitas.nn import ScaledDotProductAttention +import torch.nn.functional as F +from transformers.utils.fx import symbolic_trace +import argparse +import os +from tqdm import tqdm +from torch.utils.data import DataLoader +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.transformation.infer_datatypes import InferDataTypes +from qonnx.transformation.fold_constants import FoldConstants +from qonnx.transformation.quant_constant_folding import FoldTransposeIntoQuantInit +from qonnx.transformation.general import ( + RemoveUnusedTensors, + SortGraph, + GiveUniqueNodeNames, + GiveUniqueParameterTensors, +) + +
+def replace_sdpa_with_quantizable_layers(model): + """Replace scaled dot product attention with quantizable version""" + fn_to_module_map = ((F.scaled_dot_product_attention, ScaledDotProductAttention),) + model = TorchFunctionalToModule(fn_to_module_map=fn_to_module_map).apply(model) + return model + +
+def create_tinybert_config(): + """Create TinyBERT configuration""" + config = BertConfig( + vocab_size=30522, + hidden_size=384, + num_hidden_layers=6, + num_attention_heads=12, + intermediate_size=1536, + hidden_act="relu", + num_labels=2 + ) + return config + +
+def load_fp32_model(model_path, max_length=128): + """Load the trained FP32 model""" + print(f"Loading FP32 model from {model_path}...") + config = create_tinybert_config() + model = BertForSequenceClassificationWrapper(config, max_length) + model.load_state_dict(torch.load(model_path, map_location='cpu', weights_only=False)) + model.eval() + return model + +
+def apply_bert_quantization(model, config, bitwidth=8, seqlen=128): + """Apply BERT-style quantization using layerwise approach""" + print(f"Applying BERT-style quantization with {bitwidth}-bit precision...") + + dtype = torch.float32 + model.to(dtype=dtype) + model.eval() + vocab_size = model.config.vocab_size + batch_size = 1 + + input_ids = torch.randint(vocab_size, (batch_size, seqlen), dtype=torch.int64) + inp = {'input_ids': input_ids} + + print("Performing symbolic tracing...") + input_names = inp.keys() + model = symbolic_trace(model, input_names, disable_check=True) + + print("Replacing SDPA with quantizable variants...") + model = replace_sdpa_with_quantizable_layers(model) + print("Replacement done.") + + unsigned_hidden_act = config.hidden_act == 'relu' + layerwise_compute_layer_map = {} + + # Linear layer quantization: INT8 weights everywhere; the FFN down-projection + # sees post-ReLU (non-negative) activations, so its input quantizes as UINT8 + layerwise_compute_layer_map[nn.Linear] = ( + qnn.QuantLinear, + { + 'input_quant': lambda module: Uint8ActPerTensorFloat + if module.in_features == config.intermediate_size and unsigned_hidden_act + else Int8ActPerTensorFloat, + 'weight_quant': Int8WeightPerTensorFloat, + 'weight_bit_width': bitwidth, + 'output_quant': None, + 'bias_quant': None, + 'return_quant_tensor': False + } + )
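+ + # Quantize the attention internals too: the softmax input, the post-softmax + # attention weights (unsigned, since softmax outputs are non-negative), scaled Q, + # transposed K, V, and the SDPA output each get their own activation quantizer + layerwise_compute_layer_map[qnn.ScaledDotProductAttention] = ( + qnn.QuantScaledDotProductAttention, + { + 'softmax_input_quant': Int8ActPerTensorFloat, + 'softmax_input_bit_width': bitwidth, + 'attn_output_weights_quant': 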
Uint8ActPerTensorFloat, + 'attn_output_weights_bit_width': bitwidth, + 'q_scaled_quant': Int8ActPerTensorFloat, + 'q_scaled_bit_width': bitwidth, + 'k_transposed_quant': Int8ActPerTensorFloat, + 'k_transposed_bit_width': bitwidth, + 'v_quant': Int8ActPerTensorFloat, + 'v_bit_width': bitwidth, + 'out_quant': Int8ActPerTensorFloat, + 'out_bit_width': bitwidth, + 'return_quant_tensor': False + } + ) + + # HardTanh quantization (replacing Tanh) + layerwise_compute_layer_map[nn.Tanh] = ( + qnn.QuantHardTanh, + { + 'input_quant': None, + 'act_quant': Int8ActPerTensorFloat, + 'act_bit_width': bitwidth, + 'min_val': -1.0, + 'max_val': 1.0, + 'return_quant_tensor': False + } + ) + + print("Applying layerwise quantization...") + model = layerwise_quantize( + model=model, + compute_layer_map=layerwise_compute_layer_map + ) + model.to(dtype=dtype) + + print("BERT quantization completed.") + return model + + +def calibrate_model(model, tokenizer, num_samples=1600, max_length=128): + """Calibrate the quantized model with sample data using proper calibration mode""" + print(f"Calibrating model with ~{num_samples} samples...") + + dataset = load_dataset("glue", "sst2") + calibration_samples = dataset["train"].shuffle(seed=42).select(range(num_samples)) + + def tokenize_function(examples): + return tokenizer( + examples["sentence"], + truncation=True, + padding="max_length", + max_length=max_length, + return_tensors="pt" + ) + + calibration_data = calibration_samples.map(tokenize_function, batched=True) + calibration_data.set_format(type="torch", columns=["input_ids"]) + calibration_dataloader = DataLoader(calibration_data, batch_size=32, shuffle=False) + + model.eval() + device = next(model.parameters()).device + + with torch.no_grad(), calibration_mode(model): + for batch_idx, batch in enumerate(tqdm(calibration_dataloader, desc="Calibrating")): + input_ids = batch["input_ids"].to(device) + + _ = model(input_ids) + + if batch_idx >= 50: + break + + print("Calibration completed") + +class BertForSequenceClassificationWrapper(BertForSequenceClassification): + def __init__(self, config, max_length=128): + super().__init__(config) + self.max_length = max_length + + def forward(self, input_ids): + batch_size = input_ids.shape[0] + attention_mask = torch.ones((batch_size, self.max_length), dtype=torch.long, device=input_ids.device) + return super().forward(input_ids=input_ids, attention_mask=attention_mask) + + +def apply_qonnx_cleanup(model_path): + """Apply QONNX cleanup transformations to reduce complexity""" + + try: + model = ModelWrapper(model_path) + + print(f" Original model has {len(model.graph.node)} nodes") + + model = model.transform(InferDataTypes()) + model = model.transform(InferShapes()) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(GiveUniqueParameterTensors()) + model = model.transform(SortGraph()) + model = model.transform(FoldConstants()) + model = model.transform(RemoveUnusedTensors()) + + model = model.transform(FoldTransposeIntoQuantInit()) + + print(f" Cleaned model has {len(model.graph.node)} nodes") + + cleaned_path = model_path.replace('.onnx', '_cleaned.onnx') + model.save(cleaned_path) + + print(f" Cleaned model saved to: {cleaned_path}") + return cleaned_path + + except Exception as e: + print(f" QONNX cleanup failed: {e}") + return model_path + + +def export_quantized_to_onnx(model, output_path, max_length=128): + """Export quantized model to clean ONNX""" + device = next(model.parameters()).device + model.eval() + + dummy_input = torch.ones(1, 
max_length, dtype=torch.long).to(device) + + from brevitas.export import export_qonnx + print(f"Attempting QONNX export with dynamo=True...") + export_qonnx(model, dummy_input, output_path, dynamo=True) + print(f"QONNX export successful") + + print(f"Quantized ONNX model saved to: {output_path}") + cleaned_path = apply_qonnx_cleanup(output_path) + + return cleaned_path + + +def validate_quantized_model(original_model, quantized_model, tokenizer, max_length=128): + print("Validating quantized model...") + + dataset = load_dataset("glue", "sst2") + test_samples = dataset['validation'].shuffle(seed=42).select(range(100)) + + original_model.eval() + quantized_model.eval() + device = next(quantized_model.parameters()).device + + original_correct = 0 + quantized_correct = 0 + + with torch.no_grad(): + for sample in test_samples: + # Tokenize + inputs = tokenizer( + sample['sentence'], + truncation=True, + padding='max_length', + max_length=max_length, + return_tensors='pt' + ) + + input_ids = inputs['input_ids'].to(device) + true_label = sample['label'] + + orig_outputs = original_model(input_ids) + orig_pred = torch.argmax(orig_outputs.logits, dim=-1).item() + if orig_pred == true_label: + original_correct += 1 + + quant_outputs = quantized_model(input_ids) + # Handle different output formats + if hasattr(quant_outputs, 'logits'): + quant_logits = quant_outputs.logits + elif isinstance(quant_outputs, dict) and 'logits' in quant_outputs: + quant_logits = quant_outputs['logits'] + else: + # If it's a tensor or other format, assume it's the logits directly + quant_logits = quant_outputs + quant_pred = torch.argmax(quant_logits, dim=-1).item() + if quant_pred == true_label: + quantized_correct += 1 + + orig_acc = original_correct / len(test_samples) * 100 + quant_acc = quantized_correct / len(test_samples) * 100 + + print(f"Original model accuracy: {orig_acc:.2f}%") + print(f"Quantized model accuracy: {quant_acc:.2f}%") + print(f"Accuracy difference: {quant_acc - orig_acc:+.2f}%") + + +def main(): + parser = argparse.ArgumentParser(description='Quantize FP32 Model to INT8 and Export to ONNX') + parser.add_argument('--input_model', default='best_fp32_model.pth', + help='Path to FP32 PyTorch model') + parser.add_argument('--output', default='quantized_int8_model.onnx', + help='Output quantized ONNX path') + parser.add_argument('--calibration_samples', type=int, default=1600, + help='Number of samples for calibration') + parser.add_argument('--bitwidth', type=int, default=8, + help='Quantization bit width') + parser.add_argument('--max_length', type=int, default=128, + help='Maximum sequence length') + parser.add_argument('--validate', action='store_true', + help='Validate quantized model accuracy') + + args = parser.parse_args() + + if not os.path.exists(args.input_model): + print(f"Error: Input model not found at {args.input_model}") + print("Please run train_fp32_model.py first") + return + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + print(f"Using device: {device}") + + tokenizer = BertTokenizer.from_pretrained('prajjwal1/bert-tiny') + original_model = load_fp32_model(args.input_model, args.max_length) + original_model.to(device) + + config = create_tinybert_config() + quantized_model = apply_bert_quantization(original_model, config, args.bitwidth, args.max_length) + quantized_model.to(device) + + print(f"Quantized model has {sum(p.numel() for p in quantized_model.parameters()):,} parameters") + + calibrate_model(quantized_model, tokenizer, args.calibration_samples, 
args.max_length) + + if args.validate: + validate_quantized_model(original_model, quantized_model, tokenizer, args.max_length) + + cleaned_model_path = export_quantized_to_onnx(quantized_model, args.output, args.max_length) + + torch.save(quantized_model.state_dict(), 'quantized_int8_model.pth') + + print(f"\nQuantization completed!") + print(f"Quantized ONNX model saved to: {args.output}") + if cleaned_model_path != args.output: + print(f"Cleaned ONNX model saved to: {cleaned_model_path}") + print(f"Quantized PyTorch model saved to: quantized_int8_model.pth") + + +if __name__ == "__main__": + main() diff --git a/examples/bert_training/train_fp32_model.py b/examples/bert_training/train_fp32_model.py new file mode 100755 index 00000000..8c6d98f9 --- /dev/null +++ b/examples/bert_training/train_fp32_model.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Train FP32 TinyBERT Classification Model and Export to Clean ONNX +""" + +import torch +import torch.nn as nn +from torch.utils.data import DataLoader +from transformers import BertTokenizer, BertConfig, BertForSequenceClassification +from datasets import load_dataset +import numpy as np +import onnx +import onnxsim +import argparse +import os +from tqdm import tqdm + + +def create_tinybert_config(): + """Create TinyBERT configuration""" + config = BertConfig( + vocab_size=30522, + hidden_size=384, + num_hidden_layers=6, + num_attention_heads=12, + intermediate_size=1536, + hidden_act="relu", + num_labels=2 + ) + return config + + +def load_and_preprocess_data(tokenizer, max_length=128): + """Load and preprocess SST-2 dataset""" + print("Loading SST-2 dataset...") + dataset = load_dataset("glue", "sst2") + + def tokenize_data(examples): + return tokenizer( + examples['sentence'], + truncation=True, + padding='max_length', + max_length=max_length + ) + + # Tokenize datasets + train_dataset = dataset['train'].map(tokenize_data, batched=True) + val_dataset = dataset['validation'].map(tokenize_data, batched=True) + + # Set format for PyTorch + train_dataset.set_format(type='torch', columns=['input_ids', 'label']) + val_dataset.set_format(type='torch', columns=['input_ids', 'label']) + + return train_dataset, val_dataset + + +def train_model(model, train_loader, val_loader, device, epochs=3): + """Train the model""" + optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5) + criterion = nn.CrossEntropyLoss() + + model.to(device) + best_val_acc = 0 + + for epoch in range(epochs): + # Training + model.train() + total_loss = 0 + correct = 0 + total = 0 + + print(f"\nEpoch {epoch+1}/{epochs}") + train_pbar = tqdm(train_loader, desc="Training") + + for batch in train_pbar: + input_ids = batch['input_ids'].to(device) + labels = batch['label'].to(device) + + optimizer.zero_grad() + outputs = model(input_ids) + loss = criterion(outputs.logits, labels) + loss.backward() + optimizer.step() + + total_loss += loss.item() + _, predicted = torch.max(outputs.logits.data, 1) + total += labels.size(0) + correct += (predicted == labels).sum().item() + + train_pbar.set_postfix({ + 'loss': f'{loss.item():.4f}', + 'acc': f'{100.*correct/total:.2f}%' + }) + + train_acc = 100. 
* correct / total + + # Validation + model.eval() + val_correct = 0 + val_total = 0 + val_loss = 0 + + with torch.no_grad(): + for batch in tqdm(val_loader, desc="Validation"): + input_ids = batch['input_ids'].to(device) + labels = batch['label'].to(device) + + outputs = model(input_ids) + loss = criterion(outputs.logits, labels) + val_loss += loss.item() + + _, predicted = torch.max(outputs.logits.data, 1) + val_total += labels.size(0) + val_correct += (predicted == labels).sum().item() + + val_acc = 100. * val_correct / val_total + + print(f"Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%") + + # Save best model + if val_acc > best_val_acc: + best_val_acc = val_acc + torch.save(model.state_dict(), 'best_fp32_model.pth') + print(f"New best model saved with validation accuracy: {val_acc:.2f}%") + + return best_val_acc + + +def export_to_onnx(model, tokenizer, output_path, max_length=128): + """Export model to clean ONNX format""" + print("Exporting to ONNX...") + + model.eval() + device = next(model.parameters()).device + + # Create dummy input + dummy_input = torch.ones(1, max_length, dtype=torch.long).to(device) + + # Export to ONNX + torch.onnx.export( + model, + dummy_input, + output_path, + export_params=True, + opset_version=17, + do_constant_folding=True, + input_names=['input_ids'], + output_names=['logits'], + dynamic_axes={ + 'input_ids': {0: 'batch_size'}, + 'logits': {0: 'batch_size'} + } + ) + + # Simplify ONNX model + print("Simplifying ONNX model...") + model_onnx = onnx.load(output_path) + model_onnx, check = onnxsim.simplify(model_onnx) + assert check, "Simplified ONNX model could not be validated" + onnx.save(model_onnx, output_path) + + print(f"Clean ONNX model saved to: {output_path}") + + +def main(): + parser = argparse.ArgumentParser(description='Train FP32 TinyBERT and Export to ONNX') + parser.add_argument('--epochs', type=int, default=3, help='Number of training epochs') + parser.add_argument('--batch_size', type=int, default=32, help='Batch size') + parser.add_argument('--max_length', type=int, default=128, help='Maximum sequence length') + parser.add_argument('--output', default='fp32_model.onnx', help='Output ONNX path') + + args = parser.parse_args() + + # Setup + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + print(f"Using device: {device}") + + # Load tokenizer and create model + print("Loading tokenizer and creating model...") + tokenizer = BertTokenizer.from_pretrained('prajjwal1/bert-tiny') + config = create_tinybert_config() + model = BertForSequenceClassification(config) + + print(f"Model has {sum(p.numel() for p in model.parameters()):,} parameters") + + # Load data + train_dataset, val_dataset = load_and_preprocess_data(tokenizer, args.max_length) + + train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True) + val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False) + + print(f"Training samples: {len(train_dataset)}") + print(f"Validation samples: {len(val_dataset)}") + + # Train model + best_acc = train_model(model, train_loader, val_loader, device, args.epochs) + + # Load best model for export + model.load_state_dict(torch.load('best_fp32_model.pth')) + model.eval() + + # Export to ONNX + export_to_onnx(model, tokenizer, args.output, args.max_length) + + print(f"\nTraining completed!") + print(f"Best validation accuracy: {best_acc:.2f}%") + print(f"FP32 ONNX model saved to: {args.output}") + print(f"PyTorch model saved to: best_fp32_model.pth") + + +if 
__name__ == "__main__": + main() diff --git a/examples/blueprints/base.yaml b/examples/blueprints/base.yaml index 5316d224..67dbac1e 100644 --- a/examples/blueprints/base.yaml +++ b/examples/blueprints/base.yaml @@ -10,18 +10,22 @@ clock_ns: 5.0 # Target clock period in nanoseconds design_space: kernels: [] steps: - - "cleanup" # custom_step_cleanup - - "qonnx_to_finn" - # REQUIRED: Layout normalization must run before kernel inference - # All Brainsmith kernels assume NHWC (channel-last) layout for dataflow. - # This step converts any NCHW tensors to NHWC globally, eliminating the - # need for per-kernel layout checking and ensuring uniform dataflow behavior. - - "normalize_dataflow_layouts" - - "build_dataflow_graph" # Build complete dataflow graph (infrastructure + computational kernels) - - "create_dataflow_partition" - - "specialize_layers" - - "target_fps_parallelization" - - "apply_folding_config" + # <-- Your topology cleanup steps should go here --> + - "finn_topology_cleanup" # Standard graph cleanup expected by FINN + - "import_qonnx_quantization" # Process quantization metadata from QONNX + # <-- Your topology optimization steps should go here --> + - "finn:streamline" + - "normalize_dataflow_layouts" # Normalize tensors to NHWC layout + ########## Core Brainsmith Steps ########## + - "infer_computational_kernels" # Infer pattern-based kernels (MVAU, LayerNorm, etc.) + - "insert_infrastructure_kernels" # Insert topology-based kernels (DuplicateStreams, etc.) + - "specialize_kernel_backends" # Select HLS/RTL backends + create dataflow partition + + + + - "brainsmith:target_fps_parallelization" + - "apply_parallelization_config" + - "minimize_bit_width" - "generate_estimate_reports" - "hw_codegen" diff --git a/examples/blueprints/bert.yaml b/examples/blueprints/bert.yaml index cabb59df..b497b59e 100644 --- a/examples/blueprints/bert.yaml +++ b/examples/blueprints/bert.yaml @@ -10,27 +10,32 @@ board: "V80" # Target FPGA board design_space: kernels: + # Computational Kernels - LayerNorm - - DuplicateStreams - ElementwiseBinaryOp - Shuffle + - Crop + - Lookup - Softmax - - finn:Thresholding + - brainsmith:Thresholding - finn:MVAU + # Infrastructure Kernels + - DuplicateStreams steps: - - "qonnx_to_finn" - # Topology optimization + - "bert_topology_cleanup" # Model-specific (ExpandNorms) + - "finn_topology_cleanup" # Generic graph optimization + - "import_qonnx_quantization" # Quantization metadata import - "bert_streamlining" - - "normalize_dataflow_layouts" # Normalize tensors to NHWC layout - # Core Brainsmith steps - - "build_dataflow_graph" # ONNX --> Kernels - - "build_hw_graph" # Kernels --> HW Backends - # - "loop_rolling" - - "transpose_decomposition" + - "normalize_dataflow_layouts" # Normalize tensors to NHWC layout + - "infer_computational_kernels" # Infer pattern-based kernels (MVAU, LayerNorm, etc.) + - "insert_infrastructure_kernels" # Insert topology-based kernels (DuplicateStreams, etc.) 
+ - "specialize_kernel_backends" # Select HLS/RTL backends + create dataflow partition + - "minimize_bit_width" + - "loop_rolling" - "brainsmith:target_fps_parallelization" - "apply_parallelization_config" - - "minimize_bit_width" + - "transpose_decomposition" - "generate_estimate_reports" - "hw_codegen" - "hw_ipgen" diff --git a/mkdocs.yml b/mkdocs.yml index 062ad81d..f40a5d42 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,9 +2,8 @@ site_name: Brainsmith site_description: From PyTorch to RTL - FPGA Accelerator Compiler for AI site_author: Microsoft site_url: https://microsoft.github.io/brainsmith/ - repo_name: microsoft/brainsmith -repo_url: https://github.com/microsoft/brainsmith +repo_url: https://github.com/microsoft/brainsmith/ edit_uri: edit/main/docs/ copyright: Copyright © Microsoft. All rights reserved. @@ -118,16 +117,28 @@ nav: - Hardware Kernels: developer-guide/hardware-kernels.md - Component Registry: developer-guide/registry.md - Blueprint Schema: developer-guide/blueprint-schema.md +<<<<<<< HEAD + - Component Registry: developer-guide/registry-user-guide.md + - Multi-Layer Offload: developer-guide/multi-layer-offload.md +||||||| merged common ancestors + - Component Registry: developer-guide/registry-user-guide.md +======= - Dataflow Modeling: developer-guide/dataflow-modeling.md - Multi-Layer Offload: developer-guide/multi-layer-offload.md +>>>>>>> main - API Reference: - Overview: api/index.md - CLI: api/cli.md +<<<<<<< HEAD + - Tutorials (Coming Soon): tutorials/index.md +||||||| merged common ancestors +======= - Design Space Exploration: api/dse.md - Dataflow Modeling: api/dataflow.md - Component Registry: api/registry.md - Settings: api/settings.md - Tutorials (Coming Soon): tutorials/index.md +>>>>>>> main extra_css: - stylesheets/extra.css diff --git a/poetry.lock b/poetry.lock index cf0790d0..e393f54d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,175 @@ # This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. 
+[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +description = "Happy Eyeballs for asyncio" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, + {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, +] + +[[package]] +name = "aiohttp" +version = "3.13.2" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "aiohttp-3.13.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2372b15a5f62ed37789a6b383ff7344fc5b9f243999b0cd9b629d8bc5f5b4155"}, + {file = "aiohttp-3.13.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e7f8659a48995edee7229522984bd1009c1213929c769c2daa80b40fe49a180c"}, + {file = "aiohttp-3.13.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:939ced4a7add92296b0ad38892ce62b98c619288a081170695c6babe4f50e636"}, + {file = "aiohttp-3.13.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6315fb6977f1d0dd41a107c527fee2ed5ab0550b7d885bc15fee20ccb17891da"}, + {file = "aiohttp-3.13.2-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6e7352512f763f760baaed2637055c49134fd1d35b37c2dedfac35bfe5cf8725"}, + {file = "aiohttp-3.13.2-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e09a0a06348a2dd73e7213353c90d709502d9786219f69b731f6caa0efeb46f5"}, + {file = "aiohttp-3.13.2-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a09a6d073fb5789456545bdee2474d14395792faa0527887f2f4ec1a486a59d3"}, + {file = "aiohttp-3.13.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b59d13c443f8e049d9e94099c7e412e34610f1f49be0f230ec656a10692a5802"}, + {file = "aiohttp-3.13.2-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:20db2d67985d71ca033443a1ba2001c4b5693fe09b0e29f6d9358a99d4d62a8a"}, + {file = "aiohttp-3.13.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:960c2fc686ba27b535f9fd2b52d87ecd7e4fd1cf877f6a5cba8afb5b4a8bd204"}, + {file = "aiohttp-3.13.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6c00dbcf5f0d88796151e264a8eab23de2997c9303dd7c0bf622e23b24d3ce22"}, + {file = "aiohttp-3.13.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fed38a5edb7945f4d1bcabe2fcd05db4f6ec7e0e82560088b754f7e08d93772d"}, + {file = "aiohttp-3.13.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:b395bbca716c38bef3c764f187860e88c724b342c26275bc03e906142fc5964f"}, + {file = "aiohttp-3.13.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:204ffff2426c25dfda401ba08da85f9c59525cdc42bda26660463dd1cbcfec6f"}, + {file = "aiohttp-3.13.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:05c4dd3c48fb5f15db31f57eb35374cb0c09afdde532e7fb70a75aede0ed30f6"}, + {file = "aiohttp-3.13.2-cp310-cp310-win32.whl", hash = "sha256:e574a7d61cf10351d734bcddabbe15ede0eaa8a02070d85446875dc11189a251"}, + {file = "aiohttp-3.13.2-cp310-cp310-win_amd64.whl", hash = "sha256:364f55663085d658b8462a1c3f17b2b84a5c2e1ba858e1b79bff7b2e24ad1514"}, + {file = "aiohttp-3.13.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4647d02df098f6434bafd7f32ad14942f05a9caa06c7016fdcc816f343997dd0"}, + {file = 
"aiohttp-3.13.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e3403f24bcb9c3b29113611c3c16a2a447c3953ecf86b79775e7be06f7ae7ccb"}, + {file = "aiohttp-3.13.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:43dff14e35aba17e3d6d5ba628858fb8cb51e30f44724a2d2f0c75be492c55e9"}, + {file = "aiohttp-3.13.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2a9ea08e8c58bb17655630198833109227dea914cd20be660f52215f6de5613"}, + {file = "aiohttp-3.13.2-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53b07472f235eb80e826ad038c9d106c2f653584753f3ddab907c83f49eedead"}, + {file = "aiohttp-3.13.2-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e736c93e9c274fce6419af4aac199984d866e55f8a4cec9114671d0ea9688780"}, + {file = "aiohttp-3.13.2-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ff5e771f5dcbc81c64898c597a434f7682f2259e0cd666932a913d53d1341d1a"}, + {file = "aiohttp-3.13.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3b6fb0c207cc661fa0bf8c66d8d9b657331ccc814f4719468af61034b478592"}, + {file = "aiohttp-3.13.2-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:97a0895a8e840ab3520e2288db7cace3a1981300d48babeb50e7425609e2e0ab"}, + {file = "aiohttp-3.13.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9e8f8afb552297aca127c90cb840e9a1d4bfd6a10d7d8f2d9176e1acc69bad30"}, + {file = "aiohttp-3.13.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed2f9c7216e53c3df02264f25d824b079cc5914f9e2deba94155190ef648ee40"}, + {file = "aiohttp-3.13.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:99c5280a329d5fa18ef30fd10c793a190d996567667908bef8a7f81f8202b948"}, + {file = "aiohttp-3.13.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ca6ffef405fc9c09a746cb5d019c1672cd7f402542e379afc66b370833170cf"}, + {file = "aiohttp-3.13.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:47f438b1a28e926c37632bff3c44df7d27c9b57aaf4e34b1def3c07111fdb782"}, + {file = "aiohttp-3.13.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9acda8604a57bb60544e4646a4615c1866ee6c04a8edef9b8ee6fd1d8fa2ddc8"}, + {file = "aiohttp-3.13.2-cp311-cp311-win32.whl", hash = "sha256:868e195e39b24aaa930b063c08bb0c17924899c16c672a28a65afded9c46c6ec"}, + {file = "aiohttp-3.13.2-cp311-cp311-win_amd64.whl", hash = "sha256:7fd19df530c292542636c2a9a85854fab93474396a52f1695e799186bbd7f24c"}, + {file = "aiohttp-3.13.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b1e56bab2e12b2b9ed300218c351ee2a3d8c8fdab5b1ec6193e11a817767e47b"}, + {file = "aiohttp-3.13.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:364e25edaabd3d37b1db1f0cbcee8c73c9a3727bfa262b83e5e4cf3489a2a9dc"}, + {file = "aiohttp-3.13.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c5c94825f744694c4b8db20b71dba9a257cd2ba8e010a803042123f3a25d50d7"}, + {file = "aiohttp-3.13.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba2715d842ffa787be87cbfce150d5e88c87a98e0b62e0f5aa489169a393dbbb"}, + {file = "aiohttp-3.13.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:585542825c4bc662221fb257889e011a5aa00f1ae4d75d1d246a5225289183e3"}, + {file = "aiohttp-3.13.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:39d02cb6025fe1aabca329c5632f48c9532a3dabccd859e7e2f110668972331f"}, + {file = "aiohttp-3.13.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e67446b19e014d37342f7195f592a2a948141d15a312fe0e700c2fd2f03124f6"}, + {file = "aiohttp-3.13.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4356474ad6333e41ccefd39eae869ba15a6c5299c9c01dfdcfdd5c107be4363e"}, + {file = "aiohttp-3.13.2-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:eeacf451c99b4525f700f078becff32c32ec327b10dcf31306a8a52d78166de7"}, + {file = "aiohttp-3.13.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8a9b889aeabd7a4e9af0b7f4ab5ad94d42e7ff679aaec6d0db21e3b639ad58d"}, + {file = "aiohttp-3.13.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fa89cb11bc71a63b69568d5b8a25c3ca25b6d54c15f907ca1c130d72f320b76b"}, + {file = "aiohttp-3.13.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8aa7c807df234f693fed0ecd507192fc97692e61fee5702cdc11155d2e5cadc8"}, + {file = "aiohttp-3.13.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:9eb3e33fdbe43f88c3c75fa608c25e7c47bbd80f48d012763cb67c47f39a7e16"}, + {file = "aiohttp-3.13.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9434bc0d80076138ea986833156c5a48c9c7a8abb0c96039ddbb4afc93184169"}, + {file = "aiohttp-3.13.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ff15c147b2ad66da1f2cbb0622313f2242d8e6e8f9b79b5206c84523a4473248"}, + {file = "aiohttp-3.13.2-cp312-cp312-win32.whl", hash = "sha256:27e569eb9d9e95dbd55c0fc3ec3a9335defbf1d8bc1d20171a49f3c4c607b93e"}, + {file = "aiohttp-3.13.2-cp312-cp312-win_amd64.whl", hash = "sha256:8709a0f05d59a71f33fd05c17fc11fcb8c30140506e13c2f5e8ee1b8964e1b45"}, + {file = "aiohttp-3.13.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7519bdc7dfc1940d201651b52bf5e03f5503bda45ad6eacf64dda98be5b2b6be"}, + {file = "aiohttp-3.13.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:088912a78b4d4f547a1f19c099d5a506df17eacec3c6f4375e2831ec1d995742"}, + {file = "aiohttp-3.13.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5276807b9de9092af38ed23ce120539ab0ac955547b38563a9ba4f5b07b95293"}, + {file = "aiohttp-3.13.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1237c1375eaef0db4dcd7c2559f42e8af7b87ea7d295b118c60c36a6e61cb811"}, + {file = "aiohttp-3.13.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:96581619c57419c3d7d78703d5b78c1e5e5fc0172d60f555bdebaced82ded19a"}, + {file = "aiohttp-3.13.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2713a95b47374169409d18103366de1050fe0ea73db358fc7a7acb2880422d4"}, + {file = "aiohttp-3.13.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:228a1cd556b3caca590e9511a89444925da87d35219a49ab5da0c36d2d943a6a"}, + {file = "aiohttp-3.13.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ac6cde5fba8d7d8c6ac963dbb0256a9854e9fafff52fbcc58fdf819357892c3e"}, + {file = "aiohttp-3.13.2-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f2bef8237544f4e42878c61cef4e2839fee6346dc60f5739f876a9c50be7fcdb"}, + {file = "aiohttp-3.13.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:16f15a4eac3bc2d76c45f7ebdd48a65d41b242eb6c31c2245463b40b34584ded"}, + {file = 
"aiohttp-3.13.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:bb7fb776645af5cc58ab804c58d7eba545a97e047254a52ce89c157b5af6cd0b"}, + {file = "aiohttp-3.13.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e1b4951125ec10c70802f2cb09736c895861cd39fd9dcb35107b4dc8ae6220b8"}, + {file = "aiohttp-3.13.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:550bf765101ae721ee1d37d8095f47b1f220650f85fe1af37a90ce75bab89d04"}, + {file = "aiohttp-3.13.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fe91b87fc295973096251e2d25a811388e7d8adf3bd2b97ef6ae78bc4ac6c476"}, + {file = "aiohttp-3.13.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e0c8e31cfcc4592cb200160344b2fb6ae0f9e4effe06c644b5a125d4ae5ebe23"}, + {file = "aiohttp-3.13.2-cp313-cp313-win32.whl", hash = "sha256:0740f31a60848d6edb296a0df827473eede90c689b8f9f2a4cdde74889eb2254"}, + {file = "aiohttp-3.13.2-cp313-cp313-win_amd64.whl", hash = "sha256:a88d13e7ca367394908f8a276b89d04a3652044612b9a408a0bb22a5ed976a1a"}, + {file = "aiohttp-3.13.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:2475391c29230e063ef53a66669b7b691c9bfc3f1426a0f7bcdf1216bdbac38b"}, + {file = "aiohttp-3.13.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:f33c8748abef4d8717bb20e8fb1b3e07c6adacb7fd6beaae971a764cf5f30d61"}, + {file = "aiohttp-3.13.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ae32f24bbfb7dbb485a24b30b1149e2f200be94777232aeadba3eecece4d0aa4"}, + {file = "aiohttp-3.13.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d7f02042c1f009ffb70067326ef183a047425bb2ff3bc434ead4dd4a4a66a2b"}, + {file = "aiohttp-3.13.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:93655083005d71cd6c072cdab54c886e6570ad2c4592139c3fb967bfc19e4694"}, + {file = "aiohttp-3.13.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:0db1e24b852f5f664cd728db140cf11ea0e82450471232a394b3d1a540b0f906"}, + {file = "aiohttp-3.13.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b009194665bcd128e23eaddef362e745601afa4641930848af4c8559e88f18f9"}, + {file = "aiohttp-3.13.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c038a8fdc8103cd51dbd986ecdce141473ffd9775a7a8057a6ed9c3653478011"}, + {file = "aiohttp-3.13.2-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:66bac29b95a00db411cd758fea0e4b9bdba6d549dfe333f9a945430f5f2cc5a6"}, + {file = "aiohttp-3.13.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4ebf9cfc9ba24a74cf0718f04aac2a3bbe745902cc7c5ebc55c0f3b5777ef213"}, + {file = "aiohttp-3.13.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a4b88ebe35ce54205c7074f7302bd08a4cb83256a3e0870c72d6f68a3aaf8e49"}, + {file = "aiohttp-3.13.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:98c4fb90bb82b70a4ed79ca35f656f4281885be076f3f970ce315402b53099ae"}, + {file = "aiohttp-3.13.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:ec7534e63ae0f3759df3a1ed4fa6bc8f75082a924b590619c0dd2f76d7043caa"}, + {file = "aiohttp-3.13.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5b927cf9b935a13e33644cbed6c8c4b2d0f25b713d838743f8fe7191b33829c4"}, + {file = "aiohttp-3.13.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:88d6c017966a78c5265d996c19cdb79235be5e6412268d7e2ce7dee339471b7a"}, + {file = "aiohttp-3.13.2-cp314-cp314-win32.whl", hash = 
"sha256:f7c183e786e299b5d6c49fb43a769f8eb8e04a2726a2bd5887b98b5cc2d67940"}, + {file = "aiohttp-3.13.2-cp314-cp314-win_amd64.whl", hash = "sha256:fe242cd381e0fb65758faf5ad96c2e460df6ee5b2de1072fe97e4127927e00b4"}, + {file = "aiohttp-3.13.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:f10d9c0b0188fe85398c61147bbd2a657d616c876863bfeff43376e0e3134673"}, + {file = "aiohttp-3.13.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e7c952aefdf2460f4ae55c5e9c3e80aa72f706a6317e06020f80e96253b1accd"}, + {file = "aiohttp-3.13.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c20423ce14771d98353d2e25e83591fa75dfa90a3c1848f3d7c68243b4fbded3"}, + {file = "aiohttp-3.13.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e96eb1a34396e9430c19d8338d2ec33015e4a87ef2b4449db94c22412e25ccdf"}, + {file = "aiohttp-3.13.2-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:23fb0783bc1a33640036465019d3bba069942616a6a2353c6907d7fe1ccdaf4e"}, + {file = "aiohttp-3.13.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e1a9bea6244a1d05a4e57c295d69e159a5c50d8ef16aa390948ee873478d9a5"}, + {file = "aiohttp-3.13.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0a3d54e822688b56e9f6b5816fb3de3a3a64660efac64e4c2dc435230ad23bad"}, + {file = "aiohttp-3.13.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7a653d872afe9f33497215745da7a943d1dc15b728a9c8da1c3ac423af35178e"}, + {file = "aiohttp-3.13.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:56d36e80d2003fa3fc0207fac644216d8532e9504a785ef9a8fd013f84a42c61"}, + {file = "aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:78cd586d8331fb8e241c2dd6b2f4061778cc69e150514b39a9e28dd050475661"}, + {file = "aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:20b10bbfbff766294fe99987f7bb3b74fdd2f1a2905f2562132641ad434dcf98"}, + {file = "aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9ec49dff7e2b3c85cdeaa412e9d438f0ecd71676fde61ec57027dd392f00c693"}, + {file = "aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:94f05348c4406450f9d73d38efb41d669ad6cd90c7ee194810d0eefbfa875a7a"}, + {file = "aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:fa4dcb605c6f82a80c7f95713c2b11c3b8e9893b3ebd2bc9bde93165ed6107be"}, + {file = "aiohttp-3.13.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:cf00e5db968c3f67eccd2778574cf64d8b27d95b237770aa32400bd7a1ca4f6c"}, + {file = "aiohttp-3.13.2-cp314-cp314t-win32.whl", hash = "sha256:d23b5fe492b0805a50d3371e8a728a9134d8de5447dce4c885f5587294750734"}, + {file = "aiohttp-3.13.2-cp314-cp314t-win_amd64.whl", hash = "sha256:ff0a7b0a82a7ab905cbda74006318d1b12e37c797eb1b0d4eb3e316cf47f658f"}, + {file = "aiohttp-3.13.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7fbdf5ad6084f1940ce88933de34b62358d0f4a0b6ec097362dcd3e5a65a4989"}, + {file = "aiohttp-3.13.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7c3a50345635a02db61792c85bb86daffac05330f6473d524f1a4e3ef9d0046d"}, + {file = "aiohttp-3.13.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0e87dff73f46e969af38ab3f7cb75316a7c944e2e574ff7c933bc01b10def7f5"}, + {file = "aiohttp-3.13.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:2adebd4577724dcae085665f294cc57c8701ddd4d26140504db622b8d566d7aa"}, + {file = "aiohttp-3.13.2-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e036a3a645fe92309ec34b918394bb377950cbb43039a97edae6c08db64b23e2"}, + {file = "aiohttp-3.13.2-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:23ad365e30108c422d0b4428cf271156dd56790f6dd50d770b8e360e6c5ab2e6"}, + {file = "aiohttp-3.13.2-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1f9b2c2d4b9d958b1f9ae0c984ec1dd6b6689e15c75045be8ccb4011426268ca"}, + {file = "aiohttp-3.13.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3a92cf4b9bea33e15ecbaa5c59921be0f23222608143d025c989924f7e3e0c07"}, + {file = "aiohttp-3.13.2-cp39-cp39-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:070599407f4954021509193404c4ac53153525a19531051661440644728ba9a7"}, + {file = "aiohttp-3.13.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:29562998ec66f988d49fb83c9b01694fa927186b781463f376c5845c121e4e0b"}, + {file = "aiohttp-3.13.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:4dd3db9d0f4ebca1d887d76f7cdbcd1116ac0d05a9221b9dad82c64a62578c4d"}, + {file = "aiohttp-3.13.2-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:d7bc4b7f9c4921eba72677cd9fedd2308f4a4ca3e12fab58935295ad9ea98700"}, + {file = "aiohttp-3.13.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:dacd50501cd017f8cccb328da0c90823511d70d24a323196826d923aad865901"}, + {file = "aiohttp-3.13.2-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:8b2f1414f6a1e0683f212ec80e813f4abef94c739fd090b66c9adf9d2a05feac"}, + {file = "aiohttp-3.13.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04c3971421576ed24c191f610052bcb2f059e395bc2489dd99e397f9bc466329"}, + {file = "aiohttp-3.13.2-cp39-cp39-win32.whl", hash = "sha256:9f377d0a924e5cc94dc620bc6366fc3e889586a7f18b748901cf016c916e2084"}, + {file = "aiohttp-3.13.2-cp39-cp39-win_amd64.whl", hash = "sha256:9c705601e16c03466cb72011bd1af55d68fa65b045356d8f96c216e5f6db0fa5"}, + {file = "aiohttp-3.13.2.tar.gz", hash = "sha256:40176a52c186aefef6eb3cad2cdd30cd06e3afbe88fe8ab2af9c0b90f228daca"}, +] + +[package.dependencies] +aiohappyeyeballs = ">=2.5.0" +aiosignal = ">=1.4.0" +attrs = ">=17.3.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +propcache = ">=0.2.0" +yarl = ">=1.17.0,<2.0" + +[package.extras] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.3.0)", "backports.zstd ; platform_python_implementation == \"CPython\" and python_version < \"3.14\"", "brotlicffi ; platform_python_implementation != \"CPython\""] + +[[package]] +name = "aiosignal" +version = "1.4.0" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, + {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" +typing-extensions = {version = ">=4.2", markers = "python_version < \"3.13\""} + [[package]] name = "annotated-types" version = "0.7.0" @@ -920,6 +1090,49 @@ files = [ marshmallow = ">=3.18.0,<4.0.0" typing-inspect = ">=0.4.0,<1" +[[package]] +name = "datasets" +version = "3.0.2" +description = "HuggingFace 
community-driven open-source library of datasets" +optional = false +python-versions = ">=3.8.0" +groups = ["main"] +files = [ + {file = "datasets-3.0.2-py3-none-any.whl", hash = "sha256:220bfbea0be9bf81d121bd2ac76fe4ef3f7defe0e8586ce1e7f66dcaaf69f88d"}, + {file = "datasets-3.0.2.tar.gz", hash = "sha256:07204c389ce0491ef3ad50dd79966d3fd40422a12b831cf84a117323ac74fbc1"}, +] + +[package.dependencies] +aiohttp = "*" +dill = ">=0.3.0,<0.3.9" +filelock = "*" +fsspec = {version = ">=2023.1.0,<=2024.9.0", extras = ["http"]} +huggingface-hub = ">=0.23.0" +multiprocess = "<0.70.17" +numpy = ">=1.17" +packaging = "*" +pandas = "*" +pyarrow = ">=15.0.0" +pyyaml = ">=5.1" +requests = ">=2.32.2" +tqdm = ">=4.66.3" +xxhash = "*" + +[package.extras] +audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\""] +benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] +dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch", "torch (>=2.0.0)", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"] +docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] +jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] +quality = ["ruff (>=0.3.0)"] +s3 = ["s3fs"] +tensorflow = ["tensorflow (>=2.6.0)"] +tensorflow-gpu = ["tensorflow (>=2.6.0)"] +tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"] +torch = ["torch"] +vision = ["Pillow (>=9.4.0)"] + [[package]] name = "deap" version = "1.3.3" @@ -1045,6 +1258,22 @@ files = [ {file = "dependencies-2.0.1.tar.gz", hash = "sha256:89f8262059ee6fb7a27f12bc72cec41e4a954a7b6f5ba0b4c902be1495e1cd12"}, ] +[[package]] +name = "dill" +version = "0.3.8" +description = "serialize 
all of Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, + {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, +] + +[package.extras] +graph = ["objgraph (>=1.7.2)"] +profile = ["gprof2dot (>=2022.7.29)"] + [[package]] name = "distlib" version = "0.4.0" @@ -1092,6 +1321,42 @@ files = [ {file = "docutils-0.22.2.tar.gz", hash = "sha256:9fdb771707c8784c8f2728b67cb2c691305933d68137ef95a75db5f4dfbc213d"}, ] +[[package]] +name = "evaluate" +version = "0.4.6" +description = "HuggingFace community-driven open-source library of evaluation" +optional = false +python-versions = ">=3.8.0" +groups = ["main"] +files = [ + {file = "evaluate-0.4.6-py3-none-any.whl", hash = "sha256:bca85bc294f338377b7ac2f861e21c308b11b2a285f510d7d5394d5df437db29"}, + {file = "evaluate-0.4.6.tar.gz", hash = "sha256:e07036ca12b3c24331f83ab787f21cc2dbf3631813a1631e63e40897c69a3f21"}, +] + +[package.dependencies] +datasets = ">=2.0.0" +dill = "*" +fsspec = {version = ">=2021.05.0", extras = ["http"]} +huggingface-hub = ">=0.7.0" +multiprocess = "*" +numpy = ">=1.17" +packaging = "*" +pandas = "*" +requests = ">=2.19.0" +tqdm = ">=4.62.1" +xxhash = "*" + +[package.extras] +dev = ["Werkzeug (>=1.0.1)", "absl-py", "accelerate", "bert-score (>=0.3.6)", "black (>=22.0,<23.0)", "cer (>=1.2.0)", "charcut (>=1.1.1)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "jiwer", "mauve-text", "nltk", "numpy (<2.0.0)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "requests-file (>=1.5.1)", "rouge-score (>=0.1.2)", "sacrebleu", "sacremoses", "scikit-learn", "scipy (>=1.10.0)", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1,<=2.10)", "texttable (>=1.6.3)", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "torch", "transformers", "trectools", "unidecode (>=1.3.4)"] +docs = ["s3fs"] +evaluator = ["scipy (>=1.7.1)", "transformers"] +quality = ["black (>=22.0,<23.0)", "flake8 (>=3.8.3)", "isort (>=5.0.0)", "pyyaml (>=5.3.1)"] +template = ["cookiecutter", "gradio (>=3.0.0)"] +tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)"] +tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] +tests = ["Werkzeug (>=1.0.1)", "absl-py", "accelerate", "bert-score (>=0.3.6)", "cer (>=1.2.0)", "charcut (>=1.1.1)", "jiwer", "mauve-text", "nltk", "numpy (<2.0.0)", "pytest", "pytest-datadir", "pytest-xdist", "requests-file (>=1.5.1)", "rouge-score (>=0.1.2)", "sacrebleu", "sacremoses", "scikit-learn", "scipy (>=1.10.0)", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1,<=2.10)", "texttable (>=1.6.3)", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "torch", "transformers", "trectools", "unidecode (>=1.3.4)"] +torch = ["torch"] + [[package]] name = "execnet" version = "2.1.1" @@ -1227,24 +1492,167 @@ files = [ {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, ] +[[package]] +name = "frozenlist" +version = "1.8.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b37f6d31b3dcea7deb5e9696e529a6aa4a898adc33db82da12e4c60a7c4d2011"}, + {file = "frozenlist-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:ef2b7b394f208233e471abc541cc6991f907ffd47dc72584acee3147899d6565"}, + {file = "frozenlist-1.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a88f062f072d1589b7b46e951698950e7da00442fc1cacbe17e19e025dc327ad"}, + {file = "frozenlist-1.8.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f57fb59d9f385710aa7060e89410aeb5058b99e62f4d16b08b91986b9a2140c2"}, + {file = "frozenlist-1.8.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:799345ab092bee59f01a915620b5d014698547afd011e691a208637312db9186"}, + {file = "frozenlist-1.8.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c23c3ff005322a6e16f71bf8692fcf4d5a304aaafe1e262c98c6d4adc7be863e"}, + {file = "frozenlist-1.8.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a76ea0f0b9dfa06f254ee06053d93a600865b3274358ca48a352ce4f0798450"}, + {file = "frozenlist-1.8.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c7366fe1418a6133d5aa824ee53d406550110984de7637d65a178010f759c6ef"}, + {file = "frozenlist-1.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13d23a45c4cebade99340c4165bd90eeb4a56c6d8a9d8aa49568cac19a6d0dc4"}, + {file = "frozenlist-1.8.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:e4a3408834f65da56c83528fb52ce7911484f0d1eaf7b761fc66001db1646eff"}, + {file = "frozenlist-1.8.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:42145cd2748ca39f32801dad54aeea10039da6f86e303659db90db1c4b614c8c"}, + {file = "frozenlist-1.8.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e2de870d16a7a53901e41b64ffdf26f2fbb8917b3e6ebf398098d72c5b20bd7f"}, + {file = "frozenlist-1.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:20e63c9493d33ee48536600d1a5c95eefc870cd71e7ab037763d1fbb89cc51e7"}, + {file = "frozenlist-1.8.0-cp310-cp310-win32.whl", hash = "sha256:adbeebaebae3526afc3c96fad434367cafbfd1b25d72369a9e5858453b1bb71a"}, + {file = "frozenlist-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:667c3777ca571e5dbeb76f331562ff98b957431df140b54c85fd4d52eea8d8f6"}, + {file = "frozenlist-1.8.0-cp310-cp310-win_arm64.whl", hash = "sha256:80f85f0a7cc86e7a54c46d99c9e1318ff01f4687c172ede30fd52d19d1da1c8e"}, + {file = "frozenlist-1.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84"}, + {file = "frozenlist-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:17c883ab0ab67200b5f964d2b9ed6b00971917d5d8a92df149dc2c9779208ee9"}, + {file = "frozenlist-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2552f44204b744fba866e573be4c1f9048d6a324dfe14475103fd51613eb1d1f"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:957e7c38f250991e48a9a73e6423db1bb9dd14e722a10f6b8bb8e16a0f55f695"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8585e3bb2cdea02fc88ffa245069c36555557ad3609e83be0ec71f54fd4abb52"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = 
"sha256:edee74874ce20a373d62dc28b0b18b93f645633c2943fd90ee9d898550770581"}, + {file = "frozenlist-1.8.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c9a63152fe95756b85f31186bddf42e4c02c6321207fd6601a1c89ebac4fe567"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b6db2185db9be0a04fecf2f241c70b63b1a242e2805be291855078f2b404dd6b"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f4be2e3d8bc8aabd566f8d5b8ba7ecc09249d74ba3c9ed52e54dc23a293f0b92"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c8d1634419f39ea6f5c427ea2f90ca85126b54b50837f31497f3bf38266e853d"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:1a7fa382a4a223773ed64242dbe1c9c326ec09457e6b8428efb4118c685c3dfd"}, + {file = "frozenlist-1.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:11847b53d722050808926e785df837353bd4d75f1d494377e59b23594d834967"}, + {file = "frozenlist-1.8.0-cp311-cp311-win32.whl", hash = "sha256:27c6e8077956cf73eadd514be8fb04d77fc946a7fe9f7fe167648b0b9085cc25"}, + {file = "frozenlist-1.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac913f8403b36a2c8610bbfd25b8013488533e71e62b4b4adce9c86c8cea905b"}, + {file = "frozenlist-1.8.0-cp311-cp311-win_arm64.whl", hash = "sha256:d4d3214a0f8394edfa3e303136d0575eece0745ff2b47bd2cb2e66dd92d4351a"}, + {file = "frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1"}, + {file = "frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b"}, + {file = "frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b"}, + {file = "frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608"}, + {file = "frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa"}, + {file = "frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf"}, + {file = "frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746"}, + {file = "frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd"}, + {file = "frozenlist-1.8.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8d92f1a84bb12d9e56f818b3a746f3efba93c1b63c8387a73dde655e1e42282a"}, + {file = "frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:96153e77a591c8adc2ee805756c61f59fef4cf4073a9275ee86fe8cba41241f7"}, + {file = "frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f21f00a91358803399890ab167098c131ec2ddd5f8f5fd5fe9c9f2c6fcd91e40"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fb30f9626572a76dfe4293c7194a09fb1fe93ba94c7d4f720dfae3b646b45027"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eaa352d7047a31d87dafcacbabe89df0aa506abb5b1b85a2fb91bc3faa02d822"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:03ae967b4e297f58f8c774c7eabcce57fe3c2434817d4385c50661845a058121"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f6292f1de555ffcc675941d65fffffb0a5bcd992905015f85d0592201793e0e5"}, + {file = "frozenlist-1.8.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29548f9b5b5e3460ce7378144c3010363d8035cea44bc0bf02d57f5a685e084e"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ec3cc8c5d4084591b4237c0a272cc4f50a5b03396a47d9caaf76f5d7b38a4f11"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:517279f58009d0b1f2e7c1b130b377a349405da3f7621ed6bfae50b10adf20c1"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:db1e72ede2d0d7ccb213f218df6a078a9c09a7de257c2fe8fcef16d5925230b1"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b4dec9482a65c54a5044486847b8a66bf10c9cb4926d42927ec4e8fd5db7fed8"}, + {file = "frozenlist-1.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:21900c48ae04d13d416f0e1e0c4d81f7931f73a9dfa0b7a8746fb2fe7dd970ed"}, + {file = "frozenlist-1.8.0-cp313-cp313-win32.whl", hash = "sha256:8b7b94a067d1c504ee0b16def57ad5738701e4ba10cec90529f13fa03c833496"}, + {file = "frozenlist-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:878be833caa6a3821caf85eb39c5ba92d28e85df26d57afb06b35b2efd937231"}, + {file = "frozenlist-1.8.0-cp313-cp313-win_arm64.whl", hash = "sha256:44389d135b3ff43ba8cc89ff7f51f5a0bb6b63d829c8300f79a2fe4fe61bcc62"}, + {file = "frozenlist-1.8.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e25ac20a2ef37e91c1b39938b591457666a0fa835c7783c3a8f33ea42870db94"}, + {file = "frozenlist-1.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07cdca25a91a4386d2e76ad992916a85038a9b97561bf7a3fd12d5d9ce31870c"}, + {file = "frozenlist-1.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:4e0c11f2cc6717e0a741f84a527c52616140741cd812a50422f83dc31749fb52"}, + {file = 
"frozenlist-1.8.0-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b3210649ee28062ea6099cfda39e147fa1bc039583c8ee4481cb7811e2448c51"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:581ef5194c48035a7de2aefc72ac6539823bb71508189e5de01d60c9dcd5fa65"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3ef2d026f16a2b1866e1d86fc4e1291e1ed8a387b2c333809419a2f8b3a77b82"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5500ef82073f599ac84d888e3a8c1f77ac831183244bfd7f11eaa0289fb30714"}, + {file = "frozenlist-1.8.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:50066c3997d0091c411a66e710f4e11752251e6d2d73d70d8d5d4c76442a199d"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5c1c8e78426e59b3f8005e9b19f6ff46e5845895adbde20ece9218319eca6506"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:eefdba20de0d938cec6a89bd4d70f346a03108a19b9df4248d3cf0d88f1b0f51"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cf253e0e1c3ceb4aaff6df637ce033ff6535fb8c70a764a8f46aafd3d6ab798e"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:032efa2674356903cd0261c4317a561a6850f3ac864a63fc1583147fb05a79b0"}, + {file = "frozenlist-1.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6da155091429aeba16851ecb10a9104a108bcd32f6c1642867eadaee401c1c41"}, + {file = "frozenlist-1.8.0-cp313-cp313t-win32.whl", hash = "sha256:0f96534f8bfebc1a394209427d0f8a63d343c9779cda6fc25e8e121b5fd8555b"}, + {file = "frozenlist-1.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5d63a068f978fc69421fb0e6eb91a9603187527c86b7cd3f534a5b77a592b888"}, + {file = "frozenlist-1.8.0-cp313-cp313t-win_arm64.whl", hash = "sha256:bf0a7e10b077bf5fb9380ad3ae8ce20ef919a6ad93b4552896419ac7e1d8e042"}, + {file = "frozenlist-1.8.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cee686f1f4cadeb2136007ddedd0aaf928ab95216e7691c63e50a8ec066336d0"}, + {file = "frozenlist-1.8.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:119fb2a1bd47307e899c2fac7f28e85b9a543864df47aa7ec9d3c1b4545f096f"}, + {file = "frozenlist-1.8.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4970ece02dbc8c3a92fcc5228e36a3e933a01a999f7094ff7c23fbd2beeaa67c"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:cba69cb73723c3f329622e34bdbf5ce1f80c21c290ff04256cff1cd3c2036ed2"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:778a11b15673f6f1df23d9586f83c4846c471a8af693a22e066508b77d201ec8"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0325024fe97f94c41c08872db482cf8ac4800d80e79222c6b0b7b162d5b13686"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:97260ff46b207a82a7567b581ab4190bd4dfa09f4db8a8b49d1a958f6aa4940e"}, + {file = "frozenlist-1.8.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:54b2077180eb7f83dd52c40b2750d0a9f175e06a42e3213ce047219de902717a"}, 
+ {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2f05983daecab868a31e1da44462873306d3cbfd76d1f0b5b69c473d21dbb128"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:33f48f51a446114bc5d251fb2954ab0164d5be02ad3382abcbfe07e2531d650f"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:154e55ec0655291b5dd1b8731c637ecdb50975a2ae70c606d100750a540082f7"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:4314debad13beb564b708b4a496020e5306c7333fa9a3ab90374169a20ffab30"}, + {file = "frozenlist-1.8.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:073f8bf8becba60aa931eb3bc420b217bb7d5b8f4750e6f8b3be7f3da85d38b7"}, + {file = "frozenlist-1.8.0-cp314-cp314-win32.whl", hash = "sha256:bac9c42ba2ac65ddc115d930c78d24ab8d4f465fd3fc473cdedfccadb9429806"}, + {file = "frozenlist-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:3e0761f4d1a44f1d1a47996511752cf3dcec5bbdd9cc2b4fe595caf97754b7a0"}, + {file = "frozenlist-1.8.0-cp314-cp314-win_arm64.whl", hash = "sha256:d1eaff1d00c7751b7c6662e9c5ba6eb2c17a2306ba5e2a37f24ddf3cc953402b"}, + {file = "frozenlist-1.8.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d3bb933317c52d7ea5004a1c442eef86f426886fba134ef8cf4226ea6ee1821d"}, + {file = "frozenlist-1.8.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8009897cdef112072f93a0efdce29cd819e717fd2f649ee3016efd3cd885a7ed"}, + {file = "frozenlist-1.8.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2c5dcbbc55383e5883246d11fd179782a9d07a986c40f49abe89ddf865913930"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:39ecbc32f1390387d2aa4f5a995e465e9e2f79ba3adcac92d68e3e0afae6657c"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92db2bf818d5cc8d9c1f1fc56b897662e24ea5adb36ad1f1d82875bd64e03c24"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2dc43a022e555de94c3b68a4ef0b11c4f747d12c024a520c7101709a2144fb37"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cb89a7f2de3602cfed448095bab3f178399646ab7c61454315089787df07733a"}, + {file = "frozenlist-1.8.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:33139dc858c580ea50e7e60a1b0ea003efa1fd42e6ec7fdbad78fff65fad2fd2"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:168c0969a329b416119507ba30b9ea13688fafffac1b7822802537569a1cb0ef"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:28bd570e8e189d7f7b001966435f9dac6718324b5be2990ac496cf1ea9ddb7fe"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:b2a095d45c5d46e5e79ba1e5b9cb787f541a8dee0433836cea4b96a2c439dcd8"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:eab8145831a0d56ec9c4139b6c3e594c7a83c2c8be25d5bcf2d86136a532287a"}, + {file = "frozenlist-1.8.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:974b28cf63cc99dfb2188d8d222bc6843656188164848c4f679e63dae4b0708e"}, + {file = "frozenlist-1.8.0-cp314-cp314t-win32.whl", hash = "sha256:342c97bf697ac5480c0a7ec73cd700ecfa5a8a40ac923bd035484616efecc2df"}, + {file = "frozenlist-1.8.0-cp314-cp314t-win_amd64.whl", hash = 
"sha256:06be8f67f39c8b1dc671f5d83aaefd3358ae5cdcf8314552c57e7ed3e6475bdd"}, + {file = "frozenlist-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:102e6314ca4da683dca92e3b1355490fed5f313b768500084fbe6371fddfdb79"}, + {file = "frozenlist-1.8.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d8b7138e5cd0647e4523d6685b0eac5d4be9a184ae9634492f25c6eb38c12a47"}, + {file = "frozenlist-1.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a6483e309ca809f1efd154b4d37dc6d9f61037d6c6a81c2dc7a15cb22c8c5dca"}, + {file = "frozenlist-1.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1b9290cf81e95e93fdf90548ce9d3c1211cf574b8e3f4b3b7cb0537cf2227068"}, + {file = "frozenlist-1.8.0-cp39-cp39-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:59a6a5876ca59d1b63af8cd5e7ffffb024c3dc1e9cf9301b21a2e76286505c95"}, + {file = "frozenlist-1.8.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6dc4126390929823e2d2d9dc79ab4046ed74680360fc5f38b585c12c66cdf459"}, + {file = "frozenlist-1.8.0-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:332db6b2563333c5671fecacd085141b5800cb866be16d5e3eb15a2086476675"}, + {file = "frozenlist-1.8.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9ff15928d62a0b80bb875655c39bf517938c7d589554cbd2669be42d97c2cb61"}, + {file = "frozenlist-1.8.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7bf6cdf8e07c8151fba6fe85735441240ec7f619f935a5205953d58009aef8c6"}, + {file = "frozenlist-1.8.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:48e6d3f4ec5c7273dfe83ff27c91083c6c9065af655dc2684d2c200c94308bb5"}, + {file = "frozenlist-1.8.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:1a7607e17ad33361677adcd1443edf6f5da0ce5e5377b798fba20fae194825f3"}, + {file = "frozenlist-1.8.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3a935c3a4e89c733303a2d5a7c257ea44af3a56c8202df486b7f5de40f37e1"}, + {file = "frozenlist-1.8.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:940d4a017dbfed9daf46a3b086e1d2167e7012ee297fef9e1c545c4d022f5178"}, + {file = "frozenlist-1.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b9be22a69a014bc47e78072d0ecae716f5eb56c15238acca0f43d6eb8e4a5bda"}, + {file = "frozenlist-1.8.0-cp39-cp39-win32.whl", hash = "sha256:1aa77cb5697069af47472e39612976ed05343ff2e84a3dcf15437b232cbfd087"}, + {file = "frozenlist-1.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:7398c222d1d405e796970320036b1b563892b65809d9e5261487bb2c7f7b5c6a"}, + {file = "frozenlist-1.8.0-cp39-cp39-win_arm64.whl", hash = "sha256:b4f3b365f31c6cd4af24545ca0a244a53688cad8834e32f56831c4923b50a103"}, + {file = "frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d"}, + {file = "frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad"}, +] + [[package]] name = "fsspec" -version = "2025.9.0" +version = "2024.9.0" description = "File-system specification" optional = false -python-versions = ">=3.9" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7"}, - {file = "fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19"}, + {file = "fsspec-2024.9.0-py3-none-any.whl", hash = 
"sha256:a0947d552d8a6efa72cc2c730b12c41d043509156966cca4fb157b0f2a0c574b"}, + {file = "fsspec-2024.9.0.tar.gz", hash = "sha256:4b0afb90c2f21832df142f292649035d80b421f60a9e1c027802e5a0da2b04e8"}, ] +[package.dependencies] +aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} + [package.extras] abfs = ["adlfs"] adl = ["adlfs"] arrow = ["pyarrow (>=1)"] dask = ["dask", "distributed"] -dev = ["pre-commit", "ruff (>=0.5)"] +dev = ["pre-commit", "ruff"] doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] dropbox = ["dropbox", "dropboxdrivefs", "requests"] full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] @@ -1263,8 +1671,8 @@ sftp = ["paramiko"] smb = ["smbprotocol"] ssh = ["paramiko"] test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] -test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] -test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard ; python_version < \"3.14\""] +test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] tqdm = ["tqdm"] [[package]] @@ -2719,6 +3127,187 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] +[[package]] +name = "multidict" +version = "6.7.0" +description = "multidict implementation" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "multidict-6.7.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:9f474ad5acda359c8758c8accc22032c6abe6dc87a8be2440d097785e27a9349"}, + {file = "multidict-6.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b7a9db5a870f780220e931d0002bbfd88fb53aceb6293251e2c839415c1b20e"}, + {file = "multidict-6.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03ca744319864e92721195fa28c7a3b2bc7b686246b35e4078c1e4d0eb5466d3"}, + {file = "multidict-6.7.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f0e77e3c0008bc9316e662624535b88d360c3a5d3f81e15cf12c139a75250046"}, + {file = 
"multidict-6.7.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:08325c9e5367aa379a3496aa9a022fe8837ff22e00b94db256d3a1378c76ab32"}, + {file = "multidict-6.7.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e2862408c99f84aa571ab462d25236ef9cb12a602ea959ba9c9009a54902fc73"}, + {file = "multidict-6.7.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4d72a9a2d885f5c208b0cb91ff2ed43636bb7e345ec839ff64708e04f69a13cc"}, + {file = "multidict-6.7.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:478cc36476687bac1514d651cbbaa94b86b0732fb6855c60c673794c7dd2da62"}, + {file = "multidict-6.7.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6843b28b0364dc605f21481c90fadb5f60d9123b442eb8a726bb74feef588a84"}, + {file = "multidict-6.7.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:23bfeee5316266e5ee2d625df2d2c602b829435fc3a235c2ba2131495706e4a0"}, + {file = "multidict-6.7.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:680878b9f3d45c31e1f730eef731f9b0bc1da456155688c6745ee84eb818e90e"}, + {file = "multidict-6.7.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:eb866162ef2f45063acc7a53a88ef6fe8bf121d45c30ea3c9cd87ce7e191a8d4"}, + {file = "multidict-6.7.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:df0e3bf7993bdbeca5ac25aa859cf40d39019e015c9c91809ba7093967f7a648"}, + {file = "multidict-6.7.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:661709cdcd919a2ece2234f9bae7174e5220c80b034585d7d8a755632d3e2111"}, + {file = "multidict-6.7.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:096f52730c3fb8ed419db2d44391932b63891b2c5ed14850a7e215c0ba9ade36"}, + {file = "multidict-6.7.0-cp310-cp310-win32.whl", hash = "sha256:afa8a2978ec65d2336305550535c9c4ff50ee527914328c8677b3973ade52b85"}, + {file = "multidict-6.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:b15b3afff74f707b9275d5ba6a91ae8f6429c3ffb29bbfd216b0b375a56f13d7"}, + {file = "multidict-6.7.0-cp310-cp310-win_arm64.whl", hash = "sha256:4b73189894398d59131a66ff157837b1fafea9974be486d036bb3d32331fdbf0"}, + {file = "multidict-6.7.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4d409aa42a94c0b3fa617708ef5276dfe81012ba6753a0370fcc9d0195d0a1fc"}, + {file = "multidict-6.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:14c9e076eede3b54c636f8ce1c9c252b5f057c62131211f0ceeec273810c9721"}, + {file = "multidict-6.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4c09703000a9d0fa3c3404b27041e574cc7f4df4c6563873246d0e11812a94b6"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a265acbb7bb33a3a2d626afbe756371dce0279e7b17f4f4eda406459c2b5ff1c"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51cb455de290ae462593e5b1cb1118c5c22ea7f0d3620d9940bf695cea5a4bd7"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:db99677b4457c7a5c5a949353e125ba72d62b35f74e26da141530fbb012218a7"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f470f68adc395e0183b92a2f4689264d1ea4b40504a24d9882c27375e6662bb9"}, + {file = 
"multidict-6.7.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0db4956f82723cc1c270de9c6e799b4c341d327762ec78ef82bb962f79cc07d8"}, + {file = "multidict-6.7.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e56d780c238f9e1ae66a22d2adf8d16f485381878250db8d496623cd38b22bd"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9d14baca2ee12c1a64740d4531356ba50b82543017f3ad6de0deb943c5979abb"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:295a92a76188917c7f99cda95858c822f9e4aae5824246bba9b6b44004ddd0a6"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39f1719f57adbb767ef592a50ae5ebb794220d1188f9ca93de471336401c34d2"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:0a13fb8e748dfc94749f622de065dd5c1def7e0d2216dba72b1d8069a389c6ff"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e3aa16de190d29a0ea1b48253c57d99a68492c8dd8948638073ab9e74dc9410b"}, + {file = "multidict-6.7.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a048ce45dcdaaf1defb76b2e684f997fb5abf74437b6cb7b22ddad934a964e34"}, + {file = "multidict-6.7.0-cp311-cp311-win32.whl", hash = "sha256:a90af66facec4cebe4181b9e62a68be65e45ac9b52b67de9eec118701856e7ff"}, + {file = "multidict-6.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:95b5ffa4349df2887518bb839409bcf22caa72d82beec453216802f475b23c81"}, + {file = "multidict-6.7.0-cp311-cp311-win_arm64.whl", hash = "sha256:329aa225b085b6f004a4955271a7ba9f1087e39dcb7e65f6284a988264a63912"}, + {file = "multidict-6.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8a3862568a36d26e650a19bb5cbbba14b71789032aebc0423f8cc5f150730184"}, + {file = "multidict-6.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:960c60b5849b9b4f9dcc9bea6e3626143c252c74113df2c1540aebce70209b45"}, + {file = "multidict-6.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2049be98fb57a31b4ccf870bf377af2504d4ae35646a19037ec271e4c07998aa"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:0934f3843a1860dd465d38895c17fce1f1cb37295149ab05cd1b9a03afacb2a7"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b3e34f3a1b8131ba06f1a73adab24f30934d148afcd5f5de9a73565a4404384e"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:efbb54e98446892590dc2458c19c10344ee9a883a79b5cec4bc34d6656e8d546"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a35c5fc61d4f51eb045061e7967cfe3123d622cd500e8868e7c0c592a09fedc4"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:29fe6740ebccba4175af1b9b87bf553e9c15cd5868ee967e010efcf94e4fd0f1"}, + {file = "multidict-6.7.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:123e2a72e20537add2f33a79e605f6191fba2afda4cbb876e35c1a7074298a7d"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b284e319754366c1aee2267a2036248b24eeb17ecd5dc16022095e747f2f4304"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = 
"sha256:803d685de7be4303b5a657b76e2f6d1240e7e0a8aa2968ad5811fa2285553a12"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c04a328260dfd5db8c39538f999f02779012268f54614902d0afc775d44e0a62"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8a19cdb57cd3df4cd865849d93ee14920fb97224300c88501f16ecfa2604b4e0"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b2fd74c52accced7e75de26023b7dccee62511a600e62311b918ec5c168fc2a"}, + {file = "multidict-6.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3e8bfdd0e487acf992407a140d2589fe598238eaeffa3da8448d63a63cd363f8"}, + {file = "multidict-6.7.0-cp312-cp312-win32.whl", hash = "sha256:dd32a49400a2c3d52088e120ee00c1e3576cbff7e10b98467962c74fdb762ed4"}, + {file = "multidict-6.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:92abb658ef2d7ef22ac9f8bb88e8b6c3e571671534e029359b6d9e845923eb1b"}, + {file = "multidict-6.7.0-cp312-cp312-win_arm64.whl", hash = "sha256:490dab541a6a642ce1a9d61a4781656b346a55c13038f0b1244653828e3a83ec"}, + {file = "multidict-6.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:bee7c0588aa0076ce77c0ea5d19a68d76ad81fcd9fe8501003b9a24f9d4000f6"}, + {file = "multidict-6.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7ef6b61cad77091056ce0e7ce69814ef72afacb150b7ac6a3e9470def2198159"}, + {file = "multidict-6.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9c0359b1ec12b1d6849c59f9d319610b7f20ef990a6d454ab151aa0e3b9f78ca"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cd240939f71c64bd658f186330603aac1a9a81bf6273f523fca63673cb7378a8"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a60a4d75718a5efa473ebd5ab685786ba0c67b8381f781d1be14da49f1a2dc60"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53a42d364f323275126aff81fb67c5ca1b7a04fda0546245730a55c8c5f24bc4"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3b29b980d0ddbecb736735ee5bef69bb2ddca56eff603c86f3f29a1128299b4f"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f8a93b1c0ed2d04b97a5e9336fd2d33371b9a6e29ab7dd6503d63407c20ffbaf"}, + {file = "multidict-6.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ff96e8815eecacc6645da76c413eb3b3d34cfca256c70b16b286a687d013c32"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7516c579652f6a6be0e266aec0acd0db80829ca305c3d771ed898538804c2036"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:040f393368e63fb0f3330e70c26bfd336656bed925e5cbe17c9da839a6ab13ec"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b3bc26a951007b1057a1c543af845f1c7e3e71cc240ed1ace7bf4484aa99196e"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7b022717c748dd1992a83e219587aabe45980d88969f01b316e78683e6285f64"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:9600082733859f00d79dee64effc7aef1beb26adb297416a4ad2116fd61374bd"}, + {file = "multidict-6.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:94218fcec4d72bc61df51c198d098ce2b378e0ccbac41ddbed5ef44092913288"}, + {file = "multidict-6.7.0-cp313-cp313-win32.whl", hash = "sha256:a37bd74c3fa9d00be2d7b8eca074dc56bd8077ddd2917a839bd989612671ed17"}, + {file = "multidict-6.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:30d193c6cc6d559db42b6bcec8a5d395d34d60c9877a0b71ecd7c204fcf15390"}, + {file = "multidict-6.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:ea3334cabe4d41b7ccd01e4d349828678794edbc2d3ae97fc162a3312095092e"}, + {file = "multidict-6.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:ad9ce259f50abd98a1ca0aa6e490b58c316a0fce0617f609723e40804add2c00"}, + {file = "multidict-6.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:07f5594ac6d084cbb5de2df218d78baf55ef150b91f0ff8a21cc7a2e3a5a58eb"}, + {file = "multidict-6.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:0591b48acf279821a579282444814a2d8d0af624ae0bc600aa4d1b920b6e924b"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:749a72584761531d2b9467cfbdfd29487ee21124c304c4b6cb760d8777b27f9c"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b4c3d199f953acd5b446bf7c0de1fe25d94e09e79086f8dc2f48a11a129cdf1"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9fb0211dfc3b51efea2f349ec92c114d7754dd62c01f81c3e32b765b70c45c9b"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a027ec240fe73a8d6281872690b988eed307cd7d91b23998ff35ff577ca688b5"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1d964afecdf3a8288789df2f5751dc0a8261138c3768d9af117ed384e538fad"}, + {file = "multidict-6.7.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:caf53b15b1b7df9fbd0709aa01409000a2b4dd03a5f6f5cc548183c7c8f8b63c"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:654030da3197d927f05a536a66186070e98765aa5142794c9904555d3a9d8fb5"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2090d3718829d1e484706a2f525e50c892237b2bf9b17a79b059cb98cddc2f10"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2d2cfeec3f6f45651b3d408c4acec0ebf3daa9bc8a112a084206f5db5d05b754"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4ef089f985b8c194d341eb2c24ae6e7408c9a0e2e5658699c92f497437d88c3c"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e93a0617cd16998784bf4414c7e40f17a35d2350e5c6f0bd900d3a8e02bd3762"}, + {file = "multidict-6.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0feece2ef8ebc42ed9e2e8c78fc4aa3cf455733b507c09ef7406364c94376c6"}, + {file = "multidict-6.7.0-cp313-cp313t-win32.whl", hash = "sha256:19a1d55338ec1be74ef62440ca9e04a2f001a04d0cc49a4983dc320ff0f3212d"}, + {file = "multidict-6.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3da4fb467498df97e986af166b12d01f05d2e04f978a9c1c680ea1988e0bc4b6"}, + {file = "multidict-6.7.0-cp313-cp313t-win_arm64.whl", hash = "sha256:b4121773c49a0776461f4a904cdf6264c88e42218aaa8407e803ca8025872792"}, + {file = "multidict-6.7.0-cp314-cp314-macosx_10_13_universal2.whl", hash = 
"sha256:3bab1e4aff7adaa34410f93b1f8e57c4b36b9af0426a76003f441ee1d3c7e842"}, + {file = "multidict-6.7.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b8512bac933afc3e45fb2b18da8e59b78d4f408399a960339598374d4ae3b56b"}, + {file = "multidict-6.7.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:79dcf9e477bc65414ebfea98ffd013cb39552b5ecd62908752e0e413d6d06e38"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:31bae522710064b5cbeddaf2e9f32b1abab70ac6ac91d42572502299e9953128"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a0df7ff02397bb63e2fd22af2c87dfa39e8c7f12947bc524dbdc528282c7e34"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7a0222514e8e4c514660e182d5156a415c13ef0aabbd71682fc714e327b95e99"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2397ab4daaf2698eb51a76721e98db21ce4f52339e535725de03ea962b5a3202"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8891681594162635948a636c9fe0ff21746aeb3dd5463f6e25d9bea3a8a39ca1"}, + {file = "multidict-6.7.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18706cc31dbf402a7945916dd5cddf160251b6dab8a2c5f3d6d5a55949f676b3"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:f844a1bbf1d207dd311a56f383f7eda2d0e134921d45751842d8235e7778965d"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:d4393e3581e84e5645506923816b9cc81f5609a778c7e7534054091acc64d1c6"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:fbd18dc82d7bf274b37aa48d664534330af744e03bccf696d6f4c6042e7d19e7"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b6234e14f9314731ec45c42fc4554b88133ad53a09092cc48a88e771c125dadb"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:08d4379f9744d8f78d98c8673c06e202ffa88296f009c71bbafe8a6bf847d01f"}, + {file = "multidict-6.7.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9fe04da3f79387f450fd0061d4dd2e45a72749d31bf634aecc9e27f24fdc4b3f"}, + {file = "multidict-6.7.0-cp314-cp314-win32.whl", hash = "sha256:fbafe31d191dfa7c4c51f7a6149c9fb7e914dcf9ffead27dcfd9f1ae382b3885"}, + {file = "multidict-6.7.0-cp314-cp314-win_amd64.whl", hash = "sha256:2f67396ec0310764b9222a1728ced1ab638f61aadc6226f17a71dd9324f9a99c"}, + {file = "multidict-6.7.0-cp314-cp314-win_arm64.whl", hash = "sha256:ba672b26069957ee369cfa7fc180dde1fc6f176eaf1e6beaf61fbebbd3d9c000"}, + {file = "multidict-6.7.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:c1dcc7524066fa918c6a27d61444d4ee7900ec635779058571f70d042d86ed63"}, + {file = "multidict-6.7.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:27e0b36c2d388dc7b6ced3406671b401e84ad7eb0656b8f3a2f46ed0ce483718"}, + {file = "multidict-6.7.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:2a7baa46a22e77f0988e3b23d4ede5513ebec1929e34ee9495be535662c0dfe2"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7bf77f54997a9166a2f5675d1201520586439424c2511723a7312bdb4bcc034e"}, + {file = 
"multidict-6.7.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e011555abada53f1578d63389610ac8a5400fc70ce71156b0aa30d326f1a5064"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:28b37063541b897fd6a318007373930a75ca6d6ac7c940dbe14731ffdd8d498e"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05047ada7a2fde2631a0ed706f1fd68b169a681dfe5e4cf0f8e4cb6618bbc2cd"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:716133f7d1d946a4e1b91b1756b23c088881e70ff180c24e864c26192ad7534a"}, + {file = "multidict-6.7.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1bed1b467ef657f2a0ae62844a607909ef1c6889562de5e1d505f74457d0b96"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:ca43bdfa5d37bd6aee89d85e1d0831fb86e25541be7e9d376ead1b28974f8e5e"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:44b546bd3eb645fd26fb949e43c02a25a2e632e2ca21a35e2e132c8105dc8599"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a6ef16328011d3f468e7ebc326f24c1445f001ca1dec335b2f8e66bed3006394"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:5aa873cbc8e593d361ae65c68f85faadd755c3295ea2c12040ee146802f23b38"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:3d7b6ccce016e29df4b7ca819659f516f0bc7a4b3efa3bb2012ba06431b044f9"}, + {file = "multidict-6.7.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:171b73bd4ee683d307599b66793ac80981b06f069b62eea1c9e29c9241aa66b0"}, + {file = "multidict-6.7.0-cp314-cp314t-win32.whl", hash = "sha256:b2d7f80c4e1fd010b07cb26820aae86b7e73b681ee4889684fb8d2d4537aab13"}, + {file = "multidict-6.7.0-cp314-cp314t-win_amd64.whl", hash = "sha256:09929cab6fcb68122776d575e03c6cc64ee0b8fca48d17e135474b042ce515cd"}, + {file = "multidict-6.7.0-cp314-cp314t-win_arm64.whl", hash = "sha256:cc41db090ed742f32bd2d2c721861725e6109681eddf835d0a82bd3a5c382827"}, + {file = "multidict-6.7.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:363eb68a0a59bd2303216d2346e6c441ba10d36d1f9969fcb6f1ba700de7bb5c"}, + {file = "multidict-6.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d874eb056410ca05fed180b6642e680373688efafc7f077b2a2f61811e873a40"}, + {file = "multidict-6.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8b55d5497b51afdfde55925e04a022f1de14d4f4f25cdfd4f5d9b0aa96166851"}, + {file = "multidict-6.7.0-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f8e5c0031b90ca9ce555e2e8fd5c3b02a25f14989cbc310701823832c99eb687"}, + {file = "multidict-6.7.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9cf41880c991716f3c7cec48e2f19ae4045fc9db5fc9cff27347ada24d710bb5"}, + {file = "multidict-6.7.0-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8cfc12a8630a29d601f48d47787bd7eb730e475e83edb5d6c5084317463373eb"}, + {file = "multidict-6.7.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3996b50c3237c4aec17459217c1e7bbdead9a22a0fcd3c365564fbd16439dde6"}, + {file = 
"multidict-6.7.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7f5170993a0dd3ab871c74f45c0a21a4e2c37a2f2b01b5f722a2ad9c6650469e"}, + {file = "multidict-6.7.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ec81878ddf0e98817def1e77d4f50dae5ef5b0e4fe796fae3bd674304172416e"}, + {file = "multidict-6.7.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9281bf5b34f59afbc6b1e477a372e9526b66ca446f4bf62592839c195a718b32"}, + {file = "multidict-6.7.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:68af405971779d8b37198726f2b6fe3955db846fee42db7a4286fc542203934c"}, + {file = "multidict-6.7.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3ba3ef510467abb0667421a286dc906e30eb08569365f5cdb131d7aff7c2dd84"}, + {file = "multidict-6.7.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b61189b29081a20c7e4e0b49b44d5d44bb0dc92be3c6d06a11cc043f81bf9329"}, + {file = "multidict-6.7.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:fb287618b9c7aa3bf8d825f02d9201b2f13078a5ed3b293c8f4d953917d84d5e"}, + {file = "multidict-6.7.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:521f33e377ff64b96c4c556b81c55d0cfffb96a11c194fd0c3f1e56f3d8dd5a4"}, + {file = "multidict-6.7.0-cp39-cp39-win32.whl", hash = "sha256:ce8fdc2dca699f8dbf055a61d73eaa10482569ad20ee3c36ef9641f69afa8c91"}, + {file = "multidict-6.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:7e73299c99939f089dd9b2120a04a516b95cdf8c1cd2b18c53ebf0de80b1f18f"}, + {file = "multidict-6.7.0-cp39-cp39-win_arm64.whl", hash = "sha256:6bdce131e14b04fd34a809b6380dbfd826065c3e2fe8a50dbae659fa0c390546"}, + {file = "multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3"}, + {file = "multidict-6.7.0.tar.gz", hash = "sha256:c6e99d9a65ca282e578dfea819cfa9c0a62b2499d8677392e09feaf305e9e6f5"}, +] + +[[package]] +name = "multiprocess" +version = "0.70.16" +description = "better multiprocessing and multithreading in Python" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, + {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, + {file = "multiprocess-0.70.16-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a"}, + {file = "multiprocess-0.70.16-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054"}, + {file = "multiprocess-0.70.16-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41"}, + {file = "multiprocess-0.70.16-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a"}, + {file = "multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02"}, + {file = "multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a"}, + {file = "multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e"}, + {file = "multiprocess-0.70.16-py38-none-any.whl", hash = 
"sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435"}, + {file = "multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3"}, + {file = "multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1"}, +] + +[package.dependencies] +dill = ">=0.3.8" + [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3471,6 +4060,105 @@ files = [ dev = ["pytest", "tox"] lint = ["black"] +[[package]] +name = "pandas" +version = "2.3.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "pandas-2.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:376c6446ae31770764215a6c937f72d917f214b43560603cd60da6408f183b6c"}, + {file = "pandas-2.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e19d192383eab2f4ceb30b412b22ea30690c9e618f78870357ae1d682912015a"}, + {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5caf26f64126b6c7aec964f74266f435afef1c1b13da3b0636c7518a1fa3e2b1"}, + {file = "pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd7478f1463441ae4ca7308a70e90b33470fa593429f9d4c578dd00d1fa78838"}, + {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4793891684806ae50d1288c9bae9330293ab4e083ccd1c5e383c34549c6e4250"}, + {file = "pandas-2.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:28083c648d9a99a5dd035ec125d42439c6c1c525098c58af0fc38dd1a7a1b3d4"}, + {file = "pandas-2.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:503cf027cf9940d2ceaa1a93cfb5f8c8c7e6e90720a2850378f0b3f3b1e06826"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:602b8615ebcc4a0c1751e71840428ddebeb142ec02c786e8ad6b1ce3c8dec523"}, + {file = "pandas-2.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8fe25fc7b623b0ef6b5009149627e34d2a4657e880948ec3c840e9402e5c1b45"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b468d3dad6ff947df92dcb32ede5b7bd41a9b3cceef0a30ed925f6d01fb8fa66"}, + {file = "pandas-2.3.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b98560e98cb334799c0b07ca7967ac361a47326e9b4e5a7dfb5ab2b1c9d35a1b"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1d37b5848ba49824e5c30bedb9c830ab9b7751fd049bc7914533e01c65f79791"}, + {file = "pandas-2.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db4301b2d1f926ae677a751eb2bd0e8c5f5319c9cb3f88b0becbbb0b07b34151"}, + {file = "pandas-2.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:f086f6fe114e19d92014a1966f43a3e62285109afe874f067f5abbdcbb10e59c"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"}, + {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"}, + {file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"}, + {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"}, + {file = 
"pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"}, + {file = "pandas-2.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c503ba5216814e295f40711470446bc3fd00f0faea8a086cbc688808e26f92a2"}, + {file = "pandas-2.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a637c5cdfa04b6d6e2ecedcb81fc52ffb0fd78ce2ebccc9ea964df9f658de8c8"}, + {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:854d00d556406bffe66a4c0802f334c9ad5a96b4f1f868adf036a21b11ef13ff"}, + {file = "pandas-2.3.3-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bf1f8a81d04ca90e32a0aceb819d34dbd378a98bf923b6398b9a3ec0bf44de29"}, + {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:23ebd657a4d38268c7dfbdf089fbc31ea709d82e4923c5ffd4fbd5747133ce73"}, + {file = "pandas-2.3.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5554c929ccc317d41a5e3d1234f3be588248e61f08a74dd17c9eabb535777dc9"}, + {file = "pandas-2.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:d3e28b3e83862ccf4d85ff19cf8c20b2ae7e503881711ff2d534dc8f761131aa"}, + {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", 
"pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + [[package]] name = "pandocfilters" version = "1.5.1" @@ -3746,6 +4434,138 @@ files = [ [package.dependencies] wcwidth = "*" +[[package]] +name = "propcache" +version = "0.4.1" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "propcache-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c2d1fa3201efaf55d730400d945b5b3ab6e672e100ba0f9a409d950ab25d7db"}, + {file = "propcache-0.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1eb2994229cc8ce7fe9b3db88f5465f5fd8651672840b2e426b88cdb1a30aac8"}, + {file = "propcache-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:66c1f011f45a3b33d7bcb22daed4b29c0c9e2224758b6be00686731e1b46f925"}, + {file = "propcache-0.4.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9a52009f2adffe195d0b605c25ec929d26b36ef986ba85244891dee3b294df21"}, + {file = "propcache-0.4.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5d4e2366a9c7b837555cf02fb9be2e3167d333aff716332ef1b7c3a142ec40c5"}, + {file = "propcache-0.4.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9d2b6caef873b4f09e26ea7e33d65f42b944837563a47a94719cc3544319a0db"}, + {file = "propcache-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b16ec437a8c8a965ecf95739448dd938b5c7f56e67ea009f4300d8df05f32b7"}, + {file = "propcache-0.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:296f4c8ed03ca7476813fe666c9ea97869a8d7aec972618671b33a38a5182ef4"}, + {file = "propcache-0.4.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1f0978529a418ebd1f49dad413a2b68af33f85d5c5ca5c6ca2a3bed375a7ac60"}, + {file = "propcache-0.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd138803047fb4c062b1c1dd95462f5209456bfab55c734458f15d11da288f8f"}, + {file = "propcache-0.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8c9b3cbe4584636d72ff556d9036e0c9317fa27b3ac1f0f558e7e84d1c9c5900"}, + {file = "propcache-0.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f93243fdc5657247533273ac4f86ae106cc6445a0efacb9a1bfe982fcfefd90c"}, + {file = "propcache-0.4.1-cp310-cp310-win32.whl", hash = "sha256:a0ee98db9c5f80785b266eb805016e36058ac72c51a064040f2bc43b61101cdb"}, + {file = "propcache-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:1cdb7988c4e5ac7f6d175a28a9aa0c94cb6f2ebe52756a3c0cda98d2809a9e37"}, + {file = "propcache-0.4.1-cp310-cp310-win_arm64.whl", hash = "sha256:d82ad62b19645419fe79dd63b3f9253e15b30e955c0170e5cebc350c1844e581"}, + {file = "propcache-0.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:60a8fda9644b7dfd5dece8c61d8a85e271cb958075bfc4e01083c148b61a7caf"}, + {file = "propcache-0.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c30b53e7e6bda1d547cabb47c825f3843a0a1a42b0496087bb58d8fedf9f41b5"}, + {file = "propcache-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6918ecbd897443087a3b7cd978d56546a812517dcaaca51b49526720571fa93e"}, + {file = "propcache-0.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3d902a36df4e5989763425a8ab9e98cd8ad5c52c823b34ee7ef307fd50582566"}, + {file = "propcache-0.4.1-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a9695397f85973bb40427dedddf70d8dc4a44b22f1650dd4af9eedf443d45165"}, + {file = "propcache-0.4.1-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2bb07ffd7eaad486576430c89f9b215f9e4be68c4866a96e97db9e97fead85dc"}, + {file = "propcache-0.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd6f30fdcf9ae2a70abd34da54f18da086160e4d7d9251f81f3da0ff84fc5a48"}, + {file = "propcache-0.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fc38cba02d1acba4e2869eef1a57a43dfbd3d49a59bf90dda7444ec2be6a5570"}, + {file = "propcache-0.4.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:67fad6162281e80e882fb3ec355398cf72864a54069d060321f6cd0ade95fe85"}, + {file = "propcache-0.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f10207adf04d08bec185bae14d9606a1444715bc99180f9331c9c02093e1959e"}, + {file = "propcache-0.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e9b0d8d0845bbc4cfcdcbcdbf5086886bc8157aa963c31c777ceff7846c77757"}, + {file = "propcache-0.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:981333cb2f4c1896a12f4ab92a9cc8f09ea664e9b7dbdc4eff74627af3a11c0f"}, + {file = "propcache-0.4.1-cp311-cp311-win32.whl", hash = "sha256:f1d2f90aeec838a52f1c1a32fe9a619fefd5e411721a9117fbf82aea638fe8a1"}, + {file = "propcache-0.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:364426a62660f3f699949ac8c621aad6977be7126c5807ce48c0aeb8e7333ea6"}, + {file = "propcache-0.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:e53f3a38d3510c11953f3e6a33f205c6d1b001129f972805ca9b42fc308bc239"}, + {file = "propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2"}, + {file = "propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403"}, + {file = "propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207"}, + {file = "propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72"}, + {file = "propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367"}, + {file = "propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4"}, + {file = "propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf"}, + {file = 
"propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3"}, + {file = "propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778"}, + {file = "propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6"}, + {file = "propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9"}, + {file = "propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75"}, + {file = "propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8"}, + {file = "propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db"}, + {file = "propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1"}, + {file = "propcache-0.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:43eedf29202c08550aac1d14e0ee619b0430aaef78f85864c1a892294fbc28cf"}, + {file = "propcache-0.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d62cdfcfd89ccb8de04e0eda998535c406bf5e060ffd56be6c586cbcc05b3311"}, + {file = "propcache-0.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cae65ad55793da34db5f54e4029b89d3b9b9490d8abe1b4c7ab5d4b8ec7ebf74"}, + {file = "propcache-0.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:333ddb9031d2704a301ee3e506dc46b1fe5f294ec198ed6435ad5b6a085facfe"}, + {file = "propcache-0.4.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:fd0858c20f078a32cf55f7e81473d96dcf3b93fd2ccdb3d40fdf54b8573df3af"}, + {file = "propcache-0.4.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:678ae89ebc632c5c204c794f8dab2837c5f159aeb59e6ed0539500400577298c"}, + {file = "propcache-0.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d472aeb4fbf9865e0c6d622d7f4d54a4e101a89715d8904282bb5f9a2f476c3f"}, + {file = "propcache-0.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4d3df5fa7e36b3225954fba85589da77a0fe6a53e3976de39caf04a0db4c36f1"}, + {file = "propcache-0.4.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:ee17f18d2498f2673e432faaa71698032b0127ebf23ae5974eeaf806c279df24"}, + {file = "propcache-0.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:580e97762b950f993ae618e167e7be9256b8353c2dcd8b99ec100eb50f5286aa"}, + {file = "propcache-0.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:501d20b891688eb8e7aa903021f0b72d5a55db40ffaab27edefd1027caaafa61"}, + {file = "propcache-0.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a0bd56e5b100aef69bd8562b74b46254e7c8812918d3baa700c8a8009b0af66"}, + {file = "propcache-0.4.1-cp313-cp313-win32.whl", hash = "sha256:bcc9aaa5d80322bc2fb24bb7accb4a30f81e90ab8d6ba187aec0744bc302ad81"}, + {file = "propcache-0.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:381914df18634f5494334d201e98245c0596067504b9372d8cf93f4bb23e025e"}, + {file = "propcache-0.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:8873eb4460fd55333ea49b7d189749ecf6e55bf85080f11b1c4530ed3034cba1"}, + {file = 
"propcache-0.4.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:92d1935ee1f8d7442da9c0c4fa7ac20d07e94064184811b685f5c4fada64553b"}, + {file = "propcache-0.4.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:473c61b39e1460d386479b9b2f337da492042447c9b685f28be4f74d3529e566"}, + {file = "propcache-0.4.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c0ef0aaafc66fbd87842a3fe3902fd889825646bc21149eafe47be6072725835"}, + {file = "propcache-0.4.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f95393b4d66bfae908c3ca8d169d5f79cd65636ae15b5e7a4f6e67af675adb0e"}, + {file = "propcache-0.4.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c07fda85708bc48578467e85099645167a955ba093be0a2dcba962195676e859"}, + {file = "propcache-0.4.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:af223b406d6d000830c6f65f1e6431783fc3f713ba3e6cc8c024d5ee96170a4b"}, + {file = "propcache-0.4.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a78372c932c90ee474559c5ddfffd718238e8673c340dc21fe45c5b8b54559a0"}, + {file = "propcache-0.4.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:564d9f0d4d9509e1a870c920a89b2fec951b44bf5ba7d537a9e7c1ccec2c18af"}, + {file = "propcache-0.4.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:17612831fda0138059cc5546f4d12a2aacfb9e47068c06af35c400ba58ba7393"}, + {file = "propcache-0.4.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:41a89040cb10bd345b3c1a873b2bf36413d48da1def52f268a055f7398514874"}, + {file = "propcache-0.4.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e35b88984e7fa64aacecea39236cee32dd9bd8c55f57ba8a75cf2399553f9bd7"}, + {file = "propcache-0.4.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6f8b465489f927b0df505cbe26ffbeed4d6d8a2bbc61ce90eb074ff129ef0ab1"}, + {file = "propcache-0.4.1-cp313-cp313t-win32.whl", hash = "sha256:2ad890caa1d928c7c2965b48f3a3815c853180831d0e5503d35cf00c472f4717"}, + {file = "propcache-0.4.1-cp313-cp313t-win_amd64.whl", hash = "sha256:f7ee0e597f495cf415bcbd3da3caa3bd7e816b74d0d52b8145954c5e6fd3ff37"}, + {file = "propcache-0.4.1-cp313-cp313t-win_arm64.whl", hash = "sha256:929d7cbe1f01bb7baffb33dc14eb5691c95831450a26354cd210a8155170c93a"}, + {file = "propcache-0.4.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3f7124c9d820ba5548d431afb4632301acf965db49e666aa21c305cbe8c6de12"}, + {file = "propcache-0.4.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c0d4b719b7da33599dfe3b22d3db1ef789210a0597bc650b7cee9c77c2be8c5c"}, + {file = "propcache-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9f302f4783709a78240ebc311b793f123328716a60911d667e0c036bc5dcbded"}, + {file = "propcache-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c80ee5802e3fb9ea37938e7eecc307fb984837091d5fd262bb37238b1ae97641"}, + {file = "propcache-0.4.1-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ed5a841e8bb29a55fb8159ed526b26adc5bdd7e8bd7bf793ce647cb08656cdf4"}, + {file = "propcache-0.4.1-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55c72fd6ea2da4c318e74ffdf93c4fe4e926051133657459131a95c846d16d44"}, + {file = "propcache-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8326e144341460402713f91df60ade3c999d601e7eb5ff8f6f7862d54de0610d"}, + {file = "propcache-0.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:060b16ae65bc098da7f6d25bf359f1f31f688384858204fe5d652979e0015e5b"}, + {file = "propcache-0.4.1-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:89eb3fa9524f7bec9de6e83cf3faed9d79bffa560672c118a96a171a6f55831e"}, + {file = "propcache-0.4.1-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:dee69d7015dc235f526fe80a9c90d65eb0039103fe565776250881731f06349f"}, + {file = "propcache-0.4.1-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:5558992a00dfd54ccbc64a32726a3357ec93825a418a401f5cc67df0ac5d9e49"}, + {file = "propcache-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:c9b822a577f560fbd9554812526831712c1436d2c046cedee4c3796d3543b144"}, + {file = "propcache-0.4.1-cp314-cp314-win32.whl", hash = "sha256:ab4c29b49d560fe48b696cdcb127dd36e0bc2472548f3bf56cc5cb3da2b2984f"}, + {file = "propcache-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:5a103c3eb905fcea0ab98be99c3a9a5ab2de60228aa5aceedc614c0281cf6153"}, + {file = "propcache-0.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:74c1fb26515153e482e00177a1ad654721bf9207da8a494a0c05e797ad27b992"}, + {file = "propcache-0.4.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:824e908bce90fb2743bd6b59db36eb4f45cd350a39637c9f73b1c1ea66f5b75f"}, + {file = "propcache-0.4.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2b5e7db5328427c57c8e8831abda175421b709672f6cfc3d630c3b7e2146393"}, + {file = "propcache-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6f6ff873ed40292cd4969ef5310179afd5db59fdf055897e282485043fc80ad0"}, + {file = "propcache-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49a2dc67c154db2c1463013594c458881a069fcf98940e61a0569016a583020a"}, + {file = "propcache-0.4.1-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:005f08e6a0529984491e37d8dbc3dd86f84bd78a8ceb5fa9a021f4c48d4984be"}, + {file = "propcache-0.4.1-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5c3310452e0d31390da9035c348633b43d7e7feb2e37be252be6da45abd1abcc"}, + {file = "propcache-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c3c70630930447f9ef1caac7728c8ad1c56bc5015338b20fed0d08ea2480b3a"}, + {file = "propcache-0.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e57061305815dfc910a3634dcf584f08168a8836e6999983569f51a8544cd89"}, + {file = "propcache-0.4.1-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:521a463429ef54143092c11a77e04056dd00636f72e8c45b70aaa3140d639726"}, + {file = "propcache-0.4.1-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:120c964da3fdc75e3731aa392527136d4ad35868cc556fd09bb6d09172d9a367"}, + {file = "propcache-0.4.1-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:d8f353eb14ee3441ee844ade4277d560cdd68288838673273b978e3d6d2c8f36"}, + {file = "propcache-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ab2943be7c652f09638800905ee1bab2c544e537edb57d527997a24c13dc1455"}, + {file = "propcache-0.4.1-cp314-cp314t-win32.whl", hash = "sha256:05674a162469f31358c30bcaa8883cb7829fa3110bf9c0991fe27d7896c42d85"}, + {file = "propcache-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:990f6b3e2a27d683cb7602ed6c86f15ee6b43b1194736f9baaeb93d0016633b1"}, + {file = "propcache-0.4.1-cp314-cp314t-win_arm64.whl", hash = 
"sha256:ecef2343af4cc68e05131e45024ba34f6095821988a9d0a02aa7c73fcc448aa9"}, + {file = "propcache-0.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3d233076ccf9e450c8b3bc6720af226b898ef5d051a2d145f7d765e6e9f9bcff"}, + {file = "propcache-0.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:357f5bb5c377a82e105e44bd3d52ba22b616f7b9773714bff93573988ef0a5fb"}, + {file = "propcache-0.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cbc3b6dfc728105b2a57c06791eb07a94229202ea75c59db644d7d496b698cac"}, + {file = "propcache-0.4.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:182b51b421f0501952d938dc0b0eb45246a5b5153c50d42b495ad5fb7517c888"}, + {file = "propcache-0.4.1-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4b536b39c5199b96fc6245eb5fb796c497381d3942f169e44e8e392b29c9ebcc"}, + {file = "propcache-0.4.1-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:db65d2af507bbfbdcedb254a11149f894169d90488dd3e7190f7cdcb2d6cd57a"}, + {file = "propcache-0.4.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd2dbc472da1f772a4dae4fa24be938a6c544671a912e30529984dd80400cd88"}, + {file = "propcache-0.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:daede9cd44e0f8bdd9e6cc9a607fc81feb80fae7a5fc6cecaff0e0bb32e42d00"}, + {file = "propcache-0.4.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:71b749281b816793678ae7f3d0d84bd36e694953822eaad408d682efc5ca18e0"}, + {file = "propcache-0.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:0002004213ee1f36cfb3f9a42b5066100c44276b9b72b4e1504cddd3d692e86e"}, + {file = "propcache-0.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:fe49d0a85038f36ba9e3ffafa1103e61170b28e95b16622e11be0a0ea07c6781"}, + {file = "propcache-0.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:99d43339c83aaf4d32bda60928231848eee470c6bda8d02599cc4cebe872d183"}, + {file = "propcache-0.4.1-cp39-cp39-win32.whl", hash = "sha256:a129e76735bc792794d5177069691c3217898b9f5cee2b2661471e52ffe13f19"}, + {file = "propcache-0.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:948dab269721ae9a87fd16c514a0a2c2a1bdb23a9a61b969b0f9d9ee2968546f"}, + {file = "propcache-0.4.1-cp39-cp39-win_arm64.whl", hash = "sha256:5fd37c406dd6dc85aa743e214cef35dc54bbdd1419baac4f6ae5e5b1a2976938"}, + {file = "propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237"}, + {file = "propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d"}, +] + [[package]] name = "protobuf" version = "3.20.3" @@ -3835,6 +4655,66 @@ files = [ [package.extras] tests = ["pytest"] +[[package]] +name = "pyarrow" +version = "22.0.0" +description = "Python library for Apache Arrow" +optional = false +python-versions = ">=3.10" +groups = ["main"] +files = [ + {file = "pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88"}, + {file = "pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace"}, + {file = "pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce"}, + {file = "pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = 
"sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48"}, + {file = "pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340"}, + {file = "pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653"}, + {file = "pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84"}, + {file = "pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a"}, + {file = "pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e"}, + {file = "pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215"}, + {file = "pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d"}, + {file = "pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8"}, + {file = "pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016"}, + {file = "pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c"}, + {file = "pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d"}, + {file = "pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8"}, + {file = "pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5"}, + {file = "pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe"}, + {file = "pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e"}, + {file = "pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9"}, + {file = "pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d"}, + {file = "pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a"}, + {file = "pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901"}, + {file = "pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691"}, + {file = "pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a"}, + {file = "pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6"}, + {file = "pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941"}, + {file = "pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145"}, + {file = "pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1"}, + {file = "pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f"}, + {file = "pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d"}, + {file = "pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f"}, + {file = "pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746"}, + {file = "pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95"}, + {file = "pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc"}, + {file = "pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d"}, + {file = "pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9"}, + {file = "pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7"}, + {file = "pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde"}, + {file = "pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc"}, + {file = "pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0"}, + {file = "pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730"}, + {file = "pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2"}, + {file = "pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70"}, + {file = "pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754"}, + {file = "pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91"}, + {file = "pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c"}, + {file = "pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80"}, + {file = "pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae"}, + {file = "pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9"}, +] + [[package]] name = "pyasn1" version = "0.6.1" @@ -4356,7 +5236,7 @@ version = "2025.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" -groups = ["docs"] +groups = ["main", "docs"] files = [ {file = 
"pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -5758,21 +6638,21 @@ files = [ [[package]] name = "tqdm" -version = "4.64.1" +version = "4.66.6" description = "Fast, Extensible Progress Meter" optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "tqdm-4.64.1-py2.py3-none-any.whl", hash = "sha256:6fee160d6ffcd1b1c68c65f14c829c22832bc401726335ce92c52d395944a6a1"}, - {file = "tqdm-4.64.1.tar.gz", hash = "sha256:5f4f682a004951c1b450bc753c710e9280c5746ce6ffedee253ddbcbf54cf1e4"}, + {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, + {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["py-make (>=0.1.0)", "twine", "wheel"] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] @@ -5986,6 +6866,18 @@ files = [ [package.dependencies] typing-extensions = ">=4.12.0" +[[package]] +name = "tzdata" +version = "2025.2" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +groups = ["main"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + [[package]] name = "unfoldnd" version = "0.2.3" @@ -6208,6 +7100,301 @@ files = [ {file = "widgetsnbextension-4.0.14.tar.gz", hash = "sha256:a3629b04e3edb893212df862038c7232f62973373869db5084aed739b437b5af"}, ] +[[package]] +name = "xxhash" +version = "3.6.0" +description = "Python binding for xxHash" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "xxhash-3.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:87ff03d7e35c61435976554477a7f4cd1704c3596a89a8300d5ce7fc83874a71"}, + {file = "xxhash-3.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f572dfd3d0e2eb1a57511831cf6341242f5a9f8298a45862d085f5b93394a27d"}, + {file = "xxhash-3.6.0-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:89952ea539566b9fed2bbd94e589672794b4286f342254fad28b149f9615fef8"}, + {file = "xxhash-3.6.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:48e6f2ffb07a50b52465a1032c3cf1f4a5683f944acaca8a134a2f23674c2058"}, + {file = "xxhash-3.6.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b5b848ad6c16d308c3ac7ad4ba6bede80ed5df2ba8ed382f8932df63158dd4b2"}, + {file = "xxhash-3.6.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a034590a727b44dd8ac5914236a7b8504144447a9682586c3327e935f33ec8cc"}, + {file = "xxhash-3.6.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a8f1972e75ebdd161d7896743122834fe87378160c20e97f8b09166213bf8cc"}, + {file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:ee34327b187f002a596d7b167ebc59a1b729e963ce645964bbc050d2f1b73d07"}, + {file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:339f518c3c7a850dd033ab416ea25a692759dc7478a71131fe8869010d2b75e4"}, + {file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:bf48889c9630542d4709192578aebbd836177c9f7a4a2778a7d6340107c65f06"}, + {file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:5576b002a56207f640636056b4160a378fe36a58db73ae5c27a7ec8db35f71d4"}, + {file = "xxhash-3.6.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af1f3278bd02814d6dedc5dec397993b549d6f16c19379721e5a1d31e132c49b"}, + {file = "xxhash-3.6.0-cp310-cp310-win32.whl", hash = "sha256:aed058764db109dc9052720da65fafe84873b05eb8b07e5e653597951af57c3b"}, + {file = "xxhash-3.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:e82da5670f2d0d98950317f82a0e4a0197150ff19a6df2ba40399c2a3b9ae5fb"}, + {file = "xxhash-3.6.0-cp310-cp310-win_arm64.whl", hash = "sha256:4a082ffff8c6ac07707fb6b671caf7c6e020c75226c561830b73d862060f281d"}, + {file = "xxhash-3.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b47bbd8cf2d72797f3c2772eaaac0ded3d3af26481a26d7d7d41dc2d3c46b04a"}, + {file = "xxhash-3.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2b6821e94346f96db75abaa6e255706fb06ebd530899ed76d32cd99f20dc52fa"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d0a9751f71a1a65ce3584e9cae4467651c7e70c9d31017fa57574583a4540248"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b29ee68625ab37b04c0b40c3fafdf24d2f75ccd778333cfb698f65f6c463f62"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:6812c25fe0d6c36a46ccb002f40f27ac903bf18af9f6dd8f9669cb4d176ab18f"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4ccbff013972390b51a18ef1255ef5ac125c92dc9143b2d1909f59abc765540e"}, + {file = "xxhash-3.6.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:297b7fbf86c82c550e12e8fb71968b3f033d27b874276ba3624ea868c11165a8"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:dea26ae1eb293db089798d3973a5fc928a18fdd97cc8801226fae705b02b14b0"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7a0b169aafb98f4284f73635a8e93f0735f9cbde17bd5ec332480484241aaa77"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:08d45aef063a4531b785cd72de4887766d01dc8f362a515693df349fdb825e0c"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:929142361a48ee07f09121fe9e96a84950e8d4df3bb298ca5d88061969f34d7b"}, + {file = "xxhash-3.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:51312c768403d8540487dbbfb557454cfc55589bbde6424456951f7fcd4facb3"}, + {file = "xxhash-3.6.0-cp311-cp311-win32.whl", hash = "sha256:d1927a69feddc24c987b337ce81ac15c4720955b667fe9b588e02254b80446fd"}, + {file = "xxhash-3.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:26734cdc2d4ffe449b41d186bbeac416f704a482ed835d375a5c0cb02bc63fef"}, + {file = "xxhash-3.6.0-cp311-cp311-win_arm64.whl", hash = "sha256:d72f67ef8bf36e05f5b6c65e8524f265bd61071471cd4cf1d36743ebeeeb06b7"}, + {file = "xxhash-3.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:01362c4331775398e7bb34e3ab403bc9ee9f7c497bc7dee6272114055277dd3c"}, + {file = "xxhash-3.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7b2df81a23f8cb99656378e72501b2cb41b1827c0f5a86f87d6b06b69f9f204"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:dc94790144e66b14f67b10ac8ed75b39ca47536bf8800eb7c24b50271ea0c490"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:93f107c673bccf0d592cdba077dedaf52fe7f42dcd7676eba1f6d6f0c3efffd2"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2aa5ee3444c25b69813663c9f8067dcfaa2e126dc55e8dddf40f4d1c25d7effa"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f7f99123f0e1194fa59cc69ad46dbae2e07becec5df50a0509a808f90a0f03f0"}, + {file = "xxhash-3.6.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49e03e6fe2cac4a1bc64952dd250cf0dbc5ef4ebb7b8d96bce82e2de163c82a2"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bd17fede52a17a4f9a7bc4472a5867cb0b160deeb431795c0e4abe158bc784e9"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6fb5f5476bef678f69db04f2bd1efbed3030d2aba305b0fc1773645f187d6a4e"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:843b52f6d88071f87eba1631b684fcb4b2068cd2180a0224122fe4ef011a9374"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7d14a6cfaf03b1b6f5f9790f76880601ccc7896aff7ab9cd8978a939c1eb7e0d"}, + {file = "xxhash-3.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:418daf3db71e1413cfe211c2f9a528456936645c17f46b5204705581a45390ae"}, + {file = "xxhash-3.6.0-cp312-cp312-win32.whl", hash = "sha256:50fc255f39428a27299c20e280d6193d8b63b8ef8028995323bf834a026b4fbb"}, + {file = "xxhash-3.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:c0f2ab8c715630565ab8991b536ecded9416d615538be8ecddce43ccf26cbc7c"}, + {file = "xxhash-3.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:eae5c13f3bc455a3bbb68bdc513912dc7356de7e2280363ea235f71f54064829"}, + {file = "xxhash-3.6.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:599e64ba7f67472481ceb6ee80fa3bd828fd61ba59fb11475572cc5ee52b89ec"}, + {file = "xxhash-3.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7d8b8aaa30fca4f16f0c84a5c8d7ddee0e25250ec2796c973775373257dde8f1"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d597acf8506d6e7101a4a44a5e428977a51c0fadbbfd3c39650cca9253f6e5a6"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:858dc935963a33bc33490128edc1c12b0c14d9c7ebaa4e387a7869ecc4f3e263"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba284920194615cb8edf73bf52236ce2e1664ccd4a38fdb543506413529cc546"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4b54219177f6c6674d5378bd862c6aedf64725f70dd29c472eaae154df1a2e89"}, + {file = "xxhash-3.6.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:42c36dd7dbad2f5238950c377fcbf6811b1cdb1c444fab447960030cea60504d"}, + {file = 
"xxhash-3.6.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f22927652cba98c44639ffdc7aaf35828dccf679b10b31c4ad72a5b530a18eb7"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b45fad44d9c5c119e9c6fbf2e1c656a46dc68e280275007bbfd3d572b21426db"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:6f2580ffab1a8b68ef2b901cde7e55fa8da5e4be0977c68f78fc80f3c143de42"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:40c391dd3cd041ebc3ffe6f2c862f402e306eb571422e0aa918d8070ba31da11"}, + {file = "xxhash-3.6.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f205badabde7aafd1a31e8ca2a3e5a763107a71c397c4481d6a804eb5063d8bd"}, + {file = "xxhash-3.6.0-cp313-cp313-win32.whl", hash = "sha256:2577b276e060b73b73a53042ea5bd5203d3e6347ce0d09f98500f418a9fcf799"}, + {file = "xxhash-3.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:757320d45d2fbcce8f30c42a6b2f47862967aea7bf458b9625b4bbe7ee390392"}, + {file = "xxhash-3.6.0-cp313-cp313-win_arm64.whl", hash = "sha256:457b8f85dec5825eed7b69c11ae86834a018b8e3df5e77783c999663da2f96d6"}, + {file = "xxhash-3.6.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a42e633d75cdad6d625434e3468126c73f13f7584545a9cf34e883aa1710e702"}, + {file = "xxhash-3.6.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:568a6d743219e717b07b4e03b0a828ce593833e498c3b64752e0f5df6bfe84db"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:bec91b562d8012dae276af8025a55811b875baace6af510412a5e58e3121bc54"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:78e7f2f4c521c30ad5e786fdd6bae89d47a32672a80195467b5de0480aa97b1f"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3ed0df1b11a79856df5ffcab572cbd6b9627034c1c748c5566fa79df9048a7c5"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0e4edbfc7d420925b0dd5e792478ed393d6e75ff8fc219a6546fb446b6a417b1"}, + {file = "xxhash-3.6.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fba27a198363a7ef87f8c0f6b171ec36b674fe9053742c58dd7e3201c1ab30ee"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:794fe9145fe60191c6532fa95063765529770edcdd67b3d537793e8004cabbfd"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6105ef7e62b5ac73a837778efc331a591d8442f8ef5c7e102376506cb4ae2729"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:f01375c0e55395b814a679b3eea205db7919ac2af213f4a6682e01220e5fe292"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:d706dca2d24d834a4661619dcacf51a75c16d65985718d6a7d73c1eeeb903ddf"}, + {file = "xxhash-3.6.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5f059d9faeacd49c0215d66f4056e1326c80503f51a1532ca336a385edadd033"}, + {file = "xxhash-3.6.0-cp313-cp313t-win32.whl", hash = "sha256:1244460adc3a9be84731d72b8e80625788e5815b68da3da8b83f78115a40a7ec"}, + {file = "xxhash-3.6.0-cp313-cp313t-win_amd64.whl", hash = "sha256:b1e420ef35c503869c4064f4a2f2b08ad6431ab7b229a05cce39d74268bca6b8"}, + {file = "xxhash-3.6.0-cp313-cp313t-win_arm64.whl", hash = "sha256:ec44b73a4220623235f67a996c862049f375df3b1052d9899f40a6382c32d746"}, + {file = 
"xxhash-3.6.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a40a3d35b204b7cc7643cbcf8c9976d818cb47befcfac8bbefec8038ac363f3e"}, + {file = "xxhash-3.6.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a54844be970d3fc22630b32d515e79a90d0a3ddb2644d8d7402e3c4c8da61405"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:016e9190af8f0a4e3741343777710e3d5717427f175adfdc3e72508f59e2a7f3"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f6f72232f849eb9d0141e2ebe2677ece15adfd0fa599bc058aad83c714bb2c6"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:63275a8aba7865e44b1813d2177e0f5ea7eadad3dd063a21f7cf9afdc7054063"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cd01fa2aa00d8b017c97eb46b9a794fbdca53fc14f845f5a328c71254b0abb7"}, + {file = "xxhash-3.6.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0226aa89035b62b6a86d3c68df4d7c1f47a342b8683da2b60cedcddb46c4d95b"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c6e193e9f56e4ca4923c61238cdaced324f0feac782544eb4c6d55ad5cc99ddd"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:9176dcaddf4ca963d4deb93866d739a343c01c969231dbe21680e13a5d1a5bf0"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:c1ce4009c97a752e682b897aa99aef84191077a9433eb237774689f14f8ec152"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:8cb2f4f679b01513b7adbb9b1b2f0f9cdc31b70007eaf9d59d0878809f385b11"}, + {file = "xxhash-3.6.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:653a91d7c2ab54a92c19ccf43508b6a555440b9be1bc8be553376778be7f20b5"}, + {file = "xxhash-3.6.0-cp314-cp314-win32.whl", hash = "sha256:a756fe893389483ee8c394d06b5ab765d96e68fbbfe6fde7aa17e11f5720559f"}, + {file = "xxhash-3.6.0-cp314-cp314-win_amd64.whl", hash = "sha256:39be8e4e142550ef69629c9cd71b88c90e9a5db703fecbcf265546d9536ca4ad"}, + {file = "xxhash-3.6.0-cp314-cp314-win_arm64.whl", hash = "sha256:25915e6000338999236f1eb68a02a32c3275ac338628a7eaa5a269c401995679"}, + {file = "xxhash-3.6.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c5294f596a9017ca5a3e3f8884c00b91ab2ad2933cf288f4923c3fd4346cf3d4"}, + {file = "xxhash-3.6.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1cf9dcc4ab9cff01dfbba78544297a3a01dafd60f3bde4e2bfd016cf7e4ddc67"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:01262da8798422d0685f7cef03b2bd3f4f46511b02830861df548d7def4402ad"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:51a73fb7cb3a3ead9f7a8b583ffd9b8038e277cdb8cb87cf890e88b3456afa0b"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b9c6df83594f7df8f7f708ce5ebeacfc69f72c9fbaaababf6cf4758eaada0c9b"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:627f0af069b0ea56f312fd5189001c24578868643203bca1abbc2c52d3a6f3ca"}, + {file = "xxhash-3.6.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:aa912c62f842dfd013c5f21a642c9c10cd9f4c4e943e0af83618b4a404d9091a"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b465afd7909db30168ab62afe40b2fcf79eedc0b89a6c0ab3123515dc0df8b99"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:a881851cf38b0a70e7c4d3ce81fc7afd86fbc2a024f4cfb2a97cf49ce04b75d3"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9b3222c686a919a0f3253cfc12bb118b8b103506612253b5baeaac10d8027cf6"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:c5aa639bc113e9286137cec8fadc20e9cd732b2cc385c0b7fa673b84fc1f2a93"}, + {file = "xxhash-3.6.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5c1343d49ac102799905e115aee590183c3921d475356cb24b4de29a4bc56518"}, + {file = "xxhash-3.6.0-cp314-cp314t-win32.whl", hash = "sha256:5851f033c3030dd95c086b4a36a2683c2ff4a799b23af60977188b057e467119"}, + {file = "xxhash-3.6.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0444e7967dac37569052d2409b00a8860c2135cff05502df4da80267d384849f"}, + {file = "xxhash-3.6.0-cp314-cp314t-win_arm64.whl", hash = "sha256:bb79b1e63f6fd84ec778a4b1916dfe0a7c3fdb986c06addd5db3a0d413819d95"}, + {file = "xxhash-3.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7dac94fad14a3d1c92affb661021e1d5cbcf3876be5f5b4d90730775ccb7ac41"}, + {file = "xxhash-3.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6965e0e90f1f0e6cb78da568c13d4a348eeb7f40acfd6d43690a666a459458b8"}, + {file = "xxhash-3.6.0-cp38-cp38-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2ab89a6b80f22214b43d98693c30da66af910c04f9858dd39c8e570749593d7e"}, + {file = "xxhash-3.6.0-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4903530e866b7a9c1eadfd3fa2fbe1b97d3aed4739a80abf506eb9318561c850"}, + {file = "xxhash-3.6.0-cp38-cp38-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4da8168ae52c01ac64c511d6f4a709479da8b7a4a1d7621ed51652f93747dffa"}, + {file = "xxhash-3.6.0-cp38-cp38-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:97460eec202017f719e839a0d3551fbc0b2fcc9c6c6ffaa5af85bbd5de432788"}, + {file = "xxhash-3.6.0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:45aae0c9df92e7fa46fbb738737324a563c727990755ec1965a6a339ea10a1df"}, + {file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d50101e57aad86f4344ca9b32d091a2135a9d0a4396f19133426c88025b09f1"}, + {file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:9085e798c163ce310d91f8aa6b325dda3c2944c93c6ce1edb314030d4167cc65"}, + {file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:a87f271a33fad0e5bf3be282be55d78df3a45ae457950deb5241998790326f87"}, + {file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:9e040d3e762f84500961791fa3709ffa4784d4dcd7690afc655c095e02fff05f"}, + {file = "xxhash-3.6.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:b0359391c3dad6de872fefb0cf5b69d55b0655c55ee78b1bb7a568979b2ce96b"}, + {file = "xxhash-3.6.0-cp38-cp38-win32.whl", hash = "sha256:e4ff728a2894e7f436b9e94c667b0f426b9c74b71f900cf37d5468c6b5da0536"}, + {file = "xxhash-3.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:01be0c5b500c5362871fc9cfdf58c69b3e5c4f531a82229ddb9eb1eb14138004"}, + {file = "xxhash-3.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cc604dc06027dbeb8281aeac5899c35fcfe7c77b25212833709f0bff4ce74d2a"}, + 
{file = "xxhash-3.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:277175a73900ad43a8caeb8b99b9604f21fe8d7c842f2f9061a364a7e220ddb7"}, + {file = "xxhash-3.6.0-cp39-cp39-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cfbc5b91397c8c2972fdac13fb3e4ed2f7f8ccac85cd2c644887557780a9b6e2"}, + {file = "xxhash-3.6.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2762bfff264c4e73c0e507274b40634ff465e025f0eaf050897e88ec8367575d"}, + {file = "xxhash-3.6.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2f171a900d59d51511209f7476933c34a0c2c711078d3c80e74e0fe4f38680ec"}, + {file = "xxhash-3.6.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:780b90c313348f030b811efc37b0fa1431163cb8db8064cf88a7936b6ce5f222"}, + {file = "xxhash-3.6.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b242455eccdfcd1fa4134c431a30737d2b4f045770f8fe84356b3469d4b919"}, + {file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a75ffc1bd5def584129774c158e108e5d768e10b75813f2b32650bb041066ed6"}, + {file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1fc1ed882d1e8df932a66e2999429ba6cc4d5172914c904ab193381fba825360"}, + {file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:44e342e8cc11b4e79dae5c57f2fb6360c3c20cc57d32049af8f567f5b4bcb5f4"}, + {file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:c2f9ccd5c4be370939a2e17602fbc49995299203da72a3429db013d44d590e86"}, + {file = "xxhash-3.6.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:02ea4cb627c76f48cd9fb37cf7ab22bd51e57e1b519807234b473faebe526796"}, + {file = "xxhash-3.6.0-cp39-cp39-win32.whl", hash = "sha256:6551880383f0e6971dc23e512c9ccc986147ce7bfa1cd2e4b520b876c53e9f3d"}, + {file = "xxhash-3.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:7c35c4cdc65f2a29f34425c446f2f5cdcd0e3c34158931e1cc927ece925ab802"}, + {file = "xxhash-3.6.0-cp39-cp39-win_arm64.whl", hash = "sha256:ffc578717a347baf25be8397cb10d2528802d24f94cfc005c0e44fef44b5cdd6"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0f7b7e2ec26c1666ad5fc9dbfa426a6a3367ceaf79db5dd76264659d509d73b0"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:5dc1e14d14fa0f5789ec29a7062004b5933964bb9b02aae6622b8f530dc40296"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:881b47fc47e051b37d94d13e7455131054b56749b91b508b0907eb07900d1c13"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c6dc31591899f5e5666f04cc2e529e69b4072827085c1ef15294d91a004bc1bd"}, + {file = "xxhash-3.6.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:15e0dac10eb9309508bfc41f7f9deaa7755c69e35af835db9cb10751adebc35d"}, + {file = "xxhash-3.6.0.tar.gz", hash = "sha256:f0162a78b13a0d7617b2845b90c763339d1f1d82bb04a4b07f4ab535cc5e05d6"}, +] + +[[package]] +name = "yarl" +version = "1.22.0" +description = "Yet another URL library" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "yarl-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c7bd6683587567e5a49ee6e336e0612bec8329be1b7d4c8af5687dcdeb67ee1e"}, + {file = 
"yarl-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5cdac20da754f3a723cceea5b3448e1a2074866406adeb4ef35b469d089adb8f"}, + {file = "yarl-1.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07a524d84df0c10f41e3ee918846e1974aba4ec017f990dc735aad487a0bdfdf"}, + {file = "yarl-1.22.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1b329cb8146d7b736677a2440e422eadd775d1806a81db2d4cded80a48efc1a"}, + {file = "yarl-1.22.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:75976c6945d85dbb9ee6308cd7ff7b1fb9409380c82d6119bd778d8fcfe2931c"}, + {file = "yarl-1.22.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:80ddf7a5f8c86cb3eb4bc9028b07bbbf1f08a96c5c0bc1244be5e8fefcb94147"}, + {file = "yarl-1.22.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d332fc2e3c94dad927f2112395772a4e4fedbcf8f80efc21ed7cdfae4d574fdb"}, + {file = "yarl-1.22.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cf71bf877efeac18b38d3930594c0948c82b64547c1cf420ba48722fe5509f6"}, + {file = "yarl-1.22.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:663e1cadaddae26be034a6ab6072449a8426ddb03d500f43daf952b74553bba0"}, + {file = "yarl-1.22.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6dcbb0829c671f305be48a7227918cfcd11276c2d637a8033a99a02b67bf9eda"}, + {file = "yarl-1.22.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f0d97c18dfd9a9af4490631905a3f131a8e4c9e80a39353919e2cfed8f00aedc"}, + {file = "yarl-1.22.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:437840083abe022c978470b942ff832c3940b2ad3734d424b7eaffcd07f76737"}, + {file = "yarl-1.22.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a899cbd98dce6f5d8de1aad31cb712ec0a530abc0a86bd6edaa47c1090138467"}, + {file = "yarl-1.22.0-cp310-cp310-win32.whl", hash = "sha256:595697f68bd1f0c1c159fcb97b661fc9c3f5db46498043555d04805430e79bea"}, + {file = "yarl-1.22.0-cp310-cp310-win_amd64.whl", hash = "sha256:cb95a9b1adaa48e41815a55ae740cfda005758104049a640a398120bf02515ca"}, + {file = "yarl-1.22.0-cp310-cp310-win_arm64.whl", hash = "sha256:b85b982afde6df99ecc996990d4ad7ccbdbb70e2a4ba4de0aecde5922ba98a0b"}, + {file = "yarl-1.22.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ab72135b1f2db3fed3997d7e7dc1b80573c67138023852b6efb336a5eae6511"}, + {file = "yarl-1.22.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:669930400e375570189492dc8d8341301578e8493aec04aebc20d4717f899dd6"}, + {file = "yarl-1.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:792a2af6d58177ef7c19cbf0097aba92ca1b9cb3ffdd9c7470e156c8f9b5e028"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ea66b1c11c9150f1372f69afb6b8116f2dd7286f38e14ea71a44eee9ec51b9d"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3e2daa88dc91870215961e96a039ec73e4937da13cf77ce17f9cad0c18df3503"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba440ae430c00eee41509353628600212112cd5018d5def7e9b05ea7ac34eb65"}, + {file = "yarl-1.22.0-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e6438cc8f23a9c1478633d216b16104a586b9761db62bfacb6425bac0a36679e"}, + {file = 
"yarl-1.22.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c52a6e78aef5cf47a98ef8e934755abf53953379b7d53e68b15ff4420e6683d"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3b06bcadaac49c70f4c88af4ffcfbe3dc155aab3163e75777818092478bcbbe7"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:6944b2dc72c4d7f7052683487e3677456050ff77fcf5e6204e98caf785ad1967"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d5372ca1df0f91a86b047d1277c2aaf1edb32d78bbcefffc81b40ffd18f027ed"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:51af598701f5299012b8416486b40fceef8c26fc87dc6d7d1f6fc30609ea0aa6"}, + {file = "yarl-1.22.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b266bd01fedeffeeac01a79ae181719ff848a5a13ce10075adbefc8f1daee70e"}, + {file = "yarl-1.22.0-cp311-cp311-win32.whl", hash = "sha256:a9b1ba5610a4e20f655258d5a1fdc7ebe3d837bb0e45b581398b99eb98b1f5ca"}, + {file = "yarl-1.22.0-cp311-cp311-win_amd64.whl", hash = "sha256:078278b9b0b11568937d9509b589ee83ef98ed6d561dfe2020e24a9fd08eaa2b"}, + {file = "yarl-1.22.0-cp311-cp311-win_arm64.whl", hash = "sha256:b6a6f620cfe13ccec221fa312139135166e47ae169f8253f72a0abc0dae94376"}, + {file = "yarl-1.22.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e340382d1afa5d32b892b3ff062436d592ec3d692aeea3bef3a5cfe11bbf8c6f"}, + {file = "yarl-1.22.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f1e09112a2c31ffe8d80be1b0988fa6a18c5d5cad92a9ffbb1c04c91bfe52ad2"}, + {file = "yarl-1.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:939fe60db294c786f6b7c2d2e121576628468f65453d86b0fe36cb52f987bd74"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1651bf8e0398574646744c1885a41198eba53dc8a9312b954073f845c90a8df"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b8a0588521a26bf92a57a1705b77b8b59044cdceccac7151bd8d229e66b8dedb"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:42188e6a615c1a75bcaa6e150c3fe8f3e8680471a6b10150c5f7e83f47cc34d2"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f6d2cb59377d99718913ad9a151030d6f83ef420a2b8f521d94609ecc106ee82"}, + {file = "yarl-1.22.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50678a3b71c751d58d7908edc96d332af328839eea883bb554a43f539101277a"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e8fbaa7cec507aa24ea27a01456e8dd4b6fab829059b69844bd348f2d467124"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:433885ab5431bc3d3d4f2f9bd15bfa1614c522b0f1405d62c4f926ccd69d04fa"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b790b39c7e9a4192dc2e201a282109ed2985a1ddbd5ac08dc56d0e121400a8f7"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:31f0b53913220599446872d757257be5898019c85e7971599065bc55065dc99d"}, + {file = "yarl-1.22.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a49370e8f711daec68d09b821a34e1167792ee2d24d405cbc2387be4f158b520"}, + {file = "yarl-1.22.0-cp312-cp312-win32.whl", hash = "sha256:70dfd4f241c04bd9239d53b17f11e6ab672b9f1420364af63e8531198e3f5fe8"}, + {file = 
"yarl-1.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:8884d8b332a5e9b88e23f60bb166890009429391864c685e17bd73a9eda9105c"}, + {file = "yarl-1.22.0-cp312-cp312-win_arm64.whl", hash = "sha256:ea70f61a47f3cc93bdf8b2f368ed359ef02a01ca6393916bc8ff877427181e74"}, + {file = "yarl-1.22.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8dee9c25c74997f6a750cd317b8ca63545169c098faee42c84aa5e506c819b53"}, + {file = "yarl-1.22.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:01e73b85a5434f89fc4fe27dcda2aff08ddf35e4d47bbbea3bdcd25321af538a"}, + {file = "yarl-1.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:22965c2af250d20c873cdbee8ff958fb809940aeb2e74ba5f20aaf6b7ac8c70c"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4f15793aa49793ec8d1c708ab7f9eded1aa72edc5174cae703651555ed1b601"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5542339dcf2747135c5c85f68680353d5cb9ffd741c0f2e8d832d054d41f35a"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5c401e05ad47a75869c3ab3e35137f8468b846770587e70d71e11de797d113df"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:243dda95d901c733f5b59214d28b0120893d91777cb8aa043e6ef059d3cddfe2"}, + {file = "yarl-1.22.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bec03d0d388060058f5d291a813f21c011041938a441c593374da6077fe21b1b"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b0748275abb8c1e1e09301ee3cf90c8a99678a4e92e4373705f2a2570d581273"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:47fdb18187e2a4e18fda2c25c05d8251a9e4a521edaed757fef033e7d8498d9a"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c7044802eec4524fde550afc28edda0dd5784c4c45f0be151a2d3ba017daca7d"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:139718f35149ff544caba20fce6e8a2f71f1e39b92c700d8438a0b1d2a631a02"}, + {file = "yarl-1.22.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e1b51bebd221006d3d2f95fbe124b22b247136647ae5dcc8c7acafba66e5ee67"}, + {file = "yarl-1.22.0-cp313-cp313-win32.whl", hash = "sha256:d3e32536234a95f513bd374e93d717cf6b2231a791758de6c509e3653f234c95"}, + {file = "yarl-1.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:47743b82b76d89a1d20b83e60d5c20314cbd5ba2befc9cda8f28300c4a08ed4d"}, + {file = "yarl-1.22.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d0fcda9608875f7d052eff120c7a5da474a6796fe4d83e152e0e4d42f6d1a9b"}, + {file = "yarl-1.22.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:719ae08b6972befcba4310e49edb1161a88cdd331e3a694b84466bd938a6ab10"}, + {file = "yarl-1.22.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:47d8a5c446df1c4db9d21b49619ffdba90e77c89ec6e283f453856c74b50b9e3"}, + {file = "yarl-1.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cfebc0ac8333520d2d0423cbbe43ae43c8838862ddb898f5ca68565e395516e9"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4398557cbf484207df000309235979c79c4356518fd5c99158c7d38203c4da4f"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:2ca6fd72a8cd803be290d42f2dec5cdcd5299eeb93c2d929bf060ad9efaf5de0"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca1f59c4e1ab6e72f0a23c13fca5430f889634166be85dbf1013683e49e3278e"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c5010a52015e7c70f86eb967db0f37f3c8bd503a695a49f8d45700144667708"}, + {file = "yarl-1.22.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7672ecf7557476642c88497c2f8d8542f8e36596e928e9bcba0e42e1e7d71f"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b7c88eeef021579d600e50363e0b6ee4f7f6f728cd3486b9d0f3ee7b946398d"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:f4afb5c34f2c6fecdcc182dfcfc6af6cccf1aa923eed4d6a12e9d96904e1a0d8"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:59c189e3e99a59cf8d83cbb31d4db02d66cda5a1a4374e8a012b51255341abf5"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:5a3bf7f62a289fa90f1990422dc8dff5a458469ea71d1624585ec3a4c8d6960f"}, + {file = "yarl-1.22.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:de6b9a04c606978fdfe72666fa216ffcf2d1a9f6a381058d4378f8d7b1e5de62"}, + {file = "yarl-1.22.0-cp313-cp313t-win32.whl", hash = "sha256:1834bb90991cc2999f10f97f5f01317f99b143284766d197e43cd5b45eb18d03"}, + {file = "yarl-1.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ff86011bd159a9d2dfc89c34cfd8aff12875980e3bd6a39ff097887520e60249"}, + {file = "yarl-1.22.0-cp313-cp313t-win_arm64.whl", hash = "sha256:7861058d0582b847bc4e3a4a4c46828a410bca738673f35a29ba3ca5db0b473b"}, + {file = "yarl-1.22.0-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:34b36c2c57124530884d89d50ed2c1478697ad7473efd59cfd479945c95650e4"}, + {file = "yarl-1.22.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:0dd9a702591ca2e543631c2a017e4a547e38a5c0f29eece37d9097e04a7ac683"}, + {file = "yarl-1.22.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:594fcab1032e2d2cc3321bb2e51271e7cd2b516c7d9aee780ece81b07ff8244b"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3d7a87a78d46a2e3d5b72587ac14b4c16952dd0887dbb051451eceac774411e"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:852863707010316c973162e703bddabec35e8757e67fcb8ad58829de1ebc8590"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:131a085a53bfe839a477c0845acf21efc77457ba2bcf5899618136d64f3303a2"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:078a8aefd263f4d4f923a9677b942b445a2be970ca24548a8102689a3a8ab8da"}, + {file = "yarl-1.22.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bca03b91c323036913993ff5c738d0842fc9c60c4648e5c8d98331526df89784"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:68986a61557d37bb90d3051a45b91fa3d5c516d177dfc6dd6f2f436a07ff2b6b"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:4792b262d585ff0dff6bcb787f8492e40698443ec982a3568c2096433660c694"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = 
"sha256:ebd4549b108d732dba1d4ace67614b9545b21ece30937a63a65dd34efa19732d"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:f87ac53513d22240c7d59203f25cc3beac1e574c6cd681bbfd321987b69f95fd"}, + {file = "yarl-1.22.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:22b029f2881599e2f1b06f8f1db2ee63bd309e2293ba2d566e008ba12778b8da"}, + {file = "yarl-1.22.0-cp314-cp314-win32.whl", hash = "sha256:6a635ea45ba4ea8238463b4f7d0e721bad669f80878b7bfd1f89266e2ae63da2"}, + {file = "yarl-1.22.0-cp314-cp314-win_amd64.whl", hash = "sha256:0d6e6885777af0f110b0e5d7e5dda8b704efed3894da26220b7f3d887b839a79"}, + {file = "yarl-1.22.0-cp314-cp314-win_arm64.whl", hash = "sha256:8218f4e98d3c10d683584cb40f0424f4b9fd6e95610232dd75e13743b070ee33"}, + {file = "yarl-1.22.0-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:45c2842ff0e0d1b35a6bf1cd6c690939dacb617a70827f715232b2e0494d55d1"}, + {file = "yarl-1.22.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d947071e6ebcf2e2bee8fce76e10faca8f7a14808ca36a910263acaacef08eca"}, + {file = "yarl-1.22.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:334b8721303e61b00019474cc103bdac3d7b1f65e91f0bfedeec2d56dfe74b53"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1e7ce67c34138a058fd092f67d07a72b8e31ff0c9236e751957465a24b28910c"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d77e1b2c6d04711478cb1c4ab90db07f1609ccf06a287d5607fcd90dc9863acf"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4647674b6150d2cae088fc07de2738a84b8bcedebef29802cf0b0a82ab6face"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:efb07073be061c8f79d03d04139a80ba33cbd390ca8f0297aae9cce6411e4c6b"}, + {file = "yarl-1.22.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e51ac5435758ba97ad69617e13233da53908beccc6cfcd6c34bbed8dcbede486"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:33e32a0dd0c8205efa8e83d04fc9f19313772b78522d1bdc7d9aed706bfd6138"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:bf4a21e58b9cde0e401e683ebd00f6ed30a06d14e93f7c8fd059f8b6e8f87b6a"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:e4b582bab49ac33c8deb97e058cd67c2c50dac0dd134874106d9c774fd272529"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0b5bcc1a9c4839e7e30b7b30dd47fe5e7e44fb7054ec29b5bb8d526aa1041093"}, + {file = "yarl-1.22.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c0232bce2170103ec23c454e54a57008a9a72b5d1c3105dc2496750da8cfa47c"}, + {file = "yarl-1.22.0-cp314-cp314t-win32.whl", hash = "sha256:8009b3173bcd637be650922ac455946197d858b3630b6d8787aa9e5c4564533e"}, + {file = "yarl-1.22.0-cp314-cp314t-win_amd64.whl", hash = "sha256:9fb17ea16e972c63d25d4a97f016d235c78dd2344820eb35bc034bc32012ee27"}, + {file = "yarl-1.22.0-cp314-cp314t-win_arm64.whl", hash = "sha256:9f6d73c1436b934e3f01df1e1b21ff765cd1d28c77dfb9ace207f746d4610ee1"}, + {file = "yarl-1.22.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3aa27acb6de7a23785d81557577491f6c38a5209a254d1191519d07d8fe51748"}, + {file = "yarl-1.22.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:af74f05666a5e531289cb1cc9c883d1de2088b8e5b4de48004e5ca8a830ac859"}, + {file = "yarl-1.22.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:62441e55958977b8167b2709c164c91a6363e25da322d87ae6dd9c6019ceecf9"}, + {file = "yarl-1.22.0-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b580e71cac3f8113d3135888770903eaf2f507e9421e5697d6ee6d8cd1c7f054"}, + {file = "yarl-1.22.0-cp39-cp39-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e81fda2fb4a07eda1a2252b216aa0df23ebcd4d584894e9612e80999a78fd95b"}, + {file = "yarl-1.22.0-cp39-cp39-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:99b6fc1d55782461b78221e95fc357b47ad98b041e8e20f47c1411d0aacddc60"}, + {file = "yarl-1.22.0-cp39-cp39-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:088e4e08f033db4be2ccd1f34cf29fe994772fb54cfe004bbf54db320af56890"}, + {file = "yarl-1.22.0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e4e1f6f0b4da23e61188676e3ed027ef0baa833a2e633c29ff8530800edccba"}, + {file = "yarl-1.22.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:84fc3ec96fce86ce5aa305eb4aa9358279d1aa644b71fab7b8ed33fe3ba1a7ca"}, + {file = "yarl-1.22.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:5dbeefd6ca588b33576a01b0ad58aa934bc1b41ef89dee505bf2932b22ddffba"}, + {file = "yarl-1.22.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:14291620375b1060613f4aab9ebf21850058b6b1b438f386cc814813d901c60b"}, + {file = "yarl-1.22.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:a4fcfc8eb2c34148c118dfa02e6427ca278bfd0f3df7c5f99e33d2c0e81eae3e"}, + {file = "yarl-1.22.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:029866bde8d7b0878b9c160e72305bbf0a7342bcd20b9999381704ae03308dc8"}, + {file = "yarl-1.22.0-cp39-cp39-win32.whl", hash = "sha256:4dcc74149ccc8bba31ce1944acee24813e93cfdee2acda3c172df844948ddf7b"}, + {file = "yarl-1.22.0-cp39-cp39-win_amd64.whl", hash = "sha256:10619d9fdee46d20edc49d3479e2f8269d0779f1b031e6f7c2aa1c76be04b7ed"}, + {file = "yarl-1.22.0-cp39-cp39-win_arm64.whl", hash = "sha256:dd7afd3f8b0bfb4e0d9fc3c31bfe8a4ec7debe124cfd90619305def3c8ca8cd2"}, + {file = "yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff"}, + {file = "yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" +propcache = ">=0.2.1" + [[package]] name = "zipp" version = "3.23.0" @@ -6231,4 +7418,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.11" -content-hash = "314027fa3494e71fe10a1e1053cf8203556345db9901345cd4e7d5de0ce5ecea" + +content-hash = "58f21a09f32e6ac718b387b3813c3980b32ee094db897fda22a18f0afea1899a" diff --git a/pyproject.toml b/pyproject.toml index c1d80df9..407b0eb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -159,6 +159,8 @@ ml-dtypes = "~=0.5.3" torch = "~=2.7.0" torchvision = "~=0.22.0" # Required by finn/brevitas transformers = "~=4.48.3" +datasets = "~=3.0.0" +evaluate = "~=0.4.0" # ONNX ecosystem onnx = "~=1.17.0" @@ -172,7 +174,7 @@ netron = "~=8.6" ipython = "~=8.12.2" ipykernel = "~=6.21.2" pygments = ">=2.16,<3.0" # Updated for mkdocs-material compatibility -tqdm = "~=4.64.1" +tqdm = "~=4.66.3" # Hardware and optimization bitstring = "~=4.2.3" diff --git a/tests/conftest.py b/tests/conftest.py index 
ebf5d1b6..1d3b7281 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ import pytest from brainsmith.registry import reset_registry +from brainsmith.registry._state import _discovered_sources from brainsmith.settings import reset_config from brainsmith.settings.validation import ensure_environment_sourced @@ -14,6 +15,13 @@ # This ensures FINN_ROOT, VIVADO_PATH, etc. are available for tests ensure_environment_sourced() +# Populate discovered sources for proper source detection +# This allows @backend/@kernel decorators to correctly detect "brainsmith" source +# without running full discovery (which test framework avoids for speed) +_discovered_sources.add("brainsmith") +_discovered_sources.add("finn") +_discovered_sources.add("project") + # Import test components - these register @step, @kernel, @backend decorators # Available for tests that need globally-registered test components import tests.fixtures.components.backends # noqa: F401 - Registers test backends via @backend decorator diff --git a/tests/fixtures/model_builders.py b/tests/fixtures/model_builders.py index e2dbe3a6..235d1859 100644 --- a/tests/fixtures/model_builders.py +++ b/tests/fixtures/model_builders.py @@ -577,11 +577,17 @@ def make_multithreshold_model( helper.make_attribute("out_scale", float(out_scale)), helper.make_attribute("out_bias", float(out_bias)), helper.make_attribute("out_dtype", output_dtype), + helper.make_attribute("data_layout", "NHWC"), # Input is in NHWC format ] ) - # Generate evenly-spaced threshold values (sorted ascending) - thresh_vals = np.linspace(-10, 10, num_thresholds, dtype=np.float32) + # Generate evenly-spaced threshold values within input dtype range + # Use 80% of range to ensure values fit after rounding/clipping in FINN pipeline + # Round to integers so FINN's MinimizeAccumulatorWidth can validate them + inp_dt = DataType[input_dtype] + inp_min, inp_max = inp_dt.min(), inp_dt.max() + thresh_vals = np.linspace(inp_min * 0.8, inp_max * 0.8, num_thresholds, dtype=np.float32) + thresh_vals = np.round(thresh_vals).astype(np.float32) # Round to integers thresh_vals = np.tile(thresh_vals, (channels, 1)) # Replicate for each channel # Create graph diff --git a/tests/frameworks/kernel_parity_test.py b/tests/frameworks/kernel_parity_test.py index ee0c259c..2a396d96 100644 --- a/tests/frameworks/kernel_parity_test.py +++ b/tests/frameworks/kernel_parity_test.py @@ -253,7 +253,7 @@ def golden_outputs( def builder(): return self._build_golden_outputs(stage1_model, test_inputs) - return model_cache.get_golden_reference(kernel_test_config.test_id, builder) + return model_cache.get_golden_outputs(kernel_test_config.test_id, builder) # ======================================================================== # Pytest Fixtures @@ -469,6 +469,17 @@ def test_normal_shapes_parity(self, kernel_test_config, stage2_model, stage2_mod for i in range(self.get_num_inputs()): shape = op.get_normal_input_shape(i) shape_ref = op_ref.get_normal_input_shape(i) + + # FINN Bug: get_normal_input_shape(ind) ignores index for multi-input nodes + # FINN's Thresholding.get_normal_input_shape() always returns activation shape, + # even for threshold input (ind=1). Skip comparison for non-zero indices. + if i > 0 and self.get_num_inputs() > 1: + pytest.skip( + f"FINN limitation: get_normal_input_shape({i}) incorrectly returns " + f"activation shape {shape_ref} instead of parameter shape {shape}. " + f"FINN's implementation ignores the 'ind' parameter for multi-input nodes." 
+            )
+
             assert_shapes_match(shape, shape_ref, i, "normal input")

         # Output shapes
@@ -489,6 +500,14 @@ def test_folded_shapes_parity(self, kernel_test_config, stage2_model, stage2_mod
         for i in range(self.get_num_inputs()):
             shape = op.get_folded_input_shape(i)
             shape_ref = op_ref.get_folded_input_shape(i)
+
+            # FINN Bug: Same issue as test_normal_shapes_parity
+            if i > 0 and self.get_num_inputs() > 1:
+                pytest.skip(
+                    f"FINN limitation: get_folded_input_shape({i}) uses broken "
+                    f"get_normal_input_shape() which ignores 'ind' parameter."
+                )
+
             assert_shapes_match(shape, shape_ref, i, "folded input")

         # Output shapes
diff --git a/tests/frameworks/kernel_test_base.py b/tests/frameworks/kernel_test_base.py
index bb338ce2..d1a32c57 100644
--- a/tests/frameworks/kernel_test_base.py
+++ b/tests/frameworks/kernel_test_base.py
@@ -206,7 +206,7 @@ def _find_hw_node(
         Args:
             model: Model after inference transform
-            target_node: Original ONNX node name
+            target_node: Original ONNX node name (may have been renamed during transformation)
             expected_type: Expected kernel class or op_type string (optional)

         Returns:
@@ -215,9 +215,32 @@
         Raises:
             AssertionError: If node not found or wrong type
        """
-        # Get ONNX node from model
+        # Try to get ONNX node by name (may fail with new sequential naming)
         onnx_node = model.get_node_from_name(target_node)

+        # If not found by exact name, search by kernel type
+        # (needed for new sequential naming scheme: KernelName_)
+        if onnx_node is None:
+            kernel_op = self.get_kernel_op()
+            kernel_name = kernel_op.__name__  # e.g., "Thresholding"
+
+            # Find all nodes with matching op_type and brainsmith domain
+            hw_nodes = [
+                node for node in model.graph.node
+                if node.op_type == kernel_name and node.domain == "brainsmith.kernels"
+            ]
+
+            # For test models with single kernel instance, take the first one
+            assert len(hw_nodes) > 0, (
+                f"Could not find transformed kernel node. "
+                f"Original node: {target_node}, Expected kernel: {kernel_name}"
+            )
+            assert len(hw_nodes) == 1, (
+                f"Found multiple {kernel_name} nodes: {[n.name for n in hw_nodes]}. "
+                f"Cannot determine which corresponds to {target_node}"
+            )
+            onnx_node = hw_nodes[0]
+
         # Wrap with custom op class (pass model for KernelOp initialization)
         op = getHWCustomOp(onnx_node, model)

@@ -255,7 +278,22 @@
         Returns:
             Expected outputs from QONNX execution
         """
-        return execute_onnx(quant_model, inputs, return_full_exec_context=False)
+        from qonnx.core.datatype import DataType
+
+        outputs = execute_onnx(quant_model, inputs, return_full_exec_context=False)
+
+        # Post-process BIPOLAR outputs
+        # QONNX MultiThreshold returns {0, 1} but BIPOLAR datatype represents {-1, 1}
+        # Apply the same conversion as hardware Thresholding kernels
+        graph = quant_model.graph
+        for node in graph.node:
+            for output_name in node.output:
+                if output_name in outputs:
+                    output_dtype = quant_model.get_tensor_datatype(output_name)
+                    if output_dtype == DataType["BIPOLAR"]:
+                        outputs[output_name] = 2 * outputs[output_name] - 1
+
+        return outputs

     def _build_stage1_model(self, kernel_test_config: "KernelTestConfig") -> ModelWrapper:
         """Build Stage 1 model with QONNX annotations. 
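For reference, the bipolar rescaling introduced in `_compute_golden_reference` above is a plain affine map. A minimal standalone sketch (illustrative only, not part of the patch, with made-up values) of the {0, 1} → {-1, +1} conversion:

import numpy as np

# A single-threshold QONNX MultiThreshold reports how many thresholds were
# crossed, so its outputs are step counts in {0, 1} (values here are made up).
qonnx_out = np.array([0.0, 1.0, 1.0, 0.0], dtype=np.float32)

# The same conversion the patch applies for BIPOLAR tensors: 0 -> -1, 1 -> +1
bipolar_out = 2 * qonnx_out - 1

assert np.array_equal(bipolar_out, np.array([-1.0, 1.0, 1.0, -1.0], dtype=np.float32))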
diff --git a/tests/frameworks/test_config.py b/tests/frameworks/test_config.py
index 90dc5c71..9b8c96e2 100644
--- a/tests/frameworks/test_config.py
+++ b/tests/frameworks/test_config.py
@@ -27,6 +27,8 @@ class ModelStructure:
         operation: ONNX operation name (e.g., "Add", "MatMul", "Conv")
         input_shapes: Dict mapping input names to shapes
         input_dtypes: Dict mapping input names to DataTypes
+        output_dtypes: Dict mapping output names to DataTypes (optional)
+        dimensions: Extra kernel parameters, e.g. threshold configuration (optional)

     Example:
         model = ModelStructure(
@@ -39,6 +41,8 @@ class ModelStructure:
     operation: str
     input_shapes: dict[str, tuple[int, ...]]
     input_dtypes: dict[str, DataType]
+    output_dtypes: dict[str, DataType] | None = None
+    dimensions: dict[str, Any] | None = None  # Extra parameters (e.g., threshold config)

     def __post_init__(self):
         """Validate that shapes and dtypes have matching keys."""
diff --git a/tests/integration/test_mem_modes_kernels.py b/tests/integration/test_mem_modes_kernels.py
new file mode 100644
index 00000000..91fd58e6
--- /dev/null
+++ b/tests/integration/test_mem_modes_kernels.py
@@ -0,0 +1,504 @@
+"""Integration tests for mem_modes with Thresholding and ElementwiseBinaryOp kernels.
+
+Tests the full flow:
+- Schema definition with mem_modes
+- Design space building
+- Design point instantiation
+- Interface mem_mode access in kernel implementations
+"""
+
+import numpy as np
+import pytest
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import gen_finn_dt_tensor
+
+from brainsmith.dataflow.builder import BuildContext, DesignSpaceBuilder
+from brainsmith.kernels.thresholding.thresholding import (
+    THRESHOLDING_SCHEMA,
+    Thresholding,
+)
+
+
+def _create_param_getter(node):
+    """Create a param_getter function for testing."""
+
+    def param_getter(key):
+        for attr in node.attribute:
+            if attr.name == key:
+                # Return the first value from the attribute
+                if attr.HasField("i"):
+                    return attr.i
+                elif attr.HasField("f"):
+                    return attr.f
+                elif attr.HasField("s"):
+                    return attr.s.decode("utf-8")
+                elif attr.ints:
+                    return list(attr.ints)
+        raise KeyError(f"Attribute {key} not found")
+
+    return param_getter
+
+
+def _create_param_setter():
+    """Create a param_setter function for testing (no-op)."""
+
+    def param_setter(key, value):
+        pass  # No-op for tests
+
+    return param_setter
+
+
+class TestThresholdingMemModes:
+    """Integration tests for Thresholding kernel with mem_modes."""
+
+    def test_thresholding_schema_has_mem_modes(self):
+        """Test that Thresholding schema has mem_modes on thresholds input."""
+        # Find thresholds input (index 1)
+        thresholds_input = THRESHOLDING_SCHEMA.inputs[1]
+        assert thresholds_input.name == "thresholds"
+        assert thresholds_input.mem_modes is not None
+        # mem_modes is now a static frozenset defining capabilities
+        assert thresholds_input.mem_modes == frozenset({"embedded", "decoupled", "dynamic"})
+
+    def test_thresholding_generates_input1_memtype(self):
+        """Test that Thresholding design space has input1MemType parameter."""
+        # Create a simple thresholding model
+        import onnx
+
+        inp = onnx.helper.make_tensor_value_info("inp", onnx.TensorProto.FLOAT, [1, 4])
+        thresh = onnx.helper.make_tensor_value_info("thresh", onnx.TensorProto.FLOAT, [4, 1])
+        out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [1, 4])
+
+        # Create threshold initializer
+        threshold_values = np.array([[0.5], [1.0], [1.5], [2.0]], dtype=np.float32)
+        thresh_init = onnx.helper.make_tensor(
+            "thresh", onnx.TensorProto.FLOAT, [4, 1], 
threshold_values.flatten() + ) + + node = onnx.helper.make_node( + "MultiThreshold", + inputs=["inp", "thresh"], + outputs=["out"], + domain="qonnx.custom_op.general", + name="threshold_node", + ) + + graph = onnx.helper.make_graph( + [node], "threshold_graph", [inp, thresh], [out], initializer=[thresh_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("inp", DataType["INT8"]) + model_w.set_tensor_datatype("thresh", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark thresholds as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=THRESHOLDING_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Verify input1MemType parameter exists + assert "input1MemType" in design_space.parameters + mem_modes = design_space.parameters["input1MemType"] + + # Should have all modes from static schema + assert mem_modes == frozenset({"embedded", "decoupled", "dynamic"}) + + def test_thresholding_mlo_forces_dynamic(self): + """Test that adapt_for_loop_body() forces input1MemType to dynamic.""" + import onnx + from qonnx.custom_op.registry import getCustomOp + from finn.transformation.fpgadataflow.loop_rolling import LoopBodyInputType + + inp = onnx.helper.make_tensor_value_info("inp", onnx.TensorProto.FLOAT, [1, 4]) + thresh = onnx.helper.make_tensor_value_info("thresh", onnx.TensorProto.FLOAT, [4, 1]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [1, 4]) + + threshold_values = np.array([[0.5], [1.0], [1.5], [2.0]], dtype=np.float32) + thresh_init = onnx.helper.make_tensor( + "thresh", onnx.TensorProto.FLOAT, [4, 1], threshold_values.flatten() + ) + + # Create Thresholding node (not MultiThreshold) + node = onnx.helper.make_node( + "Thresholding", + inputs=["inp", "thresh"], + outputs=["out"], + domain="brainsmith.kernels", + backend="fpgadataflow", + name="threshold_node", + num_steps=1, + act_val=0, + num_input_vectors=[1], + runtime_writeable_weights=0, + PE=4, + ) + + # Mark thresholds as weight with initial mode "embedded" + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + + graph = onnx.helper.make_graph( + [node], "threshold_graph", [inp, thresh], [out], initializer=[thresh_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("inp", DataType["INT8"]) + model_w.set_tensor_datatype("thresh", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Get KernelOp instance and verify initial state + node = model_w.graph.node[0] + thl_inst = getCustomOp(node) + assert thl_inst.get_nodeattr("input1MemType") == "embedded" + + # Call adapt_for_loop_body with MLO signature (thresholds are PARAMETER) + loop_signature = [LoopBodyInputType.ACTIVATION, LoopBodyInputType.PARAMETER] + thl_inst.adapt_for_loop_body(loop_signature) + + # Should be forced to dynamic + assert thl_inst.get_nodeattr("input1MemType") == "dynamic" + + def test_thresholding_mlo_no_change_without_parameter(self): + """Test that adapt_for_loop_body() doesn't change mode if not PARAMETER.""" + import onnx + from qonnx.custom_op.registry import getCustomOp + from finn.transformation.fpgadataflow.loop_rolling import 
LoopBodyInputType + + inp = onnx.helper.make_tensor_value_info("inp", onnx.TensorProto.FLOAT, [1, 4]) + thresh = onnx.helper.make_tensor_value_info("thresh", onnx.TensorProto.FLOAT, [4, 1]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [1, 4]) + + threshold_values = np.array([[0.5], [1.0], [1.5], [2.0]], dtype=np.float32) + thresh_init = onnx.helper.make_tensor( + "thresh", onnx.TensorProto.FLOAT, [4, 1], threshold_values.flatten() + ) + + node = onnx.helper.make_node( + "Thresholding", + inputs=["inp", "thresh"], + outputs=["out"], + domain="brainsmith.kernels", + backend="fpgadataflow", + name="threshold_node", + num_steps=1, + act_val=0, + num_input_vectors=[1], + runtime_writeable_weights=0, + PE=4, + ) + + # Mark thresholds as weight with initial mode "embedded" + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + + graph = onnx.helper.make_graph( + [node], "threshold_graph", [inp, thresh], [out], initializer=[thresh_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("inp", DataType["INT8"]) + model_w.set_tensor_datatype("thresh", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + node = model_w.graph.node[0] + thl_inst = getCustomOp(node) + assert thl_inst.get_nodeattr("input1MemType") == "embedded" + + # Call adapt_for_loop_body with signature where thresholds are CONSTANT (not streamed) + loop_signature = [LoopBodyInputType.ACTIVATION, LoopBodyInputType.CONSTANT] + thl_inst.adapt_for_loop_body(loop_signature) + + # Should remain embedded (not changed to dynamic) + assert thl_inst.get_nodeattr("input1MemType") == "embedded" + + def test_thresholding_interface_mem_mode_accessible(self): + """Test that mem_mode is accessible from design point interface.""" + import onnx + + inp = onnx.helper.make_tensor_value_info("inp", onnx.TensorProto.FLOAT, [1, 4]) + thresh = onnx.helper.make_tensor_value_info("thresh", onnx.TensorProto.FLOAT, [4, 1]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [1, 4]) + + threshold_values = np.array([[0.5], [1.0], [1.5], [2.0]], dtype=np.float32) + thresh_init = onnx.helper.make_tensor( + "thresh", onnx.TensorProto.FLOAT, [4, 1], threshold_values.flatten() + ) + + node = onnx.helper.make_node( + "MultiThreshold", + inputs=["inp", "thresh"], + outputs=["out"], + domain="qonnx.custom_op.general", + name="threshold_node", + ) + + graph = onnx.helper.make_graph( + [node], "threshold_graph", [inp, thresh], [out], initializer=[thresh_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("inp", DataType["INT8"]) + model_w.set_tensor_datatype("thresh", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark thresholds as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=THRESHOLDING_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Configure with embedded mode + design_point = design_space.configure({"PE": 1, "input1MemType": "embedded"}) + + # Verify mem_mode is accessible from interface + thresholds_iface = design_point.inputs["thresholds"] + assert thresholds_iface.mem_mode == "embedded" + assert 
thresholds_iface.is_weight is True + + # Configure with decoupled mode + design_point2 = design_space.configure({"PE": 1, "input1MemType": "decoupled"}) + assert design_point2.inputs["thresholds"].mem_mode == "decoupled" + + +class TestElementwiseBinaryOpMemModes: + """Integration tests for ElementwiseBinaryOp kernel with mem_modes.""" + + def test_elementwise_schema_has_mem_modes(self): + """Test that ElementwiseBinaryOp schema has mem_modes on RHS input.""" + from brainsmith.kernels.elementwise_binary.elementwise_binary import ( + ELEMENTWISE_BINARY_SCHEMA, + ) + + # Find RHS input (index 1) + rhs_input = ELEMENTWISE_BINARY_SCHEMA.inputs[1] + assert rhs_input.name == "rhs" + assert rhs_input.mem_modes is not None + # mem_modes is now a static frozenset defining capabilities + assert rhs_input.mem_modes == frozenset({"embedded", "decoupled", "dynamic"}) + + def test_elementwise_generates_input1_memtype(self): + """Test that ElementwiseBinaryOp design space has input1MemType parameter.""" + from brainsmith.kernels.elementwise_binary.elementwise_binary import ( + ELEMENTWISE_BINARY_SCHEMA, + ) + import onnx + + # Create simple Add operation with static RHS + lhs = onnx.helper.make_tensor_value_info("lhs", onnx.TensorProto.FLOAT, [4]) + rhs = onnx.helper.make_tensor_value_info("rhs", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [4]) + + # RHS is static (initializer) + rhs_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + rhs_init = onnx.numpy_helper.from_array(rhs_data, name="rhs") + + node = onnx.helper.make_node( + "Add", + inputs=["lhs", "rhs"], + outputs=["out"], + name="add_node", + func="Add", + input_pattern="dynamic_static", + ) + + graph = onnx.helper.make_graph([node], "add_graph", [lhs, rhs], [out], initializer=[rhs_init]) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("lhs", DataType["INT8"]) + model_w.set_tensor_datatype("rhs", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark RHS as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=ELEMENTWISE_BINARY_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Verify input1MemType parameter exists (RHS is index 1) + assert "input1MemType" in design_space.parameters + mem_modes = design_space.parameters["input1MemType"] + # Should have all modes from static schema + assert mem_modes == frozenset({"embedded", "decoupled", "dynamic"}) + + def test_elementwise_no_mem_mode_for_dynamic_lhs(self): + """Test that LHS (dynamic input) does not have mem_mode parameter.""" + from brainsmith.kernels.elementwise_binary.elementwise_binary import ( + ELEMENTWISE_BINARY_SCHEMA, + ) + import onnx + + lhs = onnx.helper.make_tensor_value_info("lhs", onnx.TensorProto.FLOAT, [4]) + rhs = onnx.helper.make_tensor_value_info("rhs", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [4]) + + rhs_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + rhs_init = onnx.numpy_helper.from_array(rhs_data, name="rhs") + + node = onnx.helper.make_node( + "Add", + inputs=["lhs", "rhs"], + outputs=["out"], + name="add_node", + func="Add", + input_pattern="dynamic_static", + ) + + 
graph = onnx.helper.make_graph([node], "add_graph", [lhs, rhs], [out], initializer=[rhs_init]) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("lhs", DataType["INT8"]) + model_w.set_tensor_datatype("rhs", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark RHS as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=ELEMENTWISE_BINARY_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # LHS should NOT have mem_mode parameter (it's dynamic) + assert "input0MemType" not in design_space.parameters + + # But RHS should have it (it's a weight) + assert "input1MemType" in design_space.parameters + + +class TestChannelwiseOpMemModesIntegration: + """Integration tests for ChannelwiseOp kernel with mem_modes.""" + + def test_channelwise_interface_mem_mode_accessible(self): + """Test that mem_mode is accessible from design point interface.""" + from brainsmith.kernels.channelwise.channelwise import CHANNELWISE_SCHEMA + import onnx + + # Create simple Add operation with static parameters + lhs = onnx.helper.make_tensor_value_info("lhs", onnx.TensorProto.FLOAT, [4]) + params = onnx.helper.make_tensor_value_info("params", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [4]) + + # Parameters are static (initializer) + params_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + params_init = onnx.numpy_helper.from_array(params_data, name="params") + + node = onnx.helper.make_node( + "Add", + inputs=["lhs", "params"], + outputs=["out"], + name="add_node", + func="Add", + ) + + graph = onnx.helper.make_graph( + [node], "add_graph", [lhs, params], [out], initializer=[params_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("lhs", DataType["INT8"]) + model_w.set_tensor_datatype("params", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark parameters as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=CHANNELWISE_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Configure with embedded mode + design_point = design_space.configure({ + "PE": 1, + "input1MemType": "embedded", + "ram_style": "distributed" + }) + + # Verify mem_mode is accessible from interface + params_iface = design_point.inputs["parameters"] + assert params_iface.mem_mode == "embedded" + assert params_iface.is_weight is True + + def test_channelwise_no_mem_mode_for_dynamic_input(self): + """Test that LHS (dynamic input) does not have mem_mode parameter.""" + from brainsmith.kernels.channelwise.channelwise import CHANNELWISE_SCHEMA + import onnx + + lhs = onnx.helper.make_tensor_value_info("lhs", onnx.TensorProto.FLOAT, [4]) + params = onnx.helper.make_tensor_value_info("params", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [4]) + + params_data = 
gen_finn_dt_tensor(DataType["INT8"], [4]) + params_init = onnx.numpy_helper.from_array(params_data, name="params") + + node = onnx.helper.make_node( + "Add", + inputs=["lhs", "params"], + outputs=["out"], + name="add_node", + func="Add", + ) + + graph = onnx.helper.make_graph( + [node], "add_graph", [lhs, params], [out], initializer=[params_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("lhs", DataType["INT8"]) + model_w.set_tensor_datatype("params", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark parameters as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=CHANNELWISE_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # LHS should NOT have mem_mode parameter (it's dynamic) + assert "input0MemType" not in design_space.parameters + + # But parameters should have it (static weight) + assert "input1MemType" in design_space.parameters + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tests/kernels/migration/test_thresholding_parity.py b/tests/kernels/migration/test_thresholding_parity.py new file mode 100644 index 00000000..ab3e5746 --- /dev/null +++ b/tests/kernels/migration/test_thresholding_parity.py @@ -0,0 +1,893 @@ +"""Parity tests for Thresholding kernel (Brainsmith vs FINN). + +Compares Brainsmith's schema-driven Thresholding implementation against +FINN's traditional Thresholding kernel across both HLS and RTL backends. + +Test coverage (18 inherited tests × N configurations): +- Core parity: shapes, datatypes, stream widths +- HW estimation: cycles, resources, efficiency +- Golden execution: python/cppsim/rtlsim for both implementations + +Note: Brainsmith removed runtime_writeable_weights support, so we skip +those test cases. +""" + +import pytest +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper + +from brainsmith.kernels.thresholding.thresholding import Thresholding +from tests.fixtures.model_builders import make_multithreshold_model +from tests.frameworks.kernel_parity_test import KernelParityTest +from tests.frameworks.test_config import ( + DesignParameters, + KernelTestConfig, + ModelStructure, + PlatformConfig, +) + + +class TestThresholdingParity(KernelParityTest): + """Test parity between Brainsmith Thresholding and FINN Thresholding. 
+ + Validates that Brainsmith's schema-driven implementation produces + identical results to FINN's traditional implementation across: + - Multiple quantization configurations (INT8→UINT4, BIPOLAR) + - Different parallelization factors (PE=4, PE=16) + - Both HLS and RTL backends + """ + + # ======================================================================== + # Test Configurations + # ======================================================================== + + @pytest.fixture( + params=[ + # ================================================================= + # CATEGORY 1: Output Datatype Edge Cases + # ================================================================= + # UINT2: Minimum threshold count (3 thresholds) + KernelTestConfig( + test_id="dtype_uint2_min_thresholds", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT2"]}, + dimensions={"thresh_shape": (64, 3), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # UINT4: Standard 4-bit unsigned (15 thresholds) + KernelTestConfig( + test_id="dtype_uint4_standard", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 28, 28, 128)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (128, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # UINT8: Maximum threshold count (255 thresholds) + KernelTestConfig( + test_id="dtype_uint8_max_thresholds", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + input_dtypes={"inp": DataType["INT16"]}, + output_dtypes={"out": DataType["UINT8"]}, + dimensions={"thresh_shape": (64, 255), "thresh_dtype": "INT16"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # INT4: Signed output (requires ActVal=-8) + KernelTestConfig( + test_id="dtype_int4_signed_output", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["INT4"]}, + dimensions={"thresh_shape": (64, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # BIPOLAR: Binary classification (-1/+1 output) + KernelTestConfig( + test_id="dtype_bipolar_binary", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["BIPOLAR"]}, + dimensions={"thresh_shape": (64, 1), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # ================================================================= + # CATEGORY 2: PE Configuration Edge Cases + # ================================================================= + # PE = 1: Maximum folding (sequential processing) + KernelTestConfig( + test_id="pe_1_max_folding", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (64, 15), "thresh_dtype": "INT8"}, + ), + 
design=DesignParameters(input_streams={0: 1}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # PE = channels: Full parallelism (unrolls to FFs) + KernelTestConfig( + test_id="pe_equals_channels_full_parallel", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (32, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 32}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # PE = 4: Low parallelism + KernelTestConfig( + test_id="pe_4_low_parallel", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 14, 14, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (64, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 4}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # PE = 32: High parallelism with large channels + KernelTestConfig( + test_id="pe_32_high_parallel", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 14, 14, 256)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (256, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 32}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # ================================================================= + # CATEGORY 3: Per-Tensor Quantization (Threshold Broadcasting) + # ================================================================= + # Per-tensor UINT4 with PE=8 + KernelTestConfig( + test_id="pertensor_uint4_pe8", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (1, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # Per-tensor with PE=1 (maximum folding + broadcasting) + KernelTestConfig( + test_id="pertensor_pe1_max_folding", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (1, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 1}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # Per-tensor with PE=channels (full parallel + broadcasting) + KernelTestConfig( + test_id="pertensor_pe_equals_channels", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 16)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (1, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # Per-tensor BIPOLAR + KernelTestConfig( + test_id="pertensor_bipolar", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["BIPOLAR"]}, + dimensions={"thresh_shape": (1, 1), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # Per-tensor with large 
channel count (BERT-like) + # NOTE: 3D inputs not supported by QONNX MultiThreshold.execute_node() + # Uncomment when QONNX adds 3D support + # KernelTestConfig( + # test_id="pertensor_large_channels_bert", + # model=ModelStructure( + # operation="MultiThreshold", + # input_shapes={"inp": (1, 32, 128)}, + # input_dtypes={"inp": DataType["INT8"]}, + # output_dtypes={"out": DataType["UINT4"]}, + # dimensions={"thresh_shape": (1, 15), "thresh_dtype": "INT8"}, + # ), + # design=DesignParameters(input_streams={0: 16}), + # platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + # ), + # Per-tensor UINT2 (minimum thresholds + broadcasting) + KernelTestConfig( + test_id="pertensor_uint2_min", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT2"]}, + dimensions={"thresh_shape": (1, 3), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # ================================================================= + # CATEGORY 4: Input Dimension Variations + # ================================================================= + # 2D input: FC layer output (batch, features) + KernelTestConfig( + test_id="dim_2d_fc_like", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 128)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (128, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # 3D input: Sequence model (batch, seq, features) + # NOTE: 3D inputs not supported by QONNX MultiThreshold.execute_node() + # Uncomment when QONNX adds 3D support + # KernelTestConfig( + # test_id="dim_3d_sequence", + # model=ModelStructure( + # operation="MultiThreshold", + # input_shapes={"inp": (1, 64, 128)}, + # input_dtypes={"inp": DataType["INT8"]}, + # output_dtypes={"out": DataType["UINT4"]}, + # dimensions={"thresh_shape": (128, 15), "thresh_dtype": "INT8"}, + # ), + # design=DesignParameters(input_streams={0: 16}), + # platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + # ), + # 4D non-square spatial dimensions + KernelTestConfig( + test_id="dim_4d_nonsquare", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 28, 14, 128)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (128, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # 4D small spatial (edge case) + KernelTestConfig( + test_id="dim_4d_small_spatial", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 1, 1, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (64, 15), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # ================================================================= + # CATEGORY 5: Input Datatype Variations + # ================================================================= + # UINT8 input (unsigned, non-negative thresholds) + KernelTestConfig( + test_id="input_uint8_unsigned", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 
64)}, + input_dtypes={"inp": DataType["UINT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (64, 15), "thresh_dtype": "UINT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # INT16 input (wider datapath) + KernelTestConfig( + test_id="input_int16_wide", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + input_dtypes={"inp": DataType["INT16"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (64, 15), "thresh_dtype": "INT16"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # ================================================================= + # CATEGORY 6: Narrow Range Quantization + # ================================================================= + # Narrow UINT4: 14 thresholds instead of 15 + KernelTestConfig( + test_id="narrow_uint4_14_thresholds", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (64, 14), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + # Narrow per-tensor (broadcasting + narrow range) + KernelTestConfig( + test_id="narrow_pertensor_14_thresholds", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + output_dtypes={"out": DataType["UINT4"]}, + dimensions={"thresh_shape": (1, 14), "thresh_dtype": "INT8"}, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xc7z020clg400-1"), + ), + ] + ) + def kernel_test_config(self, request): + """Provide test configurations for Thresholding parity tests.""" + return request.param + + # ======================================================================== + # Required Abstract Methods - Primary Implementation (Brainsmith) + # ======================================================================== + + def get_kernel_op(self): + """Return Brainsmith Thresholding class for primary implementation.""" + return Thresholding + + # ======================================================================== + # Required Abstract Methods - Reference Implementation (FINN) + # ======================================================================== + + def infer_kernel_reference( + self, + model: ModelWrapper, + target_node: str, + ) -> tuple[HWCustomOp, ModelWrapper]: + """Infer reference kernel using FINN InferThresholdingLayer. + + Applies FINN's transformation pipeline: + 1. InferThresholdingLayer: MultiThreshold → Thresholding + 2. Dtype optimization transforms (match Brainsmith's VALUE_OPTIMIZED) + 3. 
Find and return Thresholding node + + Args: + model: Stage 1 model (ONNX with annotations) + target_node: Target node name (unused - FINN doesn't preserve names) + + Returns: + (op, model): FINN Thresholding kernel and transformed model + """ + from finn.transformation.fpgadataflow.convert_to_hw_layers import InferThresholdingLayer + from finn.transformation.fpgadataflow.minimize_accumulator_width import ( + MinimizeAccumulatorWidth, + ) + from finn.transformation.fpgadataflow.minimize_weight_bit_width import ( + MinimizeWeightBitWidth, + ) + from finn.transformation.streamline.round_thresholds import RoundAndClipThresholds + from qonnx.custom_op.registry import getCustomOp + from qonnx.transformation.infer_datatypes import InferDataTypes + + # Apply FINN transformation pipeline + model = model.transform(InferThresholdingLayer()) + + # Apply dtype optimizations to match Brainsmith's VALUE_OPTIMIZED behavior + model = model.transform(MinimizeWeightBitWidth()) + model = model.transform(MinimizeAccumulatorWidth()) + model = model.transform(RoundAndClipThresholds()) + model = model.transform(InferDataTypes()) + + # FINN doesn't preserve node names during transformation + # Find Thresholding node by op_type + nodes_by_op_type = model.get_nodes_by_op_type("Thresholding") + assert len(nodes_by_op_type) == 1, ( + f"Expected 1 Thresholding node after InferThresholdingLayer, " + f"found {len(nodes_by_op_type)}" + ) + + onnx_node = nodes_by_op_type[0] + op = getCustomOp(onnx_node) + + return op, model + + def get_backend_variants_reference(self) -> list[type]: + """Return FINN backend variants (HLS and RTL). + + Returns: + List containing FINN's Thresholding_hls backend class + """ + from finn.custom_op.fpgadataflow.hls.thresholding_hls import Thresholding_hls + + # Note: Could also test RTL backend: + # from finn.custom_op.fpgadataflow.rtl.thresholding_rtl import Thresholding_rtl + # return [Thresholding_rtl] + + return [Thresholding_hls] + + # ======================================================================== + # Required Abstract Methods - Validation Counts + # ======================================================================== + + def get_num_inputs(self) -> int: + """Thresholding has 1 dynamic input (data), thresholds are static.""" + return 1 + + def get_num_outputs(self) -> int: + """Thresholding has 1 output.""" + return 1 + + # ======================================================================== + # Model Builder + # ======================================================================== + + def make_test_model( + self, + kernel_test_config: KernelTestConfig, + ) -> tuple[ModelWrapper, list[str]]: + """Create ONNX model with MultiThreshold node. + + Uses make_multithreshold_model() helper to generate a properly + configured MultiThreshold node with evenly-spaced threshold values. 
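+
+        Illustrative sizing (given the evenly-spaced helper): an INT8 input
+        with output_dtype UINT4 uses thresh_shape=(C, 15), i.e. 2**4 - 1 = 15
+        threshold steps per channel, so outputs land in [0, 15].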
+
+        Args:
+            kernel_test_config: Test configuration with shapes/dtypes
+
+        Returns:
+            (model, input_names): ONNX model and list of input tensor names
+        """
+        model_struct = kernel_test_config.model
+
+        # Extract configuration
+        inp_shape = model_struct.input_shapes["inp"]
+
+        # Threshold config comes from dimensions (static weight, not dynamic input)
+        thresh_shape = model_struct.dimensions.get("thresh_shape", (inp_shape[-1], 15))
+
+        inp_dtype_str = model_struct.input_dtypes["inp"].name
+        thresh_dtype_str = model_struct.dimensions.get("thresh_dtype", "INT8")
+
+        # Get output dtype from model structure (check both output_dtypes and dimensions)
+        if model_struct.output_dtypes and "out" in model_struct.output_dtypes:
+            out_dtype = model_struct.output_dtypes["out"]
+            out_dtype_str = out_dtype.name
+        elif model_struct.dimensions and "output_dtype" in model_struct.dimensions:
+            out_dtype_str = model_struct.dimensions["output_dtype"]
+            out_dtype = DataType[out_dtype_str]
+        else:
+            out_dtype_str = "UINT4"
+            out_dtype = DataType[out_dtype_str]
+
+        # thresh_shape is (channels, steps); the model helper only needs the step count
+        num_thresholds = thresh_shape[1]
+
+        # Determine out_bias (ActVal) based on output datatype
+        # For signed outputs (except BIPOLAR), out_bias must be negative
+        if out_dtype != DataType["BIPOLAR"] and out_dtype.signed():
+            # Signed output: ActVal = -2**(bitwidth - 1)
+            # e.g. INT4 (15 thresholds) -> ActVal = -8
+            out_bias = -(2 ** (out_dtype.bitwidth() - 1))
+        else:
+            # Unsigned (e.g. UINT4) or BIPOLAR: ActVal = 0
+            out_bias = 0
+
+        # Create MultiThreshold model
+        model, node = make_multithreshold_model(
+            shape=list(inp_shape),
+            input_dtype=inp_dtype_str,
+            threshold_dtype=thresh_dtype_str,
+            output_dtype=out_dtype_str,
+            num_thresholds=num_thresholds,
+            out_scale=1.0,
+            out_bias=out_bias,
+        )
+
+        # Return model and input names
+        # Only the dynamic input "inp" - thresholds are a static initializer
+        input_names = [node.input[0]]  # ["inp"] only
+
+        return model, input_names
+
+
+# =============================================================================
+# RTL Backend Parity Tests
+# =============================================================================
+
+
+class TestThresholdingParityRTL(TestThresholdingParity):
+    """Test parity for Thresholding RTL backend.
+
+    Inherits all test configurations from TestThresholdingParity but uses
+    RTL backend variants instead of HLS.
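+
+    Only get_backend_variants(), get_backend_variants_reference(), and the
+    kernel_test_config fixture are overridden; model construction and the
+    parity checks themselves are inherited unchanged.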
+ """ + + def get_backend_variants(self) -> list[type]: + """Return Brainsmith RTL backend variant.""" + from brainsmith.kernels.thresholding.thresholding_rtl import Thresholding_rtl + + return [Thresholding_rtl] + + def get_backend_variants_reference(self) -> list[type]: + """Return FINN RTL backend variant.""" + from finn.custom_op.fpgadataflow.rtl.thresholding_rtl import Thresholding_rtl + + return [Thresholding_rtl] + + @pytest.fixture( + params=[ + # ================================================================= + # CATEGORY 1: Output Datatype Edge Cases (RTL) + # ================================================================= + # UINT2: Minimum threshold count + KernelTestConfig( + test_id="rtl_dtype_uint2_min", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (64, 3), + "thresh_dtype": "INT8", + "output_dtype": "UINT2" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # UINT4: Standard + KernelTestConfig( + test_id="rtl_dtype_uint4_standard", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 28, 28, 128)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (128, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # UINT8: Maximum thresholds + KernelTestConfig( + test_id="rtl_dtype_uint8_max", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + input_dtypes={"inp": DataType["INT16"]}, + dimensions={ + "thresh_shape": (64, 255), + "thresh_dtype": "INT16", + "output_dtype": "UINT8" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # INT4: Signed output + KernelTestConfig( + test_id="rtl_dtype_int4_signed", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (64, 15), + "thresh_dtype": "INT8", + "output_dtype": "INT4" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # BIPOLAR + KernelTestConfig( + test_id="rtl_dtype_bipolar", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (64, 1), + "thresh_dtype": "INT8", + "output_dtype": "BIPOLAR" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # ================================================================= + # CATEGORY 2: PE Configuration Edge Cases (RTL) + # ================================================================= + # PE = 1: Maximum folding + KernelTestConfig( + test_id="rtl_pe_1_max_folding", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (64, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 1}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # PE = channels: Full parallelism + KernelTestConfig( + test_id="rtl_pe_equals_channels", + model=ModelStructure( 
+ operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (32, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 32}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # PE = 32: High parallelism + KernelTestConfig( + test_id="rtl_pe_32_high_parallel", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 14, 14, 256)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (256, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 32}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # ================================================================= + # CATEGORY 3: Per-Tensor Quantization (RTL) + # ================================================================= + # Per-tensor UINT4 with PE=8 + KernelTestConfig( + test_id="rtl_pertensor_uint4_pe8", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (1, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # Per-tensor with PE=1 (max folding + broadcasting) + KernelTestConfig( + test_id="rtl_pertensor_pe1", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (1, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 1}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # Per-tensor with PE=channels (full parallel + broadcasting) + KernelTestConfig( + test_id="rtl_pertensor_pe_equals_channels", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 16)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (1, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # Per-tensor BIPOLAR + KernelTestConfig( + test_id="rtl_pertensor_bipolar", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (1, 1), + "thresh_dtype": "INT8", + "output_dtype": "BIPOLAR" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # Per-tensor with large channels (BERT-like) + # NOTE: 3D inputs not supported by QONNX MultiThreshold.execute_node() + # KernelTestConfig( + # test_id="rtl_pertensor_bert_large", + # model=ModelStructure( + # operation="MultiThreshold", + # input_shapes={"inp": (1, 32, 128)}, + # input_dtypes={"inp": DataType["INT8"]}, + # dimensions={ + # "thresh_shape": (1, 15), + # "thresh_dtype": "INT8", + # "output_dtype": "UINT4" + # }, + # ), + # design=DesignParameters(input_streams={0: 16}), + # platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + # ), + # Per-tensor UINT2 (minimum thresholds + broadcasting) + KernelTestConfig( + test_id="rtl_pertensor_uint2", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 64)}, + 
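+                    # UINT2 output => 2**2 - 1 = 3 threshold steps, hence the
+                    # per-tensor thresh_shape of (1, 3) below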
input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (1, 3), + "thresh_dtype": "INT8", + "output_dtype": "UINT2" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # ================================================================= + # CATEGORY 4: Input Dimension Variations (RTL) + # ================================================================= + # 2D input: FC layer + KernelTestConfig( + test_id="rtl_dim_2d_fc", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 128)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (128, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # 3D input: Sequence model + # NOTE: 3D inputs not supported by QONNX MultiThreshold.execute_node() + # KernelTestConfig( + # test_id="rtl_dim_3d_sequence", + # model=ModelStructure( + # operation="MultiThreshold", + # input_shapes={"inp": (1, 64, 128)}, + # input_dtypes={"inp": DataType["INT8"]}, + # dimensions={ + # "thresh_shape": (128, 15), + # "thresh_dtype": "INT8", + # "output_dtype": "UINT4" + # }, + # ), + # design=DesignParameters(input_streams={0: 16}), + # platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + # ), + # 4D non-square + KernelTestConfig( + test_id="rtl_dim_4d_nonsquare", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 28, 14, 128)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (128, 15), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 16}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # ================================================================= + # CATEGORY 5: Narrow Range Quantization (RTL) + # ================================================================= + # Narrow UINT4: 14 thresholds + KernelTestConfig( + test_id="rtl_narrow_uint4", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 16, 16, 64)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (64, 14), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + # Narrow per-tensor (broadcasting + narrow range) + KernelTestConfig( + test_id="rtl_narrow_pertensor", + model=ModelStructure( + operation="MultiThreshold", + input_shapes={"inp": (1, 8, 8, 32)}, + input_dtypes={"inp": DataType["INT8"]}, + dimensions={ + "thresh_shape": (1, 14), + "thresh_dtype": "INT8", + "output_dtype": "UINT4" + }, + ), + design=DesignParameters(input_streams={0: 8}), + platform=PlatformConfig(fpgapart="xczu3eg-sbva484-1-e"), + ), + ] + ) + def kernel_test_config(self, request): + """Provide RTL-specific test configurations.""" + return request.param + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-m", "parity"]) diff --git a/tests/support/backend_utils.py b/tests/support/backend_utils.py index 427e25db..83d682b4 100644 --- a/tests/support/backend_utils.py +++ b/tests/support/backend_utils.py @@ -129,8 +129,14 @@ def specialize_to_backend( backend_names.append(backend_class_name) continue - # Try to find backend in registry - # Try common sources (brainsmith, finn, project) + # Use __registry_name__ if available (handles custom names from 
@backend decorator)
+        # Registry attaches this attribute for O(1) reverse lookup
+        if hasattr(backend_cls, "__registry_name__"):
+            backend_names.append(backend_cls.__registry_name__)
+            continue
+
+        # Fallback: try to find the backend in the registry by class name.
+        # This handles backends not yet registered or loaded dynamically.
         found = False
         for source in ["brainsmith", "finn", "project"]:
             candidate_name = f"{source}:{backend_class_name}"
diff --git a/tests/unit/test_mem_modes.py b/tests/unit/test_mem_modes.py
new file mode 100644
index 00000000..2e7f3261
--- /dev/null
+++ b/tests/unit/test_mem_modes.py
@@ -0,0 +1,518 @@
+"""Unit tests for the mem_modes interface-level DSE parameter system.
+
+Tests cover:
+- Schema validation of mem_modes
+- Builder generation of inputMemType parameters
+- Callable mem_modes for MLO filtering
+- InterfaceDesignPoint.mem_mode population
+- Integration with the ChannelwiseOp kernel schema
+"""
+
+import numpy as np
+import onnx
+import pytest
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.util.basic import gen_finn_dt_tensor
+
+import brainsmith.dataflow as df
+from brainsmith.dataflow.builder import BuildContext, DesignSpaceBuilder
+from brainsmith.dataflow.schemas import InputSchema, KernelSchema, OutputSchema
+from brainsmith.dataflow.types import FULL_DIM
+
+
+def _create_param_getter(node):
+    """Create a param_getter function for testing."""
+
+    def param_getter(key):
+        for attr in node.attribute:
+            if attr.name == key:
+                # Return the first value from the attribute
+                if attr.HasField("i"):
+                    return attr.i
+                elif attr.HasField("f"):
+                    return attr.f
+                elif attr.HasField("s"):
+                    return attr.s.decode("utf-8")
+                elif attr.ints:
+                    return list(attr.ints)
+        raise KeyError(f"Attribute {key} not found")
+
+    return param_getter
+
+
+def _create_param_setter():
+    """Create a param_setter function for testing (no-op)."""
+
+    def param_setter(key, value):
+        pass  # No-op for tests
+
+    return param_setter
+
+
+class TestMemModesSchemaValidation:
+    """Test InputSchema validation of mem_modes."""
+
+    def test_valid_mem_modes_frozenset(self):
+        """Test that a valid mem_modes frozenset is accepted."""
+        schema = InputSchema(
+            name="test_input",
+            block_tiling=[FULL_DIM],
+            stream_tiling=[],
+            mem_modes=frozenset({"embedded", "decoupled", "dynamic"}),
+        )
+        assert schema.mem_modes == frozenset({"embedded", "decoupled", "dynamic"})
+
+    def test_valid_mem_modes_callable(self):
+        """Test that callable mem_modes is accepted."""
+
+        def compute_modes(ctx):
+            return frozenset({"embedded"})
+
+        schema = InputSchema(
+            name="test_input",
+            block_tiling=[FULL_DIM],
+            stream_tiling=[],
+            mem_modes=compute_modes,
+        )
+        assert callable(schema.mem_modes)
+
+    def test_invalid_mem_modes_type(self):
+        """Test that non-frozenset/callable mem_modes raises TypeError."""
+        with pytest.raises(TypeError, match="must be frozenset or callable"):
+            InputSchema(
+                name="test_input",
+                block_tiling=[FULL_DIM],
+                stream_tiling=[],
+                mem_modes={"embedded", "decoupled"},  # set, not frozenset
+            )
+
+    def test_invalid_mem_mode_values(self):
+        """Test that invalid mode names raise ValueError."""
+        with pytest.raises(ValueError, match="Invalid mem_modes"):
+            InputSchema(
+                name="test_input",
+                block_tiling=[FULL_DIM],
+                stream_tiling=[],
+                mem_modes=frozenset({"embedded", "invalid_mode"}),
+            )
+
+    def test_mem_modes_none_is_valid(self):
+        """Test that mem_modes=None (non-weight input) is valid."""
+        schema = InputSchema(
+            name="test_input",
block_tiling=[FULL_DIM], + stream_tiling=["PE"], + mem_modes=None, # Dynamic input, not a weight + ) + assert schema.mem_modes is None + + +class TestBuilderParameterGeneration: + """Test that builder generates inputMemType parameters from mem_modes.""" + + def test_generates_input0_memtype_parameter(self): + """Test that mem_modes on input0 generates input0MemType parameter.""" + # Create minimal schema with mem_modes on first input + schema = KernelSchema( + name="TestKernel", + inputs=[ + InputSchema( + name="weights", + block_tiling=[], + stream_tiling=[], + mem_modes=frozenset({"embedded", "decoupled"}), + ), + ], + outputs=[ + OutputSchema( + name="output", + block_tiling=[FULL_DIM], + stream_tiling=[], + ), + ], + kernel_params={}, + dse_parameters={}, + ) + + # Create minimal ONNX model + import onnx + + inp = onnx.helper.make_tensor_value_info("weights", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [4]) + weight_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + weight_init = onnx.numpy_helper.from_array(weight_data, name="weights") + node = onnx.helper.make_node("TestOp", inputs=["weights"], outputs=["output"], name="test_node") + graph = onnx.helper.make_graph( + [node], "test_graph", [inp], [out], initializer=[weight_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + + # Build design space + node = model_w.graph.node[0] + # Mark weights as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input0MemType", "embedded")) + build_ctx = BuildContext( + schema=schema, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Verify input0MemType parameter exists + assert "input0MemType" in design_space.parameters + assert design_space.parameters["input0MemType"] == frozenset({"embedded", "decoupled"}) + + def test_generates_input1_memtype_parameter(self): + """Test that mem_modes on input1 generates input1MemType parameter.""" + schema = KernelSchema( + name="TestKernel", + inputs=[ + InputSchema( + name="data", + block_tiling=[FULL_DIM], + stream_tiling=["PE"], + mem_modes=None, # Dynamic input + ), + InputSchema( + name="thresholds", + block_tiling=[], + stream_tiling=[], + mem_modes=frozenset({"embedded", "decoupled", "dynamic"}), + ), + ], + outputs=[ + OutputSchema( + name="output", + block_tiling=[FULL_DIM], + stream_tiling=["PE"], + ), + ], + kernel_params={}, + dse_parameters={"PE": df.ParameterSpec(name="PE", values=[1, 2, 4], default=1)}, + ) + + # Create ONNX model with initializer for thresholds + import onnx + + inp1 = onnx.helper.make_tensor_value_info("data", onnx.TensorProto.FLOAT, [16]) + inp2 = onnx.helper.make_tensor_value_info("thresholds", onnx.TensorProto.FLOAT, [16]) + out = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [16]) + threshold_data = gen_finn_dt_tensor(DataType["INT8"], [16]) + threshold_init = onnx.numpy_helper.from_array(threshold_data, name="thresholds") + node = onnx.helper.make_node( + "TestOp", inputs=["data", "thresholds"], outputs=["output"], name="test_node" + ) + graph = onnx.helper.make_graph( + [node], "test_graph", [inp1, inp2], [out], initializer=[threshold_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + + # Build design space + node = model_w.graph.node[0] + # Mark thresholds as weight (simulating what InferKernel would 
do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=schema, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Verify input1MemType parameter exists (thresholds is index 1) + assert "input1MemType" in design_space.parameters + assert design_space.parameters["input1MemType"] == frozenset( + {"embedded", "decoupled", "dynamic"} + ) + + # Verify input0 does NOT have mem_mode parameter (dynamic input) + assert "input0MemType" not in design_space.parameters + + +class TestCallableMemModes: + """Test callable mem_modes for context-aware filtering.""" + + def test_callable_mlo_filtering(self): + """Test that callable filters to dynamic mode when mlo_max_iter > 1.""" + + def compute_modes(ctx: BuildContext) -> frozenset[str]: + """Filter modes based on MLO context.""" + try: + mlo_max_iter = ctx.param_getter("mlo_max_iter") + if mlo_max_iter and mlo_max_iter > 1: + return frozenset({"dynamic"}) # MLO forces streaming + except (AttributeError, KeyError): + pass + return frozenset({"embedded", "decoupled"}) + + schema = KernelSchema( + name="TestKernel", + inputs=[ + InputSchema( + name="weights", + block_tiling=[], + stream_tiling=[], + mem_modes=compute_modes, # Callable + ), + ], + outputs=[ + OutputSchema( + name="output", + block_tiling=[FULL_DIM], + stream_tiling=[], + ), + ], + kernel_params={"mlo_max_iter": ("i", False, 1)}, + dse_parameters={}, + ) + + # Test 1: Non-MLO context (mlo_max_iter=1) + import onnx + + inp = onnx.helper.make_tensor_value_info("weights", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [4]) + weight_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + weight_init = onnx.numpy_helper.from_array(weight_data, name="weights") + node = onnx.helper.make_node( + "TestOp", + inputs=["weights"], + outputs=["output"], + name="test_node", + mlo_max_iter=1, # Non-MLO + ) + graph = onnx.helper.make_graph( + [node], "test_graph", [inp], [out], initializer=[weight_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + + node = model_w.graph.node[0] + # Mark weights as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input0MemType", "embedded")) + build_ctx = BuildContext( + schema=schema, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Should have embedded and decoupled + assert design_space.parameters["input0MemType"] == frozenset({"embedded", "decoupled"}) + + # Test 2: MLO context (mlo_max_iter=4) + node_mlo = onnx.helper.make_node( + "TestOp", + inputs=["weights"], + outputs=["output"], + name="test_node_mlo", + mlo_max_iter=4, # MLO mode! 
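+            # compute_modes() above treats any mlo_max_iter > 1 as an MLO
+            # context, restricting the weight's modes to {"dynamic"}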
+ ) + # Reuse the weight_init from above + graph_mlo = onnx.helper.make_graph( + [node_mlo], "test_graph", [inp], [out], initializer=[weight_init] + ) + model_mlo = onnx.helper.make_model(graph_mlo) + model_w_mlo = ModelWrapper(model_mlo) + + node_mlo = model_w_mlo.graph.node[0] + # Mark weights as weight + node_mlo.attribute.append(onnx.helper.make_attribute("input0MemType", "embedded")) + build_ctx_mlo = BuildContext( + schema=schema, + model_w=model_w_mlo, + node=node_mlo, + param_getter=_create_param_getter(node_mlo), + param_setter=_create_param_setter(), + ) + design_space_mlo = DesignSpaceBuilder().build(build_ctx_mlo) + + # Should only have dynamic mode + assert design_space_mlo.parameters["input0MemType"] == frozenset({"dynamic"}) + + +class TestInterfaceDesignPointMemMode: + """Test that InterfaceDesignPoint.mem_mode is populated from config.""" + + def test_mem_mode_populated_on_instantiation(self): + """Test that mem_mode is extracted from params and set on interface.""" + schema = KernelSchema( + name="TestKernel", + inputs=[ + InputSchema( + name="weights", + block_tiling=[], + stream_tiling=[], + mem_modes=frozenset({"embedded", "decoupled"}), + ), + ], + outputs=[ + OutputSchema( + name="output", + block_tiling=[FULL_DIM], + stream_tiling=[], + ), + ], + kernel_params={}, + dse_parameters={}, + ) + + # Create ONNX model + import onnx + + inp = onnx.helper.make_tensor_value_info("weights", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("output", onnx.TensorProto.FLOAT, [4]) + weight_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + weight_init = onnx.numpy_helper.from_array(weight_data, name="weights") + node = onnx.helper.make_node("TestOp", inputs=["weights"], outputs=["output"], name="test_node") + graph = onnx.helper.make_graph( + [node], "test_graph", [inp], [out], initializer=[weight_init] + ) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + + # Build design space + node = model_w.graph.node[0] + # Mark weights as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input0MemType", "embedded")) + build_ctx = BuildContext( + schema=schema, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Configure with embedded mode + design_point = design_space.configure({"input0MemType": "embedded"}) + + # Verify mem_mode is set on interface + assert design_point.inputs["weights"].mem_mode == "embedded" + + # Configure with decoupled mode + design_point2 = design_space.configure({"input0MemType": "decoupled"}) + assert design_point2.inputs["weights"].mem_mode == "decoupled" + + +class TestChannelwiseOpMemModes: + """Test mem_modes for ChannelwiseOp kernel.""" + + def test_channelwise_schema_has_mem_modes(self): + """Test that ChannelwiseOp schema has mem_modes on parameters input.""" + from brainsmith.kernels.channelwise.channelwise import CHANNELWISE_SCHEMA + + # Find parameters input (index 1) + params_input = CHANNELWISE_SCHEMA.inputs[1] + assert params_input.name == "parameters" + assert params_input.mem_modes is not None + assert params_input.mem_modes == frozenset({"embedded"}) + + def test_channelwise_generates_input1_memtype(self): + """Test that ChannelwiseOp design space has input1MemType parameter.""" + from brainsmith.kernels.channelwise.channelwise import CHANNELWISE_SCHEMA + import onnx + + # Create simple Add operation with static RHS + lhs = 
onnx.helper.make_tensor_value_info("lhs", onnx.TensorProto.FLOAT, [4]) + rhs = onnx.helper.make_tensor_value_info("rhs", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [4]) + + # RHS is static (initializer) + rhs_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + rhs_init = onnx.numpy_helper.from_array(rhs_data, name="rhs") + + node = onnx.helper.make_node( + "Add", + inputs=["lhs", "rhs"], + outputs=["out"], + name="add_node", + func="Add", + ) + + graph = onnx.helper.make_graph([node], "add_graph", [lhs, rhs], [out], initializer=[rhs_init]) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("lhs", DataType["INT8"]) + model_w.set_tensor_datatype("rhs", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark parameters as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=CHANNELWISE_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Verify input1MemType parameter exists (RHS is index 1) + assert "input1MemType" in design_space.parameters + mem_modes = design_space.parameters["input1MemType"] + assert mem_modes == frozenset({"embedded"}) + + def test_channelwise_interface_mem_mode_accessible(self): + """Test that mem_mode is accessible from design point interface.""" + from brainsmith.kernels.channelwise.channelwise import CHANNELWISE_SCHEMA + import onnx + + lhs = onnx.helper.make_tensor_value_info("lhs", onnx.TensorProto.FLOAT, [4]) + rhs = onnx.helper.make_tensor_value_info("rhs", onnx.TensorProto.FLOAT, [4]) + out = onnx.helper.make_tensor_value_info("out", onnx.TensorProto.FLOAT, [4]) + + rhs_data = gen_finn_dt_tensor(DataType["INT8"], [4]) + rhs_init = onnx.numpy_helper.from_array(rhs_data, name="rhs") + + node = onnx.helper.make_node( + "Add", + inputs=["lhs", "rhs"], + outputs=["out"], + name="add_node", + func="Add", + ) + + graph = onnx.helper.make_graph([node], "add_graph", [lhs, rhs], [out], initializer=[rhs_init]) + model = onnx.helper.make_model(graph) + model_w = ModelWrapper(model) + model_w.set_tensor_datatype("lhs", DataType["INT8"]) + model_w.set_tensor_datatype("rhs", DataType["INT8"]) + model_w.set_tensor_datatype("out", DataType["INT8"]) + + # Build design space + node = model_w.graph.node[0] + # Mark parameters as weight (simulating what InferKernel would do) + node.attribute.append(onnx.helper.make_attribute("input1MemType", "embedded")) + build_ctx = BuildContext( + schema=CHANNELWISE_SCHEMA, + model_w=model_w, + node=node, + param_getter=_create_param_getter(node), + param_setter=_create_param_setter(), + ) + design_space = DesignSpaceBuilder().build(build_ctx) + + # Configure with embedded mode + design_point = design_space.configure({ + "PE": 1, + "input1MemType": "embedded", + "ram_style": "distributed" + }) + + # Verify mem_mode is accessible from interface + params_iface = design_point.inputs["parameters"] + assert params_iface.mem_mode == "embedded" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"])
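+
+
+# -----------------------------------------------------------------------------
+# Usage sketch (comments only; the kernel below is hypothetical, but the names
+# mirror the schemas exercised above):
+#
+#     weights = InputSchema(
+#         name="weights",
+#         block_tiling=[],
+#         stream_tiling=[],
+#         mem_modes=frozenset({"embedded", "decoupled", "dynamic"}),
+#     )
+#
+# For each inputs[i] with mem_modes set, DesignSpaceBuilder emits an
+# "input{i}MemType" parameter, and configure({"input0MemType": "decoupled"})
+# populates design_point.inputs["weights"].mem_mode accordingly.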