Changes from all commits
134 commits
426e89d
Created branch, added codeowners
Jan 30, 2025
10d7bb3
Initial migration from internal repo (#5)
STFleming Jan 30, 2025
5ba98f9
BERT builder flow arguments for fifosim n_inferences (#6)
STFleming Feb 17, 2025
ccd023b
[SoftMax] New Improved SoftMax (#11)
STFleming Mar 7, 2025
ad639bc
[BugFix] Issues with incorrect configuration of SIMD for ShuffleB nod…
STFleming Mar 7, 2025
4afffcd
Adding cycle testing to custom op test scripts (#7)
penrosed Mar 7, 2025
fcd7bc3
Added a custom step that extracts metadata for the shell integration …
STFleming Mar 13, 2025
fab2842
[TinyBERT] Removing accidentally included start_step in the endtoend …
STFleming Mar 18, 2025
d7fb002
Removing rtlsim_backend after pyverilator deprecation (#16)
STFleming Mar 19, 2025
0c72dda
Name stylize BrainSmith --> Brainsmith (#17)
tafk7 Mar 20, 2025
dbfbe67
[TinyBERT] Add ref IO to stitched_ip as part of metadata handover (#18)
STFleming Mar 27, 2025
3d8aac0
[Testing] Created OpTest class for abstracting CustomOp tests (#19)
penrosed Apr 3, 2025
99e2aa2
Initial repository structure (#20)
tafk7 Apr 11, 2025
15fb647
Add Custom ONNXSCRIPT repository to BrainSmith (#21)
jsmonson Apr 17, 2025
752bd39
Revert "Add Custom ONNXSCRIPT repository to BrainSmith (#21)" (#22)
tafk7 Apr 18, 2025
17fc5ca
[CustomOps] Update brainsmith custom ops with changes on finn side (#25)
auphelia Apr 28, 2025
3dcfe0b
Initial continuous integration tests (#24)
tafk7 Apr 29, 2025
ff45805
Revert onnxscript add Revert (#26)
jsmonson May 2, 2025
ec39f0d
Fix Dynamic Matmul Initial Config For BERT-Large (#28)
jsmonson May 2, 2025
fc73217
fix argparse arg that could never be false (#30)
jsmonson May 27, 2025
4530385
Patch Pull Request #30: Update args variable to match new argument na…
jsmonson May 28, 2025
7a410b2
update pytorch to 2.7 (#34)
jsmonson May 29, 2025
30e48ad
[Hotfix] Cleanup CI runner artifacts (#33)
tafk7 May 29, 2025
40abeed
update brevitas commit hash (#36)
jsmonson May 30, 2025
937a639
Set onnxscript to a fixed commit id (#37)
jsmonson Jun 2, 2025
84b5301
Hardware Kernel Generator: RTL Parser & wrapper generation (#32)
tafk7 Jun 4, 2025
3b50184
Add BERT-Large CI Test (#40)
jsmonson Jun 11, 2025
9dc7ae8
Docker workflow modernization (#38)
tafk7 Jun 17, 2025
d14fbba
Update FINN (#41)
auphelia Jun 25, 2025
56b90fd
Core DSE & Plugin Library (#44)
tafk7 Aug 1, 2025
1ebf35c
[Deps] Update and fix finn and qonnx deps
auphelia Aug 27, 2025
d3e9f4d
Merge remote-tracking branch 'origin/main' into develop-branch-update
Aug 29, 2025
dedc4cc
Merge branch 'dev/update_finn_qonnx' of github.com:microsoft/Brainsmi…
Aug 29, 2025
eee7ee0
Merge branch 'main' of github.com:microsoft/Brainsmith into develop
Aug 29, 2025
b2e4c44
[Deps] Update and fix finn and qonnx deps (#54)
jsmonson Aug 29, 2025
a41bf20
Hotfix: Update and freeze FINN dependency #2 (#55)
jsmonson Aug 29, 2025
87493eb
Kernel Integrator (#48)
tafk7 Sep 1, 2025
f407e99
update transformers, add onnxscript, update brevitas
Sep 12, 2025
ce6614e
update transformers, add onnxscript, update brevitas
Sep 12, 2025
c75bd5a
add loop rolling step
Sep 12, 2025
5c5c7f5
add loop rolling step
Sep 12, 2025
4bc48b4
add bert dynamo export
Sep 12, 2025
a27278a
add bert dynamo export
Sep 12, 2025
db18921
preserve metadata through simplify operation
Sep 12, 2025
ab375b6
preserve metadata through simplify operation
Sep 12, 2025
f741dfc
add bert mlo demo
Sep 12, 2025
9c7a702
add bert mlo demo
Sep 12, 2025
85fb1a2
reinster from white space
Sep 12, 2025
4030c96
reinster from white space
Sep 12, 2025
9cecf0d
update to onnxscript 0.5.0
Sep 15, 2025
812f9cb
update to onnxscript 0.5.0
Sep 15, 2025
ce1c84a
remove custom onnxscript repo
Sep 15, 2025
3ebe70f
remove custom onnxscript repo
Sep 15, 2025
2274c77
update additional onnx script location
Sep 15, 2025
1960795
update additional onnx script location
Sep 15, 2025
671ee7a
added split large fifo option for MLO
Sep 15, 2025
b6621cd
added split large fifo option for MLO
Sep 15, 2025
216cbb0
Merge pull request #58 from microsoft/develop-branch-update
tafk7 Sep 19, 2025
729d969
Integration tests & updated docs (#59)
tafk7 Sep 22, 2025
66fa33a
Merge branch 'develop' of github.com:microsoft/Brainsmith into dev/jo…
Sep 24, 2025
b47d7a7
Merge branch 'develop' of github.com:microsoft/Brainsmith into dev/jo…
Sep 24, 2025
cd4f3d5
align with develop branch
Sep 24, 2025
8481ba2
align with develop branch
Sep 24, 2025
b211b3a
update bash script to match bert_demo.py args
Sep 26, 2025
22b736c
initial testing of a trained single layer BERT model being passed thr…
STFleming Sep 29, 2025
7d652e3
Changing the input datatype to match the new input
STFleming Sep 30, 2025
9634b70
Collapsing some of the additional mul nodes (thanks @auphelia)
STFleming Sep 30, 2025
466eafc
Small changes to work with the indices from the crop node that gets g…
STFleming Sep 30, 2025
96adde7
Getting up initial training script for pipecleaner model
STFleming Oct 2, 2025
9e35722
Merge remote-tracking branch 'origin/develop' into dev/joshmonson/add…
Oct 2, 2025
f87cedb
Merge branch 'dev/joshmonson/add-loop-rolling' of github.com:microsof…
Oct 2, 2025
a8d2ed1
return missing lines
Oct 2, 2025
cac2921
Merge branch 'develop' of github.com:microsoft/Brainsmith into dev/jo…
Oct 2, 2025
64d2cf3
Add Round and Clip Thresholds Step for MLO
Oct 2, 2025
aef87a1
Fixing commit for dynamo export thanks @auphelia
STFleming Oct 3, 2025
a32d7fc
Making Quantization work inside the Brainsmith container
STFleming Oct 3, 2025
db5ee89
FIFO configuration for a single layer for faster deployment
STFleming Oct 3, 2025
995e724
Adding some precalculated FIFO depths
STFleming Oct 3, 2025
d5726d4
Pointing at latest changes from @auphelia
STFleming Oct 3, 2025
51120ab
Produce a dcp
STFleming Oct 3, 2025
91d4916
update finn configs
Oct 9, 2025
452eecc
4-bit weights are currently broken due to fetch weights.
Oct 11, 2025
c5ec519
Merging with Josh's latest MLO tests
STFleming Oct 13, 2025
646a4dc
fetch repos pointing at the appropriate branches
STFleming Oct 13, 2025
61ec09a
Fix typo
STFleming Oct 13, 2025
01ebe97
removing cleanup from here in case it is removing metadata
STFleming Oct 13, 2025
d631499
[Transforms] Add node metadata propagation to bsmith transforms
auphelia Oct 13, 2025
5148465
Merge branch 'dev/auphelia/propagate-metadata' into dev/sfleming/trai…
auphelia Oct 13, 2025
ab30a8d
[Transforms] Add node metadata propagation to bsmith transforms (#72)
auphelia Oct 13, 2025
7d5868b
[BertFlow] Add loop body hierarchies
auphelia Oct 14, 2025
a3851a1
fix the metadata issue.
Oct 15, 2025
d8af33c
forgotten file from last commit
Oct 15, 2025
e6e9f2e
Merge pull request #77 from microsoft/dev/joshmonson/trainedbert_mlo_…
STFleming Oct 16, 2025
8c1d0dc
Merge remote-tracking branch 'origin/dev/joshmonson/add-loop-rolling'…
STFleming Oct 20, 2025
a7678e9
Added an initial folding configuration to try and get the end2end flo…
STFleming Oct 20, 2025
a51639f
[BertMLO] Add prefix to folding config
auphelia Oct 20, 2025
95cfca9
[BertMLO] Add first iteration of folding config json containing all n…
auphelia Oct 21, 2025
9474188
update loop_body_hierarhcy to list of lists
Oct 21, 2025
65022cf
Adding back in the head removal to avoid the automated partitioning.
STFleming Oct 22, 2025
93699b1
[TrainedBERT] removing the duplicate generate_reference_io
STFleming Oct 22, 2025
950c494
Merge pull request #78 from microsoft/dev/joshmonson/run_untrained_mlo
STFleming Oct 22, 2025
6aad596
[ShellHandover] Updated the shell handover generation to include spec…
STFleming Oct 30, 2025
6e039d9
[Crop] Update crop node execute node fct to use hlsbackend
auphelia Oct 31, 2025
5f5993b
initial documentation for loop-rolling
Oct 31, 2025
0ed5942
Roll-back some of the claims made by the AI.
Oct 31, 2025
9e3e445
remove GPT since we don't support that
Oct 31, 2025
6f9796e
additional explanations
Oct 31, 2025
ae06ebc
updates that need to be reviewed.
Oct 31, 2025
ea65d83
[LayernormHLS] Update execute node fct
auphelia Nov 4, 2025
f76211d
almost done
Nov 7, 2025
e1cb39b
asked ai to review and test the code snippets. They at least run but …
Nov 7, 2025
6091c48
Merge branch 'dev/sfleming/trainedbert_mlo' of github.com:microsoft/B…
Nov 7, 2025
68e5bcb
one more fix
Nov 7, 2025
b2d6e1e
docs: update CLI syntax, enhance styling, and add image lightbox support
tafk7 Nov 9, 2025
4a61e2a
Polish mkdocs site
tafk7 Nov 10, 2025
58681b7
Merge branch 'develop' into dev/tafk/clean-mlo-merge
tafk7 Nov 10, 2025
8bbd49f
Add imports for training
tafk7 Nov 10, 2025
cde2ce3
Merge branch 'dev/tafk/docs-v010' into dev/tafk/clean-mlo-merge
tafk7 Nov 10, 2025
2171a8b
refactor: MLO metadata propagation and build pipeline reorganization
tafk7 Nov 10, 2025
3f83875
Release v0.1.0
tafk7 Nov 11, 2025
e2df546
Rename expand norms steps file for clarity
tafk7 Nov 12, 2025
497dd50
Merge branch 'main' into dev/tafk/clean-mlo-merge
tafk7 Nov 12, 2025
ffbdc34
Missed merge resolutions
tafk7 Nov 12, 2025
81f1228
Merge pull request #90 from microsoft/develop
tafk7 Nov 13, 2025
d133d2d
Merge pull request #92 from microsoft/develop
tafk7 Nov 13, 2025
6b1e9ef
Merge pull request #95 from microsoft/develop
tafk7 Nov 14, 2025
9fea045
Leftover merge cleanup
tafk7 Nov 14, 2025
96a01c6
Merge remote-tracking branch 'origin' into dev/tafk/clean-mlo-merge
tafk7 Nov 14, 2025
dd90fab
Merge remote-tracking branch 'origin/develop' into dev/tafk/clean-mlo…
tafk7 Nov 14, 2025
8ecb3ca
Moveup bitwidth
tafk7 Nov 14, 2025
7e079ee
feat: add mem_modes system and refactor BuildContext
tafk7 Nov 19, 2025
1df3cbe
refactor: consolidate compilation steps into logical modules
tafk7 Nov 22, 2025
c696e90
fix(thresholding): handle MLO nodes in RTL codegen
tafk7 Nov 24, 2025
6ea2677
fix: update kernel IPI signatures and import paths
tafk7 Dec 9, 2025
16 changes: 8 additions & 8 deletions brainsmith/_internal/io/dependency_installers.py
@@ -104,7 +104,7 @@ def install(self, name: str, dep: dict, dest: Path, force: bool, quiet: bool) ->
cmd.extend([dep["url"], str(dest)])

if not quiet:
logger.info("Cloning %s from %s", name, dep["url"])
logger.debug("Cloning %s from %s", name, dep['url'])

result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
@@ -170,7 +170,7 @@ def install(self, name: str, dep: dict, dest: Path, force: bool, quiet: bool) ->

try:
if not quiet:
logger.info("Downloading %s from %s", name, dep["url"])
logger.debug("Downloading %s from %s", name, dep['url'])

urlretrieve(dep["url"], zip_path)

@@ -254,7 +254,7 @@ def _install_finn_xsim(self, force: bool, quiet: bool) -> None:

# Build with finn-xsim
if not quiet:
logger.info("Building finn-xsim...")
logger.debug("Building finn-xsim...")

# Construct build command
build_cmd = ["python3", "-m", "finn.xsi.setup"]
@@ -265,16 +265,16 @@
python_cmd = " ".join(build_cmd)
bash_cmd = f"source {settings_script} && {python_cmd}"

logger.info("Running: %s", bash_cmd)
logger.debug("Running: %s", bash_cmd)

# Execute build
result = subprocess.run(["bash", "-c", bash_cmd], capture_output=True, text=True)

# Log output at INFO level (visible with --logs info)
# Log output at DEBUG level (visible with --logs debug)
if result.stdout:
for line in result.stdout.splitlines():
if line.strip():
logger.info(line)
logger.debug(line)

if result.stderr:
for line in result.stderr.splitlines():
@@ -322,7 +322,7 @@ def _install_generic_build(self, name: str, dep: dict, force: bool, quiet: bool)
raise BuildError(error_msg)

if not quiet:
logger.info("Building %s in %s", name, source_dir)
logger.debug("Building %s in %s", name, source_dir)

# Run build command
env = os.environ.copy()
@@ -334,7 +334,7 @@ def _install_generic_build(self, name: str, dep: dict, force: bool, quiet: bool)
if result.stdout:
for line in result.stdout.splitlines():
if line.strip():
logger.info(line)
logger.debug(line)

if result.stderr:
for line in result.stderr.splitlines():
4 changes: 2 additions & 2 deletions brainsmith/_version.py
@@ -2,5 +2,5 @@
# Licensed under the MIT License.

# Version information for brainsmith
__version__ = "0.0.1a"
__version_tuple__ = (0, 0, 1, "a")
__version__ = "0.1.0"
__version_tuple__ = (0, 1, 0)
62 changes: 46 additions & 16 deletions brainsmith/dataflow/builder.py
@@ -31,8 +31,10 @@
from math import gcd
from typing import TYPE_CHECKING, Any

from onnx import NodeProto
from qonnx.core.datatype import BaseDataType
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.util.basic import get_by_name

from brainsmith._internal.math import divisors

@@ -57,20 +59,16 @@ class BuildContext:
Attributes:
schema: KernelSchema defining structure
model_w: ModelWrapper for ONNX graph access
node_inputs: ONNX node input tensor names
node_outputs: ONNX node output tensor names
node: ONNX NodeProto (provides .input, .output, .name)
param_getter: Function to retrieve nodeattr values
param_setter: Function to store nodeattr values
node_name: Node name for error messages
"""

schema: KernelSchema
model_w: ModelWrapper
node_inputs: list[str]
node_outputs: list[str]
node: NodeProto
param_getter: Callable[[str], Any]
param_setter: Callable[[str, Any], None]
node_name: str = "<unknown>"


class DesignSpaceBuilder:
@@ -85,11 +83,9 @@ class DesignSpaceBuilder:
>>> context = BuildContext(
... schema=kernel_schema,
... model_w=model_wrapper,
... node_inputs=list(node.input),
... node_outputs=list(node.output),
... node=node,
... param_getter=self.get_nodeattr,
... param_setter=self.set_nodeattr,
... node_name=node.name
... )
>>> design_space = builder.build(context)
>>> point = design_space.configure({"SIMD": 64, "PE": 1})
@@ -195,12 +191,12 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace:
self._ctx = ctx
self._interfaces: dict[str, Any] = {}

logger.debug(f"Building KernelDesignSpace for {ctx.node_name}")
logger.debug(f"Building KernelDesignSpace for {ctx.node.name}")

# Build input interfaces from ONNX graph
inputs: dict[str, InterfaceDesignSpace] = {}

for i, inp_name in enumerate(ctx.node_inputs):
for i, inp_name in enumerate(ctx.node.input):
if not inp_name:
continue

@@ -248,7 +244,7 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace:
# Build output interfaces (may derive datatypes from inputs)
outputs: dict[str, InterfaceDesignSpace] = {}

for i, out_name in enumerate(ctx.node_outputs):
for i, out_name in enumerate(ctx.node.output):
if i >= len(ctx.schema.outputs):
logger.warning(
f"Node has output {i} but schema only defines {len(ctx.schema.outputs)} outputs"
@@ -294,7 +290,7 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace:
if (e := c.check(validation_ctx))
]
if failed:
raise ValueError(f"{ctx.node_name} validation failed:\n" + "\n".join(failed))
raise ValueError(f"{ctx.node.name} validation failed:\n" + "\n".join(failed))

logger.debug(f" All {len(structural_constraints)} structural constraints passed")

Expand All @@ -317,7 +313,7 @@ def build(self, ctx: BuildContext) -> KernelDesignSpace:
parameters=all_dimensions,
)

logger.debug(f"KernelDesignSpace built successfully for {ctx.node_name}")
logger.debug(f"KernelDesignSpace built successfully for {ctx.node.name}")
return design_space

def _resolve_datatype(
@@ -696,8 +692,42 @@ def _compute_dimension_ranges(
f"{ordered_count} ordered, {discrete_count} discrete"
)

# Combine tiling + DSE dimensions
all_dimensions = {**tiling_dimensions, **dse_dimensions}
# Generate input<idx>MemType parameters from mem_modes
mem_mode_dimensions = {}
for idx, inp in enumerate(schema.inputs):
if inp.mem_modes is None:
continue

param_name = f"input{idx}MemType"

# Check if InferKernel marked this input as a weight
# Attribute presence indicates weight; absence indicates pure streaming input
attr = get_by_name(self._ctx.node.attribute, param_name)
if attr is None:
# Not a weight - skip parameter creation
logger.debug(f"Skipping {param_name}: not marked as weight by InferKernel")
continue

values = inp.mem_modes

# Support callable for context-aware filtering (e.g., MLO)
if callable(values):
values = values(self._ctx)

# Ensure frozenset for discrete parameter
if not isinstance(values, frozenset):
values = frozenset(values)

mem_mode_dimensions[param_name] = values

if mem_mode_dimensions:
logger.debug(
f"Added {len(mem_mode_dimensions)} mem_mode dimensions: "
+ ", ".join(f"{k}={v}" for k, v in mem_mode_dimensions.items())
)

# Combine tiling + DSE + mem_mode dimensions
all_dimensions = {**tiling_dimensions, **dse_dimensions, **mem_mode_dimensions}

return all_dimensions

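The mem-mode dimension derivation in the hunk above can be sketched in isolation. This is a minimal stand-alone sketch, not the real builder: `InputSpec`, `mem_mode_dimensions`, and the `marked` set are hypothetical stand-ins for `InputSchema.mem_modes` and the `input<idx>MemType` attribute that `InferKernel` sets on weight inputs.

```python
from dataclasses import dataclass

@dataclass
class InputSpec:
    # Stand-in for InputSchema: mem_modes is None, a frozenset, or a callable
    mem_modes: object = None

def mem_mode_dimensions(inputs, marked, ctx=None):
    """Build {input<idx>MemType: frozenset-of-modes} for marked weight inputs.

    `marked` stands in for the attribute-presence check: only inputs flagged
    as weights get a memory-mode DSE dimension."""
    dims = {}
    for idx, inp in enumerate(inputs):
        if inp.mem_modes is None or idx not in marked:
            continue  # pure streaming input: no memory-mode parameter
        values = inp.mem_modes
        if callable(values):
            values = values(ctx)  # context-aware filtering (e.g. MLO)
        dims[f"input{idx}MemType"] = frozenset(values)
    return dims

inputs = [InputSpec(), InputSpec(mem_modes=frozenset({"embedded", "decoupled"}))]
assert mem_mode_dimensions(inputs, marked={1}) == {
    "input1MemType": frozenset({"embedded", "decoupled"})
}
assert mem_mode_dimensions(inputs, marked=set()) == {}  # unmarked inputs skipped
```

Note the two-level gate: an input must both declare `mem_modes` in its schema and be marked as a weight before any DSE dimension is emitted.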
15 changes: 11 additions & 4 deletions brainsmith/dataflow/dse_models.py
@@ -66,6 +66,7 @@ class InterfaceDesignSpace:
datatype: Interface datatype
is_weight: Whether this is a weight tensor (constant)
tensor_name: ONNX tensor name for initializer lookups
mem_mode: Memory mode for weight inputs (embedded/decoupled/dynamic)
parallelism_dimension: OrderedParameter for stream parameter (None if no parallelism)
parallelism_param: Parameter name for stream dimension (e.g., "SIMD", "PE")
"""
@@ -88,16 +89,18 @@ class InterfaceDesignPoint:
"""Interface instance with resolved parallelization.

Flyweight pattern: references parent design space, stores only configuration-
specific stream_shape. Delegates tensor_shape, block_shape, and datatype
to design space for minimal memory overhead.
specific stream_shape and mem_mode. Delegates tensor_shape, block_shape, and
datatype to design space for minimal memory overhead.

Attributes:
design_space: Parent InterfaceDesignSpace
stream_shape: Resolved stream dimensions for this configuration
mem_mode: Memory mode for weight inputs (embedded/decoupled/dynamic)
"""

design_space: InterfaceDesignSpace
stream_shape: Shape
mem_mode: str | None = None # Memory mode (embedded/decoupled/dynamic) for weight inputs

# Convenience properties (delegate to design space)
@property
@@ -399,7 +402,7 @@ def _instantiate_interfaces(
from .template_resolution import resolve_template

configured = {}
for interface in interfaces.values():
for idx, interface in enumerate(interfaces.values()):
stream_shape = (
interface.block_shape
if interface.stream_tiling is None
@@ -413,8 +416,12 @@
)
)

# Extract mem_mode from params if this is an input with mem_modes
mem_mode_param = f"input{idx}MemType"
mem_mode = params.get(mem_mode_param)

configured_interface = InterfaceDesignPoint(
design_space=interface, stream_shape=stream_shape
design_space=interface, stream_shape=stream_shape, mem_mode=mem_mode
)
configured[interface.name] = configured_interface
interface_lookup[interface.name] = configured_interface
10 changes: 7 additions & 3 deletions brainsmith/dataflow/kernel_op.py
@@ -300,11 +300,9 @@ def _ensure_ready(self, model_w: ModelWrapper) -> None:
build_ctx = BuildContext(
schema=self.kernel_schema,
model_w=model_w,
node_inputs=list(self.onnx_node.input),
node_outputs=list(self.onnx_node.output),
node=self.onnx_node,
param_getter=self.get_nodeattr,
param_setter=self.set_nodeattr,
node_name=self.onnx_node.name,
)

try:
@@ -324,6 +322,12 @@ def _ensure_ready(self, model_w: ModelWrapper) -> None:
# OrderedParameter: use get_default() (explicit default or minimum)
initial_value = param.get_default()
else: # frozenset
# Defensive: skip empty parameter sets (shouldn't happen with new design)
if len(param) == 0:
logger.debug(
f"{self.onnx_node.name}: Skipping empty parameter {param_name}"
)
continue
# Discrete: use sorted first value
initial_value = sorted(param)[0]

27 changes: 27 additions & 0 deletions brainsmith/dataflow/schemas.py
@@ -255,6 +255,9 @@ class InputSchema:
stream_tiling: Stream tiling specification (e.g., ["SIMD"], [1, 1, 1, "PE"])
datatype: Datatype spec (None to use from ONNX, or DatatypeSpec union type to derive/optimize)
required_layout: Expected input layout (e.g., "NHWC", "NCHW"), None if no requirement
mem_modes: Memory mode options for weight inputs (frozenset or callable returning frozenset).
Valid modes: "embedded" (compile-time constant), "decoupled" (separate memory),
"dynamic"/"external" (streaming). Generates input<idx>MemType DSE parameter.
"""

# Identity
@@ -268,6 +271,9 @@
# Transformation requirements (NEW - embedded in interface)
required_layout: str | None = None

# Memory mode specification for weight inputs
mem_modes: frozenset[str] | Callable | None = None

def __post_init__(self):
"""Validate interface requirements."""
if self.required_layout and self.required_layout not in {"NCHW", "NHWC"}:
@@ -276,6 +282,21 @@ def __post_init__(self):
f"Must be 'NCHW' or 'NHWC'."
)

# Validate mem_modes if specified
if self.mem_modes is not None and not callable(self.mem_modes):
VALID_MEM_MODES = {"embedded", "decoupled", "dynamic", "external"}
if not isinstance(self.mem_modes, frozenset):
raise TypeError(
f"mem_modes for input '{self.name}' must be frozenset or callable, "
f"got {type(self.mem_modes).__name__}"
)
invalid = self.mem_modes - VALID_MEM_MODES
if invalid:
raise ValueError(
f"Invalid mem_modes {invalid} for input '{self.name}'. "
f"Valid modes: {VALID_MEM_MODES}"
)
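A runnable mirror of this validation (hedged sketch: `check_mem_modes` is a hypothetical free function; the real check lives inside `InputSchema.__post_init__`):

```python
VALID_MEM_MODES = {"embedded", "decoupled", "dynamic", "external"}

def check_mem_modes(name: str, mem_modes) -> None:
    """Reject anything that is not None, a callable, or a frozenset of known modes."""
    if mem_modes is None or callable(mem_modes):
        return  # callables are resolved later, once build context is available
    if not isinstance(mem_modes, frozenset):
        raise TypeError(
            f"mem_modes for input '{name}' must be frozenset or callable, "
            f"got {type(mem_modes).__name__}"
        )
    invalid = mem_modes - VALID_MEM_MODES
    if invalid:
        raise ValueError(f"Invalid mem_modes {invalid} for input '{name}'")

check_mem_modes("weights", frozenset({"embedded", "decoupled"}))  # passes
check_mem_modes("weights", None)                                  # passes
```

Note that callables are accepted unchecked here: their returned modes can only be validated once the build context exists.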

@property
def tiling_attrs(self) -> list[str]:
"""Extract unique template parameter names from tiling specs."""
@@ -461,6 +482,12 @@ def build_nodeattr_registry(self) -> dict[str, tuple]:
for param in template_params:
attrs[param] = ("i", False, 1) # Default 1, will be computed from factoring

# Memory mode parameters (input<idx>MemType) - auto-extracted from mem_modes
for idx, inp in enumerate(self.inputs):
if inp.mem_modes is not None:
# Add input<idx>MemType as a string parameter
attrs[f"input{idx}MemType"] = ("s", False, "embedded")

# DSE parameters (resource parameters)
for param_name, param_spec in self.dse_parameters.items():
attrs[param_name] = _infer_nodeattr_type(param_spec)
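The registry hunk above gives every `mem_modes`-bearing input a string nodeattr defaulting to `"embedded"`. A minimal sketch (`Inp` and `build_mem_mode_attrs` are hypothetical stand-ins mirroring the loop in `build_nodeattr_registry`):

```python
class Inp:
    """Stand-in for InputSchema: only the mem_modes field matters here."""
    def __init__(self, mem_modes=None):
        self.mem_modes = mem_modes

def build_mem_mode_attrs(inputs) -> dict[str, tuple]:
    attrs = {}
    for idx, inp in enumerate(inputs):
        if inp.mem_modes is not None:
            # ("s", False, "embedded"): string attr, optional, defaults to "embedded"
            attrs[f"input{idx}MemType"] = ("s", False, "embedded")
    return attrs

attrs = build_mem_mode_attrs([Inp(), Inp(frozenset({"embedded", "decoupled"}))])
assert attrs == {"input1MemType": ("s", False, "embedded")}
```

The default of `"embedded"` keeps kernels without an explicit DSE choice on compile-time-constant weights, matching the safest fallback.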