2 changes: 2 additions & 0 deletions brainsmith/kernels/__init__.py
@@ -15,6 +15,7 @@

# Backends
from brainsmith.kernels.layernorm.layernorm_hls import LayerNorm_hls
from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl
from brainsmith.kernels.crop.crop_hls import Crop_hls
from brainsmith.kernels.softmax.hwsoftmax_hls import Softmax_hls
from brainsmith.kernels.shuffle.shuffle_hls import Shuffle_hls
@@ -27,6 +28,7 @@
'Shuffle',
# Backends
'LayerNorm_hls',
'LayerNorm_rtl',
'Crop_hls',
'Softmax_hls',
'Shuffle_hls',
3 changes: 2 additions & 1 deletion brainsmith/kernels/layernorm/__init__.py
@@ -5,6 +5,7 @@
# Components auto-register via decorators
from .layernorm import LayerNorm
from .layernorm_hls import LayerNorm_hls as LayerNormHLS
from .layernorm_rtl import LayerNorm_rtl as LayerNormRTL
from .infer_layernorm import InferLayerNorm

__all__ = ["LayerNorm", "LayerNormHLS", "InferLayerNorm"]
__all__ = ["LayerNorm", "LayerNormHLS", "LayerNormRTL", "InferLayerNorm"]
125 changes: 125 additions & 0 deletions brainsmith/kernels/layernorm/layernorm_rtl.py
@@ -0,0 +1,125 @@
############################################################################
# Copyright (C) 2025, Advanced Micro Devices, Inc.
# All rights reserved.
#
# SPDX-License-Identifier: MIT
#
# @author Jakoba Petri-Koenig <jakoba.petri-koenig@amd.com>
############################################################################

import os
import shutil

from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend
from brainsmith.kernels.layernorm.layernorm import LayerNorm
from brainsmith.registry import backend


@backend(name='LayerNorm_rtl', target_kernel='brainsmith:LayerNorm', language='rtl')
class LayerNorm_rtl(LayerNorm, RTLBackend):
"""RTL backend implementation for LayerNorm kernel.

Generates RTL code for hardware synthesis of LayerNorm operations.

Metadata for registry (namespace-based component registry):
- target_kernel: Which kernel this backend implements
- language: Backend language (hls/rtl/etc)
"""

# Metadata for namespace-based registry
target_kernel = 'brainsmith:LayerNorm'
language = 'rtl'

def __init__(self, onnx_node, **kwargs):
super().__init__(onnx_node, **kwargs)

def get_nodeattr_types(self):
my_attrs = {}
my_attrs.update(RTLBackend.get_nodeattr_types(self))
my_attrs.update(LayerNorm.get_nodeattr_types(self))
return my_attrs


def generate_hdl(self, model, fpgapart, clk):
rtlsrc = os.environ["BSMITH_DIR"] + "/deps/finn/finn-rtllib/layernorm/"
template_path = rtlsrc + "layernorm_wrapper_template.v"
simd = self.get_nodeattr("SIMD")
topname = self.get_verilog_top_module_name()
code_gen_dict = {
"$N$": int(self.get_normal_input_shape()[-1]),
"$SIMD$": int(simd),
"$TOP_MODULE_NAME$": topname,
}
# save top module name so we can refer to it after this node has been renamed
# (e.g. by GiveUniqueNodeNames(prefix) during MakeZynqProject)
self.set_nodeattr("gen_top_module", self.get_verilog_top_module_name())

# apply code generation to templates
code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
with open(template_path, "r") as f:
template = f.read()
for key in code_gen_dict:
template = template.replace(key, str(code_gen_dict[key]))

with open(
os.path.join(code_gen_dir, self.get_verilog_top_module_name() + ".v"),
"w",
) as f:
f.write(template)

sv_files = ["layernorm.sv", "queue.sv", "accuf.sv", "binopf.sv", "rsqrtf.sv"]
for sv_file in sv_files:
shutil.copy(rtlsrc + sv_file, code_gen_dir)
# set ipgen_path and ip_path so that the HLSSynthIP
# and CreateStitchedIP transformations do not complain
self.set_nodeattr("ipgen_path", code_gen_dir)
self.set_nodeattr("ip_path", code_gen_dir)

def get_rtl_file_list(self, abspath=False):
if abspath:
code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/"
rtllib_dir = os.environ["BSMITH_DIR"] + "/deps/finn/finn-rtllib/layernorm/"
else:
code_gen_dir = ""
rtllib_dir = ""

verilog_files = [
rtllib_dir + "layernorm.sv",
rtllib_dir + "queue.sv",
rtllib_dir + "accuf.sv",
rtllib_dir + "binopf.sv",
rtllib_dir + "rsqrtf.sv",
code_gen_dir + self.get_nodeattr("gen_top_module") + ".v",
]
return verilog_files

def code_generation_ipi(self):
"""Constructs and returns the TCL for node instantiation in Vivado IPI."""
code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")

sourcefiles = [
"layernorm.sv",
"queue.sv",
"accuf.sv",
"binopf.sv",
"rsqrtf.sv",
self.get_nodeattr("gen_top_module") + ".v",
]

sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles]

cmd = []
for f in sourcefiles:
cmd += ["add_files -norecurse %s" % (f)]
cmd += [
"create_bd_cell -type module -reference %s %s"
% (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
]
return cmd

def execute_node(self, context, graph):
mode = self.get_nodeattr("exec_mode")
if mode == "cppsim":
LayerNorm.execute_node(self, context, graph)
elif mode == "rtlsim":
RTLBackend.execute_node(self, context, graph)
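
For reviewers unfamiliar with the IPI flow: code_generation_ipi() above returns plain Vivado TCL strings that add the copied SystemVerilog sources and the generated wrapper, then instantiate the module in the block design. A minimal sketch of the returned list, assuming a node and generated top module both named LayerNorm_0 and a made-up code_gen_dir (all three are illustrative, not taken from a real build):

# Illustrative only -- node name, top module name and code_gen_dir are hypothetical.
cmd = [
    "add_files -norecurse /tmp/code_gen_ipgen_LayerNorm_0/layernorm.sv",
    "add_files -norecurse /tmp/code_gen_ipgen_LayerNorm_0/queue.sv",
    "add_files -norecurse /tmp/code_gen_ipgen_LayerNorm_0/accuf.sv",
    "add_files -norecurse /tmp/code_gen_ipgen_LayerNorm_0/binopf.sv",
    "add_files -norecurse /tmp/code_gen_ipgen_LayerNorm_0/rsqrtf.sv",
    "add_files -norecurse /tmp/code_gen_ipgen_LayerNorm_0/LayerNorm_0.v",
    "create_bd_cell -type module -reference LayerNorm_0 LayerNorm_0",
]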
21 changes: 21 additions & 0 deletions brainsmith/kernels/rtl/__init__.py
@@ -0,0 +1,21 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# flake8: noqa
# Disable linting from here, as all imports will be flagged E402 and maybe F401

"""
Brainsmith RTL Kernel Imports

This is a TEMPORARY measure to ensure RTL variants are properly registered
in the kernels.rtl namespace until backend refactoring is complete.

Similar to how FINN imports its RTL variants in:
deps/finn/src/finn/custom_op/fpgadataflow/rtl/__init__.py
"""

# Import all RTL custom ops - they will be discovered automatically via namespace
# Note: Using absolute imports to ensure proper registration

# Import Brainsmith RTL kernels
from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl
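
A quick way to sanity-check the registration path is to import this package and inspect the backend metadata; a minimal sketch (the asserts are illustrative, not an API contract):

import brainsmith.kernels.rtl  # noqa: F401 -- triggers the import-based registration above
from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl

# Metadata set on the class (see layernorm_rtl.py) and passed to the @backend decorator.
assert LayerNorm_rtl.target_kernel == "brainsmith:LayerNorm"
assert LayerNorm_rtl.language == "rtl"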
2 changes: 1 addition & 1 deletion docker/fetch-repos.sh
@@ -79,7 +79,7 @@ fi
declare -A GIT_DEPS=(
["brevitas"]="https://github.com/Xilinx/brevitas.git@95edaa0bdc8e639e39b1164466278c59df4877be"
["qonnx"]="https://github.com/fastmachinelearning/qonnx.git@f2c4ccd3e71795c9f116ee5a0c87a7dfd590c6d0"
["finn"]="https://github.com/tafk7/finn.git@custom/transformer"
["finn"]="https://github.com/Xilinx/finn.git@custom/transformer"
["onnxscript"]="https://github.com/jsmonson/onnxscript.git@62c7110aba46554432ce8e82ba2d8a086bd6227c"
["finn-experimental"]="https://github.com/Xilinx/finn-experimental.git@0724be21111a21f0d81a072fccc1c446e053f851"
["dataset-loading"]="https://github.com/fbcotter/dataset_loading.git@0.0.4"
117 changes: 117 additions & 0 deletions tests/unit/test_fpgadataflow_layernorm.py
@@ -0,0 +1,117 @@
############################################################################
# Copyright (C) 2025, Advanced Micro Devices, Inc.
# All rights reserved.
#
# SPDX-License-Identifier: MIT
#
############################################################################

from typing import Tuple
import pytest
import torch
import onnx
import torch.nn as nn
import brevitas.nn as qnn
import finn.core.onnx_exec as oxe
from qonnx.util.cleanup import cleanup as qonnx_cleanup
from onnx import TensorProto, helper
from qonnx.core.datatype import DataType
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.custom_op.registry import getCustomOp
from qonnx.transformation.infer_shapes import InferShapes
from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
from qonnx.transformation.infer_datatypes import InferDataTypes
import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
from brainsmith.kernels.layernorm.infer_layernorm import InferLayerNorm
from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
from finn.transformation.fpgadataflow.create_dataflow_partition import (
CreateDataflowPartition,
)
from brainsmith.primitives.transforms.expand_norms import ExpandNorms

# Debugging dependencies, to remove
import os

from qonnx.transformation.fold_constants import FoldConstants

from qonnx.transformation.general import (
ApplyConfig,
GiveUniqueNodeNames,
)

import numpy as np

test_fpga_part = "xcv80-lsva4737-2MHP-e-s"
target_clk_ns = 5

def create_layernorm_model(epsilon):

tshape = [1, 128, 384]
scale_bias_shape = tshape[-1]
inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, tshape)
outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, tshape)
LayerNorm_scale = helper.make_tensor_value_info("LayerNorm_Scale", TensorProto.FLOAT, [scale_bias_shape])
LayerNorm_bias = helper.make_tensor_value_info("LayerNorm_Bias", TensorProto.FLOAT, [scale_bias_shape])

ln_node = helper.make_node(
'LayerNormalization',
inputs=["inp", "LayerNorm_Scale", "LayerNorm_Bias"],
outputs=["outp"],
name='Layernorm_0',
epsilon=epsilon,
axis=-1,
stash_type=1,
)

# Create model
graph = helper.make_graph(
nodes=[ln_node], name="LayerNorm_graph", inputs=[inp], outputs=[outp]
)
model = qonnx_make_model(graph, producer_name="LayerNorm_graph")
model = ModelWrapper(model)

# Tensor initializers
max_scale = 2**(8/2)
max_bias = 2**(8/2)
model.set_initializer("LayerNorm_Scale", (max_scale*np.random.rand(scale_bias_shape)).astype(np.float32))
model.set_initializer("LayerNorm_Bias", (max_bias*np.random.rand(scale_bias_shape)).astype(np.float32))

return model

def test_fpgadataflow_layernorm():
model = create_layernorm_model(epsilon=9.999999960041972e-13)

# reference calculation
input = gen_finn_dt_tensor(DataType["FLOAT32"], [1, 128, 384])
input_t = {model.graph.input[0].name: input}

y_ref = oxe.execute_onnx(model, input_t)[model.graph.output[0].name]

model = model.transform(ExpandNorms())
model = model.transform(InferShapes())
model = model.transform(InferDataTypes())
model = model.transform(InferLayerNorm())
model = model.transform(to_hw.InferElementwiseBinaryOperation())
model = model.transform(SpecializeLayers(test_fpga_part))
model = model.transform(GiveUniqueNodeNames())
getCustomOp(model.graph.node[0]).set_nodeattr("SIMD", 8)

# Execute
model = model.transform(SetExecMode("rtlsim"))
model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
model = model.transform(HLSSynthIP())
model = model.transform(PrepareRTLSim())

input_t = {model.graph.input[0].name: input}

y_hw = oxe.execute_onnx(model, input_t)[model.graph.output[0].name]

assert np.allclose(y_ref, y_hw, rtol=1e-3, atol=2**-4)
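
If it helps review, a hedged snippet that could be appended to the test to make the intent explicit; it assumes SpecializeLayers resolves the LayerNorm node to the RTL variant, which is the point of this PR but is not asserted in the committed test:

# Illustrative addition (not in the committed test): check the RTL backend was selected.
# Assumes SpecializeLayers specializes LayerNorm to LayerNorm_rtl for this FPGA part.
rtl_nodes = model.get_nodes_by_op_type("LayerNorm_rtl")
assert len(rtl_nodes) == 1, "expected the LayerNorm node to specialize to the RTL backend"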