diff --git a/brainsmith/kernels/__init__.py b/brainsmith/kernels/__init__.py index 5be5f4f1..52969c0e 100644 --- a/brainsmith/kernels/__init__.py +++ b/brainsmith/kernels/__init__.py @@ -15,6 +15,7 @@ # Backends from brainsmith.kernels.layernorm.layernorm_hls import LayerNorm_hls +from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl from brainsmith.kernels.crop.crop_hls import Crop_hls from brainsmith.kernels.softmax.hwsoftmax_hls import Softmax_hls from brainsmith.kernels.shuffle.shuffle_hls import Shuffle_hls @@ -27,6 +28,7 @@ 'Shuffle', # Backends 'LayerNorm_hls', + 'LayerNorm_rtl', 'Crop_hls', 'Softmax_hls', 'Shuffle_hls', diff --git a/brainsmith/kernels/layernorm/__init__.py b/brainsmith/kernels/layernorm/__init__.py index 755295e9..65f5c1fd 100644 --- a/brainsmith/kernels/layernorm/__init__.py +++ b/brainsmith/kernels/layernorm/__init__.py @@ -5,6 +5,7 @@ # Components auto-register via decorators from .layernorm import LayerNorm from .layernorm_hls import LayerNorm_hls as LayerNormHLS +from .layernorm_rtl import LayerNorm_rtl as LayerNormRTL from .infer_layernorm import InferLayerNorm -__all__ = ["LayerNorm", "LayerNormHLS", "InferLayerNorm"] \ No newline at end of file +__all__ = ["LayerNorm", "LayerNormHLS", "LayerNormRTL", "InferLayerNorm"] diff --git a/brainsmith/kernels/layernorm/layernorm_rtl.py b/brainsmith/kernels/layernorm/layernorm_rtl.py new file mode 100644 index 00000000..83ea9914 --- /dev/null +++ b/brainsmith/kernels/layernorm/layernorm_rtl.py @@ -0,0 +1,125 @@ +############################################################################ +# Copyright (C) 2025, Advanced Micro Devices, Inc. +# All rights reserved. +# +# SPDX-License-Identifier: MIT +# +# @author Jakoba Petri-Koenig +############################################################################ + +import os +import shutil + +from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend +from brainsmith.kernels.layernorm.layernorm import LayerNorm +from brainsmith.registry import backend + + +@backend(name='LayerNorm_rtl', target_kernel='brainsmith:LayerNorm', language='rtl') +class LayerNorm_rtl(LayerNorm, RTLBackend): + """RTL backend implementation for LayerNorm kernel. + + Generates RTL code for hardware synthesis of LayerNorm operations. + + Metadata for registry (namespace-based component registry): + - target_kernel: Which kernel this backend implements + - language: Backend language (hls/rtl/etc) + """ + + # Metadata for namespace-based registry + target_kernel = 'brainsmith:LayerNorm' + language = 'rtl' + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(RTLBackend.get_nodeattr_types(self)) + my_attrs.update(LayerNorm.get_nodeattr_types(self)) + return my_attrs + + + def generate_hdl(self, model, fpgapart, clk): + rtlsrc = os.environ["BSMITH_DIR"] + "/deps/finn/finn-rtllib/layernorm/" + template_path = rtlsrc + "layernorm_wrapper_template.v" + simd = self.get_nodeattr("SIMD") + topname = self.get_verilog_top_module_name() + code_gen_dict = { + "$N$": int(self.get_normal_input_shape()[-1]), + "$SIMD$": int(simd), + "$TOP_MODULE_NAME$": topname, + } + # save top module name so we can refer to it after this node has been renamed + # (e.g. 
by GiveUniqueNodeNames(prefix) during MakeZynqProject)
+        self.set_nodeattr("gen_top_module", self.get_verilog_top_module_name())
+
+        # apply code generation to templates
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        with open(template_path, "r") as f:
+            template = f.read()
+        for key in code_gen_dict:
+            template = template.replace(key, str(code_gen_dict[key]))
+
+        with open(
+            os.path.join(code_gen_dir, self.get_verilog_top_module_name() + ".v"),
+            "w",
+        ) as f:
+            f.write(template)
+
+        sv_files = ["layernorm.sv", "queue.sv", "accuf.sv", "binopf.sv", "rsqrtf.sv"]
+        for sv_file in sv_files:
+            shutil.copy(rtlsrc + sv_file, code_gen_dir)
+        # set ipgen_path and ip_path so that the HLSSynthIP transformation
+        # and the CreateStitchedIP transformation do not complain
+        self.set_nodeattr("ipgen_path", code_gen_dir)
+        self.set_nodeattr("ip_path", code_gen_dir)
+
+    def get_rtl_file_list(self, abspath=False):
+        if abspath:
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/"
+            rtllib_dir = os.environ["BSMITH_DIR"] + "/deps/finn/finn-rtllib/layernorm/"
+        else:
+            code_gen_dir = ""
+            rtllib_dir = ""
+
+        verilog_files = [
+            rtllib_dir + "layernorm.sv",
+            rtllib_dir + "queue.sv",
+            rtllib_dir + "accuf.sv",
+            rtllib_dir + "binopf.sv",
+            rtllib_dir + "rsqrtf.sv",
+            code_gen_dir + self.get_nodeattr("gen_top_module") + ".v",
+        ]
+        return verilog_files
+
+    def code_generation_ipi(self):
+        """Constructs and returns the TCL for node instantiation in Vivado IPI."""
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        sourcefiles = [
+            "layernorm.sv",
+            "queue.sv",
+            "accuf.sv",
+            "binopf.sv",
+            "rsqrtf.sv",
+            self.get_nodeattr("gen_top_module") + ".v",
+        ]
+
+        sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles]
+
+        cmd = []
+        for f in sourcefiles:
+            cmd += ["add_files -norecurse %s" % (f)]
+        cmd += [
+            "create_bd_cell -type module -reference %s %s"
+            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
+        ]
+        return cmd
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        if mode == "cppsim":
+            LayerNorm.execute_node(self, context, graph)
+        elif mode == "rtlsim":
+            RTLBackend.execute_node(self, context, graph)
diff --git a/brainsmith/kernels/rtl/__init__.py b/brainsmith/kernels/rtl/__init__.py
new file mode 100644
index 00000000..48a7d2a5
--- /dev/null
+++ b/brainsmith/kernels/rtl/__init__.py
@@ -0,0 +1,21 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+# flake8: noqa
+# Disable linting here, since the imports below would be flagged E402 and possibly F401
+
+"""
+Brainsmith RTL Kernel Imports
+
+This is a TEMPORARY measure to ensure RTL variants are properly registered
+in the kernels.rtl namespace until backend refactoring is complete.
+ +Similar to how FINN imports its RTL variants in: +deps/finn/src/finn/custom_op/fpgadataflow/rtl/__init__.py +""" + +# Import all RTL custom ops - they will be discovered automatically via namespace +# Note: Using absolute imports to ensure proper registration + +# Import Brainsmith RTL kernels +from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl diff --git a/docker/fetch-repos.sh b/docker/fetch-repos.sh index cf8fbab2..3382fbea 100755 --- a/docker/fetch-repos.sh +++ b/docker/fetch-repos.sh @@ -79,7 +79,7 @@ fi declare -A GIT_DEPS=( ["brevitas"]="https://github.com/Xilinx/brevitas.git@95edaa0bdc8e639e39b1164466278c59df4877be" ["qonnx"]="https://github.com/fastmachinelearning/qonnx.git@f2c4ccd3e71795c9f116ee5a0c87a7dfd590c6d0" - ["finn"]="https://github.com/tafk7/finn.git@custom/transformer" + ["finn"]="https://github.com/Xilinx/finn.git@custom/transformer" ["onnxscript"]="https://github.com/jsmonson/onnxscript.git@62c7110aba46554432ce8e82ba2d8a086bd6227c" ["finn-experimental"]="https://github.com/Xilinx/finn-experimental.git@0724be21111a21f0d81a072fccc1c446e053f851" ["dataset-loading"]="https://github.com/fbcotter/dataset_loading.git@0.0.4" diff --git a/tests/unit/test_fpgadataflow_layernorm.py b/tests/unit/test_fpgadataflow_layernorm.py new file mode 100644 index 00000000..b2ce93e3 --- /dev/null +++ b/tests/unit/test_fpgadataflow_layernorm.py @@ -0,0 +1,117 @@ +############################################################################ +# Copyright (C) 2025, Advanced Micro Devices, Inc. +# All rights reserved. +# +# SPDX-License-Identifier: MIT +# +############################################################################ + +from typing import Tuple +import pytest +import torch +import onnx +import torch.nn as nn +import brevitas.nn as qnn +import finn.core.onnx_exec as oxe +from qonnx.util.cleanup import cleanup as qonnx_cleanup +from onnx import TensorProto, helper +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.custom_op.registry import getCustomOp +from qonnx.transformation.infer_shapes import InferShapes +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model +from qonnx.transformation.infer_datatypes import InferDataTypes +import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw +from brainsmith.kernels.layernorm.infer_layernorm import InferLayerNorm +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers +from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.create_dataflow_partition import ( + CreateDataflowPartition, +) +from brainsmith.primitives.transforms.expand_norms import ExpandNorms + +# Debugging dependencies, to remove +import os + +from qonnx.transformation.fold_constants import FoldConstants + +from qonnx.transformation.general import ( + ApplyConfig, + GiveUniqueNodeNames, +) + +import numpy as np + +test_fpga_part = "xcv80-lsva4737-2MHP-e-s" +target_clk_ns = 5 + +def 
create_layernorm_model(epsilon):
+
+    tshape = [1, 128, 384]
+    scale_bias_shape = tshape[-1]
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, tshape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, tshape)
+    LayerNorm_scale = helper.make_tensor_value_info("LayerNorm_Scale", TensorProto.FLOAT, [scale_bias_shape])
+    LayerNorm_bias = helper.make_tensor_value_info("LayerNorm_Bias", TensorProto.FLOAT, [scale_bias_shape])
+
+    ln_node = helper.make_node(
+        'LayerNormalization',
+        inputs=["inp", "LayerNorm_Scale", "LayerNorm_Bias"],
+        outputs=["outp"],
+        name='Layernorm_0',
+        epsilon=epsilon,
+        axis=-1,
+        stash_type=1,
+    )
+
+    # Create model
+    graph = helper.make_graph(
+        nodes=[ln_node], name="LayerNorm_graph", inputs=[inp], outputs=[outp]
+    )
+    model = qonnx_make_model(graph, producer_name="LayerNorm_graph")
+    model = ModelWrapper(model)
+
+    # Random scale and bias initializers, magnitudes bounded by 2**(8/2) = 16
+    max_scale = 2**(8/2)
+    max_bias = 2**(8/2)
+    model.set_initializer("LayerNorm_Scale", (max_scale*np.random.rand(scale_bias_shape)).astype(np.float32))
+    model.set_initializer("LayerNorm_Bias", (max_bias*np.random.rand(scale_bias_shape)).astype(np.float32))
+
+    return model
+
+def test_fpgadataflow_layernorm():
+    model = create_layernorm_model(epsilon=9.999999960041972e-13)
+
+    # reference calculation on the float ONNX model
+    x = gen_finn_dt_tensor(DataType["FLOAT32"], [1, 128, 384])
+    input_t = {model.graph.input[0].name: x}
+
+    y_ref = oxe.execute_onnx(model, input_t)[model.graph.output[0].name]
+
+    model = model.transform(ExpandNorms())
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model = model.transform(InferLayerNorm())
+    model = model.transform(to_hw.InferElementwiseBinaryOperation())
+    model = model.transform(SpecializeLayers(test_fpga_part))
+    model = model.transform(GiveUniqueNodeNames())
+    getCustomOp(model.graph.node[0]).set_nodeattr("SIMD", 8)
+
+    # Execute
+    model = model.transform(SetExecMode("rtlsim"))
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(PrepareRTLSim())
+
+    input_t = {model.graph.input[0].name: x}
+
+    y_hw = oxe.execute_onnx(model, input_t)[model.graph.output[0].name]
+
+    assert np.allclose(y_ref, y_hw, rtol=1e-3, atol=2**-4)
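
Note on extending the test: the unit test above pins the LayerNorm folding factor to SIMD=8; a natural follow-up is to sweep SIMD with pytest parametrization. The sketch below is illustrative only. It assumes it lives in the same tests/unit/test_fpgadataflow_layernorm.py module, reusing its imports, create_layernorm_model, test_fpga_part and target_clk_ns; the SIMD values are example choices assumed to divide the feature dimension of 384 and have not been validated against the RTL.

@pytest.mark.parametrize("simd", [1, 8, 16])  # illustrative folding factors (assumed divisors of 384)
def test_fpgadataflow_layernorm_simd(simd):
    # build the float LayerNormalization model and record a golden reference
    model = create_layernorm_model(epsilon=9.999999960041972e-13)
    x = gen_finn_dt_tensor(DataType["FLOAT32"], [1, 128, 384])
    y_ref = oxe.execute_onnx(model, {model.graph.input[0].name: x})[model.graph.output[0].name]

    # same lowering pipeline as test_fpgadataflow_layernorm above
    model = model.transform(ExpandNorms())
    model = model.transform(InferShapes())
    model = model.transform(InferDataTypes())
    model = model.transform(InferLayerNorm())
    model = model.transform(to_hw.InferElementwiseBinaryOperation())
    model = model.transform(SpecializeLayers(test_fpga_part))
    model = model.transform(GiveUniqueNodeNames())

    # fold the LayerNorm node by the parametrized SIMD value
    getCustomOp(model.graph.node[0]).set_nodeattr("SIMD", simd)

    # generate IP and run node-by-node rtlsim
    model = model.transform(SetExecMode("rtlsim"))
    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
    model = model.transform(HLSSynthIP())
    model = model.transform(PrepareRTLSim())

    y_hw = oxe.execute_onnx(model, {model.graph.input[0].name: x})[model.graph.output[0].name]
    assert np.allclose(y_ref, y_hw, rtol=1e-3, atol=2**-4)

Keeping the tolerance identical to the single-SIMD test makes any folding-dependent accuracy regression easy to spot, at the cost of one rtlsim run per SIMD value.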