From 68cbb1a93d45b863b503fc321744450ebe2062a7 Mon Sep 17 00:00:00 2001
From: auphelia
Date: Wed, 5 Nov 2025 16:24:15 +0000
Subject: [PATCH 1/5] [Layernorm] Add scaffolding for RTL layernorm

---
 brainsmith/kernels/layernorm/layernorm_rtl.py | 118 ++++++++++++++++++
 .../layernorm/layernorm_wrapper_template.v    |  37 ++++++
 2 files changed, 155 insertions(+)
 create mode 100644 brainsmith/kernels/layernorm/layernorm_rtl.py
 create mode 100644 brainsmith/kernels/layernorm/layernorm_wrapper_template.v

diff --git a/brainsmith/kernels/layernorm/layernorm_rtl.py b/brainsmith/kernels/layernorm/layernorm_rtl.py
new file mode 100644
index 00000000..d27536ea
--- /dev/null
+++ b/brainsmith/kernels/layernorm/layernorm_rtl.py
@@ -0,0 +1,118 @@
+############################################################################
+# Copyright (C) 2025, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+# @author Jakoba Petri-Koenig
+############################################################################
+
+import os
+
+from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend
+from brainsmith.kernels.layernorm.layernorm import LayerNorm
+from brainsmith.registry import backend
+
+
+@backend(name='LayerNorm_rtl', target_kernel='brainsmith:LayerNorm', language='rtl')
+class LayerNorm_hls(LayerNorm, RTLBackend):
+    """RTL backend implementation for LayerNorm kernel.
+
+    Generates RTL code for hardware synthesis of LayerNorm operations.
+
+    Metadata for registry (namespace-based component registry):
+    - target_kernel: Which kernel this backend implements
+    - language: Backend language (hls/rtl/etc)
+    """
+
+    # Metadata for namespace-based registry
+    target_kernel = 'brainsmith:LayerNorm'
+    language = 'rtl'
+
+    def __init__(self, onnx_node, **kwargs):
+        super().__init__(onnx_node, **kwargs)
+
+    def get_nodeattr_types(self):
+        my_attrs = {}
+        my_attrs.update(RTLBackend.get_nodeattr_types(self))
+        my_attrs.update(LayerNorm.get_nodeattr_types(self))
+        return my_attrs
+
+
+    def generate_hdl(self, model, fpgapart, clk):
+        # wrapper file is in the same directory as this file
+        rtlsrc = os.path.dirname(os.path.abspath(__file__))
+        template_path = rtlsrc + "/layernorm_template_wrapper.v"
+        simd = self.get_nodeattr("SIMD")
+        topname = self.get_verilog_top_module_name()
+        code_gen_dict = {
+            "$N$": int(chans),
+            "$SIMD$": int(simd),
+            "$TOP_MODULE_NAME$": topname,
+        }
+        # save top module name so we can refer to it after this node has been renamed
+        # (e.g. by GiveUniqueNodeNames(prefix) during MakeZynqProject)
+        self.set_nodeattr("gen_top_module", self.get_verilog_top_module_name())
+
+        # apply code generation to templates
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+        with open(template_path, "r") as f:
+            template = f.read()
+        for key in code_gen_dict:
+            template = template.replace(key, str(code_gen_dict[key]))
+
+        with open(
+            os.path.join(code_gen_dir, self.get_verilog_top_module_name() + ".v"),
+            "w",
+        ) as f:
+            f.write(template)
+
+        # TODO: add sv files here
+        sv_files = []
+        for sv_file in sv_files:
+            shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir)
+        # set ipgen_path and ip_path so that HLS-Synth transformation
+        # and stitch_ip transformation do not complain
+        self.set_nodeattr("ipgen_path", code_gen_dir)
+        self.set_nodeattr("ip_path", code_gen_dir)
+
+    def get_rtl_file_list(self, abspath=False):
+        if abspath:
+            code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/"
+            rtllib_dir = os.path.dirname(os.path.abspath(__file__))
+        else:
+            code_gen_dir = ""
+            rtllib_dir = ""
+
+        # TODO: add sv files
+        verilog_files = [
+            code_gen_dir + self.get_nodeattr("gen_top_module") + ".v",
+        ]
+        return verilog_files
+
+    def code_generation_ipi(self):
+        """Constructs and returns the TCL for node instantiation in Vivado IPI."""
+        code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
+
+        # TODO: needs to be expanded when code is checked in
+        sourcefiles = [
+            self.get_nodeattr("gen_top_module") + ".v",
+        ]
+
+        sourcefiles = [os.path.join(code_gen_dir, f) for f in sourcefiles]
+
+        cmd = []
+        for f in sourcefiles:
+            cmd += ["add_files -norecurse %s" % (f)]
+        cmd += [
+            "create_bd_cell -type module -reference %s %s"
+            % (self.get_nodeattr("gen_top_module"), self.onnx_node.name)
+        ]
+        return cmd
+
+    def execute_node(self, context, graph):
+        mode = self.get_nodeattr("exec_mode")
+        if mode == "cppsim":
+            LayerNorm.execute_node(self, context, graph)
+        elif mode == "rtlsim":
+            RTLBackend.execute_node(self, context, graph)
diff --git a/brainsmith/kernels/layernorm/layernorm_wrapper_template.v b/brainsmith/kernels/layernorm/layernorm_wrapper_template.v
new file mode 100644
index 00000000..c35473da
--- /dev/null
+++ b/brainsmith/kernels/layernorm/layernorm_wrapper_template.v
@@ -0,0 +1,37 @@
+module $TOP_MODULE_NAME$(
+//- Global Control ------------------
+(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out0_V, ASSOCIATED_RESET = ap_rst_n" *)
+(* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *)
+input ap_clk,
+(* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *)
+input ap_rst_n,
+
+//- AXI Stream - Input -------------------
+output in0_V_TREADY,
+input in0_V_TVALID,
+input [$SIMD$-1:0][31:0] in0_V_TDATA,
+
+//- AXI Stream - Output ------------------
+input out0_V_TREADY,
+output out0_V_TVALID,
+output [$SIMD$-1:0][31:0] out0_V_TDATA
+);
+
+
+layernorm #(
+    .N($N$),
+    .SIMD($SIMD$)
+)
+impl
+(
+    .clk(ap_clk),
+    .rst(ap_rst_n),
+    .xdat(in0_V_TDATA),
+    .xvld(in0_V_TVALID),
+    .xrdy(in0_V_TREADY),
+    .ydat(out0_V_TDATA),
+    .yvld(out0_V_TVALID),
+    .yrdy(out0_V_TREADY),
+);
+
+endmodule

From b2c87dfe23d5eeded5295b827439c1c45b85d5cc Mon Sep 17 00:00:00 2001
From: auphelia
Date: Thu, 6 Nov 2025 17:30:47 +0000
Subject: [PATCH 2/5] [Layernorm] Add test and kernel integration

---
 brainsmith/kernels/__init__.py                |   2 +
 brainsmith/kernels/layernorm/__init__.py      |   3 +-
 brainsmith/kernels/layernorm/layernorm_rtl.py |  22 ++--
 .../layernorm/layernorm_wrapper_template.v    |  12 +-
 brainsmith/kernels/rtl/__init__.py            |  21 ++++
 tests/unit/test_fpgadataflow_layernorm.py     | 113 ++++++++++++++++++
 6 files changed, 163 insertions(+), 10 deletions(-)
 create mode 100644 brainsmith/kernels/rtl/__init__.py
 create mode 100644 tests/unit/test_fpgadataflow_layernorm.py

diff --git a/brainsmith/kernels/__init__.py b/brainsmith/kernels/__init__.py
index 5be5f4f1..52969c0e 100644
--- a/brainsmith/kernels/__init__.py
+++ b/brainsmith/kernels/__init__.py
@@ -15,6 +15,7 @@
 
 # Backends
 from brainsmith.kernels.layernorm.layernorm_hls import LayerNorm_hls
+from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl
 from brainsmith.kernels.crop.crop_hls import Crop_hls
 from brainsmith.kernels.softmax.hwsoftmax_hls import Softmax_hls
 from brainsmith.kernels.shuffle.shuffle_hls import Shuffle_hls
@@ -27,6 +28,7 @@
     'Shuffle',
     # Backends
     'LayerNorm_hls',
+    'LayerNorm_rtl',
     'Crop_hls',
     'Softmax_hls',
     'Shuffle_hls',
diff --git a/brainsmith/kernels/layernorm/__init__.py b/brainsmith/kernels/layernorm/__init__.py
index 755295e9..65f5c1fd 100644
--- a/brainsmith/kernels/layernorm/__init__.py
+++ b/brainsmith/kernels/layernorm/__init__.py
@@ -5,6 +5,7 @@
 # Components auto-register via decorators
 from .layernorm import LayerNorm
 from .layernorm_hls import LayerNorm_hls as LayerNormHLS
+from .layernorm_rtl import LayerNorm_rtl as LayerNormRTL
 from .infer_layernorm import InferLayerNorm
 
-__all__ = ["LayerNorm", "LayerNormHLS", "InferLayerNorm"]
\ No newline at end of file
+__all__ = ["LayerNorm", "LayerNormHLS", "LayerNormRTL", "InferLayerNorm"]
diff --git a/brainsmith/kernels/layernorm/layernorm_rtl.py b/brainsmith/kernels/layernorm/layernorm_rtl.py
index d27536ea..aaf6eab9 100644
--- a/brainsmith/kernels/layernorm/layernorm_rtl.py
+++ b/brainsmith/kernels/layernorm/layernorm_rtl.py
@@ -8,6 +8,7 @@
 ############################################################################
 
 import os
+import shutil
 
 from finn.custom_op.fpgadataflow.rtlbackend import RTLBackend
 from brainsmith.kernels.layernorm.layernorm import LayerNorm
@@ -15,7 +16,7 @@
 
 
 @backend(name='LayerNorm_rtl', target_kernel='brainsmith:LayerNorm', language='rtl')
-class LayerNorm_hls(LayerNorm, RTLBackend):
+class LayerNorm_rtl(LayerNorm, RTLBackend):
     """RTL backend implementation for LayerNorm kernel.
 
     Generates RTL code for hardware synthesis of LayerNorm operations.
@@ -42,11 +43,11 @@ def get_nodeattr_types(self):
     def generate_hdl(self, model, fpgapart, clk):
         # wrapper file is in the same directory as this file
         rtlsrc = os.path.dirname(os.path.abspath(__file__))
-        template_path = rtlsrc + "/layernorm_template_wrapper.v"
+        template_path = rtlsrc + "/layernorm_wrapper_template.v"
         simd = self.get_nodeattr("SIMD")
         topname = self.get_verilog_top_module_name()
         code_gen_dict = {
-            "$N$": int(chans),
+            "$N$": int(self.get_normal_input_shape()[-1]),
             "$SIMD$": int(simd),
             "$TOP_MODULE_NAME$": topname,
         }
@@ -67,8 +68,7 @@ def generate_hdl(self, model, fpgapart, clk):
         ) as f:
             f.write(template)
 
-        # TODO: add sv files here
-        sv_files = []
+        sv_files = ["layernorm.sv", "queue.sv", "accuf.sv", "binopf.sv", "rsqrtf.sv"]
         for sv_file in sv_files:
             shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir)
         # set ipgen_path and ip_path so that HLS-Synth transformation
@@ -84,8 +84,12 @@ def get_rtl_file_list(self, abspath=False):
             code_gen_dir = ""
             rtllib_dir = ""
 
-        # TODO: add sv files
         verilog_files = [
+            rtllib_dir + "/layernorm.sv",
+            rtllib_dir + "/queue.sv",
+            rtllib_dir + "/accuf.sv",
+            rtllib_dir + "/binopf.sv",
+            rtllib_dir + "/rsqrtf.sv",
             code_gen_dir + self.get_nodeattr("gen_top_module") + ".v",
         ]
         return verilog_files
@@ -94,8 +98,12 @@ def code_generation_ipi(self):
         """Constructs and returns the TCL for node instantiation in Vivado IPI."""
         code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen")
 
-        # TODO: needs to be expanded when code is checked in
         sourcefiles = [
+            "layernorm.sv",
+            "queue.sv",
+            "accuf.sv",
+            "binopf.sv",
+            "rsqrtf.sv",
             self.get_nodeattr("gen_top_module") + ".v",
         ]
 
diff --git a/brainsmith/kernels/layernorm/layernorm_wrapper_template.v b/brainsmith/kernels/layernorm/layernorm_wrapper_template.v
index c35473da..8b49cf53 100644
--- a/brainsmith/kernels/layernorm/layernorm_wrapper_template.v
+++ b/brainsmith/kernels/layernorm/layernorm_wrapper_template.v
@@ -1,6 +1,14 @@
+/****************************************************************************
+ * Copyright (C) 2025, Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ ***************************************************************************/
+
 module $TOP_MODULE_NAME$(
 //- Global Control ------------------
-(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out0_V, ASSOCIATED_RESET = ap_rst_n" *)
+(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out0_V, ASSOCIATED_RESET ap_rst_n" *)
 (* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *)
 input ap_clk,
 (* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *)
 input ap_rst_n,
@@ -31,7 +39,7 @@ impl
     .xrdy(in0_V_TREADY),
     .ydat(out0_V_TDATA),
     .yvld(out0_V_TVALID),
-    .yrdy(out0_V_TREADY),
+    .yrdy(out0_V_TREADY)
 );
 
 endmodule
diff --git a/brainsmith/kernels/rtl/__init__.py b/brainsmith/kernels/rtl/__init__.py
new file mode 100644
index 00000000..48a7d2a5
--- /dev/null
+++ b/brainsmith/kernels/rtl/__init__.py
@@ -0,0 +1,21 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+# flake8: noqa
+# Disable linting from here, as all imports will be flagged E402 and maybe F401
+
+"""
+Brainsmith RTL Kernel Imports
+
+This is a TEMPORARY measure to ensure RTL variants are properly registered
+in the kernels.rtl namespace until backend refactoring is complete.
+
+Similar to how FINN imports its RTL variants in:
+deps/finn/src/finn/custom_op/fpgadataflow/rtl/__init__.py
+"""
+
+# Import all RTL custom ops - they will be discovered automatically via namespace
+# Note: Using absolute imports to ensure proper registration
+
+# Import Brainsmith RTL kernels
+from brainsmith.kernels.layernorm.layernorm_rtl import LayerNorm_rtl
diff --git a/tests/unit/test_fpgadataflow_layernorm.py b/tests/unit/test_fpgadataflow_layernorm.py
new file mode 100644
index 00000000..cb52c7dd
--- /dev/null
+++ b/tests/unit/test_fpgadataflow_layernorm.py
@@ -0,0 +1,113 @@
+############################################################################
+# Copyright (C) 2025, Advanced Micro Devices, Inc.
+# All rights reserved.
+#
+# SPDX-License-Identifier: MIT
+#
+############################################################################
+
+from typing import Tuple
+import pytest
+import torch
+import onnx
+import torch.nn as nn
+import brevitas.nn as qnn
+import finn.core.onnx_exec as oxe
+from qonnx.util.cleanup import cleanup as qonnx_cleanup
+from onnx import TensorProto, helper
+from qonnx.core.datatype import DataType
+from qonnx.core.modelwrapper import ModelWrapper
+from qonnx.custom_op.registry import getCustomOp
+from qonnx.transformation.infer_shapes import InferShapes
+from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model
+from qonnx.transformation.infer_datatypes import InferDataTypes
+import finn.transformation.fpgadataflow.convert_to_hw_layers as to_hw
+from brainsmith.kernels.layernorm.infer_layernorm import InferLayerNorm
+from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim
+from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP
+from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim
+from finn.transformation.fpgadataflow.prepare_ip import PrepareIP
+from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim
+from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode
+from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers
+from finn.transformation.qonnx.convert_qonnx_to_finn import ConvertQONNXtoFINN
+from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP
+from finn.transformation.fpgadataflow.create_dataflow_partition import (
+    CreateDataflowPartition,
+)
+from brainsmith.primitives.transforms.expand_norms import ExpandNorms
+
+# Debugging dependencies, to remove
+import os
+
+from qonnx.transformation.fold_constants import FoldConstants
+
+from qonnx.transformation.general import (
+    ApplyConfig,
+    GiveUniqueNodeNames,
+)
+
+import numpy as np
+
+test_fpga_part = "xcv80-lsva4737-2MHP-e-s"
+target_clk_ns = 5
+
+def create_layernorm_model(epsilon):
+
+    tshape = [1, 128, 384]
+    scale_bias_shape = tshape[-1]
+    inp = helper.make_tensor_value_info("inp", TensorProto.FLOAT, tshape)
+    outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, tshape)
+    LayerNorm_scale = helper.make_tensor_value_info("LayerNorm_Scale", TensorProto.FLOAT, [scale_bias_shape])
+    LayerNorm_bias = helper.make_tensor_value_info("LayerNorm_Bias", TensorProto.FLOAT, [scale_bias_shape])
+
+    ln_node = helper.make_node(
+        'LayerNormalization',
+        inputs=["inp", "LayerNorm_Scale", "LayerNorm_Bias"],
+        outputs=["outp"],
+        name='Layernorm_0',
+        epsilon=epsilon,
+        axis=-1,
+        stash_type=1,
+    )
+
+    # Create model
+    graph = helper.make_graph(
+        nodes=[ln_node], name="LayerNorm_graph", inputs=[inp], outputs=[outp]
+    )
+    model = qonnx_make_model(graph, producer_name="LayerNorm_graph")
+    model = ModelWrapper(model)
+
+    # Tensor initializers
+    max_scale = 2**(8/2)
+    max_bias = 2**(8/2)
+    model.set_initializer("LayerNorm_Scale", (max_scale*np.random.rand(scale_bias_shape)).astype(np.float32))
+    model.set_initializer("LayerNorm_Bias", (max_bias*np.random.rand(scale_bias_shape)).astype(np.float32))
+
+    return model
+
+def test_fpgadataflow_layernorm():
+    model = create_layernorm_model(epsilon=9.999999960041972e-13)
+    model.save("layernorm.onnx")
+    model = model.transform(ExpandNorms())
+    model = model.transform(InferShapes())
+    model = model.transform(InferDataTypes())
+    model = model.transform(InferLayerNorm())
+    model = model.transform(to_hw.InferElementwiseBinaryOperation())
+    model = model.transform(SpecializeLayers(test_fpga_part))
+    model = model.transform(GiveUniqueNodeNames())
+    model.save("test.onnx")
+    import pdb; pdb.set_trace()
+
+    # Execute 
+    model = model.transform(SetExecMode("rtlsim"))
+    model = model.transform(PrepareIP(test_fpga_part, target_clk_ns))
+    model = model.transform(HLSSynthIP())
+    model = model.transform(PrepareRTLSim())
+
+    input = gen_finn_dt_tensor(DataType["FLOAT32"], [1, 128, 384])
+    in_name = model.graph.input[0].name
+    input_t = {in_name: input}
+
+    y_hw = oxe.execute_onnx(model, input_t)[model.graph.output[0].name]
+    import pdb; pdb.set_trace()

From 648845189c122ea7d9e2464f2e6ee1341bc75146 Mon Sep 17 00:00:00 2001
From: auphelia
Date: Fri, 7 Nov 2025 09:43:41 +0000
Subject: [PATCH 3/5] [Layernorm] Verilog file location and set SIMD to 8 in
 test case

---
 brainsmith/kernels/layernorm/layernorm_rtl.py |  19 ++++----
 .../layernorm/layernorm_wrapper_template.v    |  45 -------------------
 docker/fetch-repos.sh                         |   2 +-
 tests/unit/test_fpgadataflow_layernorm.py     |   6 +--
 4 files changed, 12 insertions(+), 60 deletions(-)
 delete mode 100644 brainsmith/kernels/layernorm/layernorm_wrapper_template.v

diff --git a/brainsmith/kernels/layernorm/layernorm_rtl.py b/brainsmith/kernels/layernorm/layernorm_rtl.py
index aaf6eab9..83ea9914 100644
--- a/brainsmith/kernels/layernorm/layernorm_rtl.py
+++ b/brainsmith/kernels/layernorm/layernorm_rtl.py
@@ -41,9 +41,8 @@ def get_nodeattr_types(self):
 
 
     def generate_hdl(self, model, fpgapart, clk):
-        # wrapper file is in the same directory as this file
-        rtlsrc = os.path.dirname(os.path.abspath(__file__))
-        template_path = rtlsrc + "/layernorm_wrapper_template.v"
+        rtlsrc = os.environ["BSMITH_DIR"] + "/deps/finn/finn-rtllib/layernorm/"
+        template_path = rtlsrc + "layernorm_wrapper_template.v"
         simd = self.get_nodeattr("SIMD")
         topname = self.get_verilog_top_module_name()
         code_gen_dict = {
@@ -70,7 +69,7 @@ def generate_hdl(self, model, fpgapart, clk):
 
         sv_files = ["layernorm.sv", "queue.sv", "accuf.sv", "binopf.sv", "rsqrtf.sv"]
         for sv_file in sv_files:
-            shutil.copy(rtlsrc + "/" + sv_file, code_gen_dir)
+            shutil.copy(rtlsrc + sv_file, code_gen_dir)
         # set ipgen_path and ip_path so that HLS-Synth transformation
         # and stitch_ip transformation do not complain
         self.set_nodeattr("ipgen_path", code_gen_dir)
@@ -79,17 +78,17 @@
     def get_rtl_file_list(self, abspath=False):
         if abspath:
             code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + "/"
-            rtllib_dir = os.path.dirname(os.path.abspath(__file__))
+            rtllib_dir = os.environ["BSMITH_DIR"] + "/deps/finn/finn-rtllib/layernorm/"
         else:
             code_gen_dir = ""
             rtllib_dir = ""
 
         verilog_files = [
-            rtllib_dir + "/layernorm.sv",
-            rtllib_dir + "/queue.sv",
-            rtllib_dir + "/accuf.sv",
-            rtllib_dir + "/binopf.sv",
rtllib_dir + "/rsqrtf.sv", + rtllib_dir + "layernorm.sv", + rtllib_dir + "queue.sv", + rtllib_dir + "accuf.sv", + rtllib_dir + "binopf.sv", + rtllib_dir + "rsqrtf.sv", code_gen_dir + self.get_nodeattr("gen_top_module") + ".v", ] return verilog_files diff --git a/brainsmith/kernels/layernorm/layernorm_wrapper_template.v b/brainsmith/kernels/layernorm/layernorm_wrapper_template.v deleted file mode 100644 index 8b49cf53..00000000 --- a/brainsmith/kernels/layernorm/layernorm_wrapper_template.v +++ /dev/null @@ -1,45 +0,0 @@ -/**************************************************************************** - * Copyright (C) 2025, Advanced Micro Devices, Inc. - * All rights reserved. - * - * SPDX-License-Identifier: BSD-3-Clause - * - ***************************************************************************/ - -module $TOP_MODULE_NAME$( -//- Global Control ------------------ -(* X_INTERFACE_PARAMETER = "ASSOCIATED_BUSIF in0_V:out0_V, ASSOCIATED_RESET ap_rst_n" *) -(* X_INTERFACE_INFO = "xilinx.com:signal:clock:1.0 ap_clk CLK" *) -input ap_clk, -(* X_INTERFACE_PARAMETER = "POLARITY ACTIVE_LOW" *) -input ap_rst_n, - -//- AXI Stream - Input ------------------- -output in0_V_TREADY, -input in0_V_TVALID, -input [$SIMD$-1:0][31:0] in0_V_TDATA, - -//- AXI Stream - Output ------------------ -input out0_V_TREADY, -output out0_V_TVALID, -output [$SIMD$-1:0][31:0] out0_V_TDATA -); - - -layernorm #( - .N($N$), - .SIMD($SIMD$) -) -impl -( - .clk(ap_clk), - .rst(ap_rst_n), - .xdat(in0_V_TDATA), - .xvld(in0_V_TVALID), - .xrdy(in0_V_TREADY), - .ydat(out0_V_TDATA), - .yvld(out0_V_TVALID), - .yrdy(out0_V_TREADY) -); - -endmodule diff --git a/docker/fetch-repos.sh b/docker/fetch-repos.sh index cf8fbab2..fe4b4209 100755 --- a/docker/fetch-repos.sh +++ b/docker/fetch-repos.sh @@ -79,7 +79,7 @@ fi declare -A GIT_DEPS=( ["brevitas"]="https://github.com/Xilinx/brevitas.git@95edaa0bdc8e639e39b1164466278c59df4877be" ["qonnx"]="https://github.com/fastmachinelearning/qonnx.git@f2c4ccd3e71795c9f116ee5a0c87a7dfd590c6d0" - ["finn"]="https://github.com/tafk7/finn.git@custom/transformer" + ["finn"]="https://github.com/Xilinx/finn.git@custom/transformer_layernorm" ["onnxscript"]="https://github.com/jsmonson/onnxscript.git@62c7110aba46554432ce8e82ba2d8a086bd6227c" ["finn-experimental"]="https://github.com/Xilinx/finn-experimental.git@0724be21111a21f0d81a072fccc1c446e053f851" ["dataset-loading"]="https://github.com/fbcotter/dataset_loading.git@0.0.4" diff --git a/tests/unit/test_fpgadataflow_layernorm.py b/tests/unit/test_fpgadataflow_layernorm.py index cb52c7dd..be26f9b5 100644 --- a/tests/unit/test_fpgadataflow_layernorm.py +++ b/tests/unit/test_fpgadataflow_layernorm.py @@ -88,7 +88,6 @@ def create_layernorm_model(epsilon): def test_fpgadataflow_layernorm(): model = create_layernorm_model(epsilon=9.999999960041972e-13) - model.save("layernorm.onnx") model = model.transform(ExpandNorms()) model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) @@ -96,10 +95,9 @@ def test_fpgadataflow_layernorm(): model = model.transform(to_hw.InferElementwiseBinaryOperation()) model = model.transform(SpecializeLayers(test_fpga_part)) model = model.transform(GiveUniqueNodeNames()) - model.save("test.onnx") - import pdb; pdb.set_trace() + getCustomOp(model.graph.node[0]).set_nodeattr("SIMD", 8) - # Execute + # Execute model = model.transform(SetExecMode("rtlsim")) model = model.transform(PrepareIP(test_fpga_part, target_clk_ns)) model = model.transform(HLSSynthIP()) From 6620640b2dbb126c9796af6ac46868cad8b36a76 Mon Sep 17 
00:00:00 2001 From: auphelia Date: Fri, 7 Nov 2025 14:37:44 +0000 Subject: [PATCH 4/5] [Layernorm] Add functional verification to test --- tests/unit/test_fpgadataflow_layernorm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_fpgadataflow_layernorm.py b/tests/unit/test_fpgadataflow_layernorm.py index be26f9b5..b2ce93e3 100644 --- a/tests/unit/test_fpgadataflow_layernorm.py +++ b/tests/unit/test_fpgadataflow_layernorm.py @@ -88,6 +88,13 @@ def create_layernorm_model(epsilon): def test_fpgadataflow_layernorm(): model = create_layernorm_model(epsilon=9.999999960041972e-13) + + # reference calculation + input = gen_finn_dt_tensor(DataType["FLOAT32"], [1, 128, 384]) + input_t = {model.graph.input[0].name: input} + + y_ref = oxe.execute_onnx(model, input_t)[model.graph.output[0].name] + model = model.transform(ExpandNorms()) model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) @@ -103,9 +110,8 @@ def test_fpgadataflow_layernorm(): model = model.transform(HLSSynthIP()) model = model.transform(PrepareRTLSim()) - input = gen_finn_dt_tensor(DataType["FLOAT32"], [1, 128, 384]) - in_name = model.graph.input[0].name - input_t = {in_name: input} + input_t = {model.graph.input[0].name: input} y_hw = oxe.execute_onnx(model, input_t)[model.graph.output[0].name] - import pdb; pdb.set_trace() + + assert np.allclose(y_ref, y_hw, rtol=1e-3, atol=2**-4) From 16da2187c88be77317d98eaa453a1225b285463c Mon Sep 17 00:00:00 2001 From: auphelia Date: Fri, 7 Nov 2025 18:09:03 +0000 Subject: [PATCH 5/5] [Deps] Reset finn commit to custom/transformer --- docker/fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/fetch-repos.sh b/docker/fetch-repos.sh index fe4b4209..3382fbea 100755 --- a/docker/fetch-repos.sh +++ b/docker/fetch-repos.sh @@ -79,7 +79,7 @@ fi declare -A GIT_DEPS=( ["brevitas"]="https://github.com/Xilinx/brevitas.git@95edaa0bdc8e639e39b1164466278c59df4877be" ["qonnx"]="https://github.com/fastmachinelearning/qonnx.git@f2c4ccd3e71795c9f116ee5a0c87a7dfd590c6d0" - ["finn"]="https://github.com/Xilinx/finn.git@custom/transformer_layernorm" + ["finn"]="https://github.com/Xilinx/finn.git@custom/transformer" ["onnxscript"]="https://github.com/jsmonson/onnxscript.git@62c7110aba46554432ce8e82ba2d8a086bd6227c" ["finn-experimental"]="https://github.com/Xilinx/finn-experimental.git@0724be21111a21f0d81a072fccc1c446e053f851" ["dataset-loading"]="https://github.com/fbcotter/dataset_loading.git@0.0.4"
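
---

Reviewer note: the wrapper generation introduced in patches 1 and 3 is plain string substitution. generate_hdl reads the Verilog template, replaces the $N$, $SIMD$ and $TOP_MODULE_NAME$ placeholders from code_gen_dict, and writes the specialized wrapper into code_gen_dir next to the copied .sv sources. A minimal standalone sketch of that mechanism (the placeholder values here are illustrative, chosen to match the test: N=384, SIMD=8; no templating engine is involved):

```python
# Placeholder values as generate_hdl computes them: $N$ is the size of the
# normalized (last) axis, $SIMD$ the input parallelism, and $TOP_MODULE_NAME$
# the unique Verilog top-level name for this node.
code_gen_dict = {
    "$N$": 384,
    "$SIMD$": 8,
    "$TOP_MODULE_NAME$": "LayerNorm_rtl_0",  # hypothetical node name
}

template = (
    "module $TOP_MODULE_NAME$(/* ports */);\n"
    "layernorm #(.N($N$), .SIMD($SIMD$)) impl (/* connections */);\n"
    "endmodule\n"
)

# Same loop as in generate_hdl: plain textual replacement on the whole file.
for key, value in code_gen_dict.items():
    template = template.replace(key, str(value))

print(template)
# module LayerNorm_rtl_0(/* ports */);
# layernorm #(.N(384), .SIMD(8)) impl (/* connections */);
# endmodule
```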
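The functional check added in patch 4 compares the ONNX LayerNormalization reference against rtlsim output with rtol=1e-3 and atol=2**-4. A NumPy sketch of the reference the hardware must match, assuming plain FP32 arithmetic throughout (epsilon, axis and shapes come from the test; this is the ONNX-level reference, not the RTL's internal arithmetic):

```python
import numpy as np

def layernorm_ref(x, scale, bias, epsilon=9.999999960041972e-13):
    # Normalize over the last axis (axis=-1, biased variance as in ONNX
    # LayerNormalization), then apply per-channel scale and bias.
    mean = x.mean(axis=-1, keepdims=True)
    var = x.var(axis=-1, keepdims=True)
    return (x - mean) / np.sqrt(var + epsilon) * scale + bias

x = np.random.randn(1, 128, 384).astype(np.float32)
scale = np.random.rand(384).astype(np.float32)
bias = np.random.rand(384).astype(np.float32)
y_ref = layernorm_ref(x, scale, bias)

# The test accepts a hardware result y_hw when:
#     np.allclose(y_ref, y_hw, rtol=1e-3, atol=2**-4)
```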