From 1c39d1fc1fcda72635facf5ab61f1110d0091b9c Mon Sep 17 00:00:00 2001 From: Maarten Vandersteegen Date: Fri, 6 Mar 2026 09:12:43 +0100 Subject: [PATCH 1/4] Added unittests folder * Added test cases for pulp_matmul_fp32 * Fixed one bug --- lib/sources/pulp_matmul_fp32.c | 4 +- unittests/README.md | 51 +++ unittests/generic/support/pmsis.h | 45 +++ unittests/generic/support/pmsis_stub.c | 52 +++ unittests/generic/test_pulp_matmul_fp32.c | 455 ++++++++++++++++++++++ unittests/project.yml | 392 +++++++++++++++++++ 6 files changed, 997 insertions(+), 2 deletions(-) create mode 100644 unittests/README.md create mode 100644 unittests/generic/support/pmsis.h create mode 100644 unittests/generic/support/pmsis_stub.c create mode 100644 unittests/generic/test_pulp_matmul_fp32.c create mode 100644 unittests/project.yml diff --git a/lib/sources/pulp_matmul_fp32.c b/lib/sources/pulp_matmul_fp32.c index 7d0f4b76..00e22266 100644 --- a/lib/sources/pulp_matmul_fp32.c +++ b/lib/sources/pulp_matmul_fp32.c @@ -2119,9 +2119,9 @@ void mm_M_u2(void *matMul_args) { // =====> B IS TRANSPOSED <===== else { for (uint32_t i = 0; i < N; i++) { - float temp = 0; - for (uint32_t j = start; j < stop; j++) { + float temp = 0; + for (uint32_t k = 0; k < (K & 0xfffffffe); k = k + 2) { temp += A[i * K + k] * B[k + j * K]; temp += A[i * K + k + 1] * B[k + 1 + j * K]; diff --git a/unittests/README.md b/unittests/README.md new file mode 100644 index 00000000..0802323b --- /dev/null +++ b/unittests/README.md @@ -0,0 +1,51 @@ +# Unit tests + +This folder contains all unit tests + +## Setup + +``` +gem install ceedling +``` + +To support code coverage reports: + +``` +pip install gcovr +``` + +## Run + +Ceedling takes care of building, running and test coverage report generation. + +Running all tests: + +``` +ceedling test:all +``` + +Running all tests with code coverage + +``` +ceedling gcov:all +``` + +HTML report can be found at `build/artifacts/gcov/gcovr/GcovCoverageResults.html`. 
+ +Running one specific test module or test case: + +``` +ceedling test:test_pulp_matmul_fp32 +ceedling test:test_pulp_matmul_fp32 --test-case=test_pulp_matmul_fp32_mm_M_u2_transp +``` + +## Debug + +You can debug a test with gdb in a specific module with: + +``` +ceedling test:test_pulp_matmul_fp32 +gdb --tui --args build/test/out/test_pulp_matmul_fp32/test_pulp_matmul_fp32.out +``` + +Once in the debugger set for example a breakpoint (`b test_pulp_matmul_fp32.c:306`) and hit `r`. diff --git a/unittests/generic/support/pmsis.h b/unittests/generic/support/pmsis.h new file mode 100644 index 00000000..561b3ddd --- /dev/null +++ b/unittests/generic/support/pmsis.h @@ -0,0 +1,45 @@ +#ifndef PMSIS_H +#define PMSIS_H + +#include + +/* + * typedefs + */ +typedef uint16_t float16alt; + +typedef enum { + PI_CL_DMA_DIR_LOC2EXT = 0, + PI_CL_DMA_DIR_EXT2LOC = 1 +} pi_cl_dma_dir_e; + +typedef struct +{ + uint32_t ext; + uint32_t loc; + uint32_t id; + uint16_t size; + pi_cl_dma_dir_e dir; \ + uint8_t merge; + // 2d transfers args + uint32_t stride; + uint32_t length; +} pi_cl_dma_copy_2d_t; + +/* + * functions + */ +int pi_core_id(void); +void pi_cl_team_fork(int nb_cores, void (*entry)(void *), void *arg); +void pi_cl_team_barrier(void); + +void pi_cl_dma_memcpy_2d(pi_cl_dma_copy_2d_t *copy); +void pi_cl_dma_wait(void *copy); + +void pi_perf_conf(unsigned int events); +void pi_perf_start(void); +void pi_perf_stop(void); +void pi_perf_reset(void); +unsigned int pi_perf_read(int event); + +#endif /* PMSIS_H */ diff --git a/unittests/generic/support/pmsis_stub.c b/unittests/generic/support/pmsis_stub.c new file mode 100644 index 00000000..0b05a26d --- /dev/null +++ b/unittests/generic/support/pmsis_stub.c @@ -0,0 +1,52 @@ +#include "pmsis.h" + +static int core_id = 0; + +int pi_core_id(void) +{ + return core_id; +} + +void pi_cl_team_fork(int nb_cores, void (*entry)(void *), void *arg) +{ + // execute in time instead of parallel + for (core_id=0; core_id Date: Mon, 9 Mar 2026 
16:20:53 +0100 Subject: [PATCH 2/4] Added test cases for pulp_conv2d_fp32.c Fixed a few bugs mainly related to bias support Moved project.yml one directory up to support correct gcovr report generation. Updated test pmsis.h with DMA stuff Also fixed bias support for pulp_fp32_linear.c, no test cases yet! --- .gitignore | 1 + lib/sources/pulp_conv2d_fp32.c | 110 +++---- lib/sources/pulp_conv_naive_fp32.c | 5 +- lib/sources/pulp_linear_fp32.c | 24 +- unittests/project.yml => project.yml | 22 +- unittests/README.md | 4 +- unittests/generic/support/pmsis.h | 5 +- unittests/generic/test_pulp_conv2d_fp32.c | 374 ++++++++++++++++++++++ 8 files changed, 462 insertions(+), 83 deletions(-) rename unittests/project.yml => project.yml (97%) create mode 100644 unittests/generic/test_pulp_conv2d_fp32.c diff --git a/.gitignore b/.gitignore index 20d3ea18..396f3c4a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ **/.vscode/ **/__pycache__/ .idea/ +build diff --git a/lib/sources/pulp_conv2d_fp32.c b/lib/sources/pulp_conv2d_fp32.c index 6166d88c..7abb84ce 100644 --- a/lib/sources/pulp_conv2d_fp32.c +++ b/lib/sources/pulp_conv2d_fp32.c @@ -295,10 +295,10 @@ void pulp_conv2d_fp32_bw_param_grads_cl(void *Conv2D_args) { im2col_args.c = C2D_args->coeff; im2col_args.output = C2D_args->output; im2col_args.pBuffer = i2c_buffer; - im2col_args.Lpad = 0; //Lpad; - im2col_args.Rpad = 0; //Rpad; - im2col_args.Upad = 0; //Upad; - im2col_args.Dpad = 0; //Dpad; + im2col_args.Lpad = Lpad; + im2col_args.Rpad = Rpad; + im2col_args.Upad = Upad; + im2col_args.Dpad = Dpad; im2col_args.mod = 0; im2col_args.stride_w = stride_w; im2col_args.stride_h = stride_h; @@ -319,8 +319,16 @@ void pulp_conv2d_fp32_bw_param_grads_cl(void *Conv2D_args) { matMul_args.bias = biasDiff; matMul_args.USE_BIASES = USE_BIASES; - matMul_args.pH = H_out; - matMul_args.pW = W_out; + matMul_args.H = H_in; + matMul_args.W = W_in; + matMul_args.pH = pH; + matMul_args.pW = pW; + matMul_args.Lpad = Lpad; + matMul_args.Rpad = 
Rpad; + matMul_args.Upad = Upad; + matMul_args.Dpad = Dpad; + matMul_args.stride_h = stride_h; + matMul_args.stride_w = stride_w; matMul_args.bias_dim = bias_dim; @@ -366,7 +374,7 @@ void pulp_conv2d_fp32_bw_param_grads_cl(void *Conv2D_args) { pi_cl_team_fork(NUM_CORES, transpose, &tr_args); - matMul_args.A = tr_buffer; // outDiff; + matMul_args.A = tr_buffer; // outDiff transposed; matMul_args.B = i2c_buffer; matMul_args.C = coeffDiff; matMul_args.N = C_out; @@ -378,8 +386,16 @@ void pulp_conv2d_fp32_bw_param_grads_cl(void *Conv2D_args) { matMul_args.bias = biasDiff; matMul_args.USE_BIASES = USE_BIASES; - matMul_args.pH = H_out; - matMul_args.pW = W_out; + matMul_args.H = H_in; + matMul_args.W = W_in; + matMul_args.pH = pH; + matMul_args.pW = pW; + matMul_args.Lpad = Lpad; + matMul_args.Rpad = Rpad; + matMul_args.Upad = Upad; + matMul_args.Dpad = Dpad; + matMul_args.stride_h = stride_h; + matMul_args.stride_w = stride_w; matMul_args.bias_dim = bias_dim; @@ -691,9 +707,6 @@ void im2col_conv2d_fw_kernel(void *void_args) { uint32_t Upad = args->Upad; uint32_t Dpad = args->Dpad; - // const uint32_t H_out = (H_in - pH + Upad + Dpad) / h_str + 1; - // const uint32_t W_out = (W_in - pW + Lpad + Rpad) / w_str + 1; - const uint32_t H_out = pH; const uint32_t W_out = pW; @@ -747,38 +760,8 @@ void im2col_conv2d_param_grad_kernel(void *void_args) { struct mm_manager_args *man_args = (struct mm_manager_args *) void_args; struct matMul_args *args = man_args->mm_args; - float *__restrict__ inData = args->A; - float *__restrict__ coeffDiff = args->B; - float *__restrict__ outDiff = args->C; - - float *__restrict__ biasDiff = args->bias; const uint32_t USE_BIASES = args->USE_BIASES; - const uint32_t H_in = args->H; - const uint32_t W_in = args->W; - const uint32_t pW = args->pW; - const uint32_t pH = args->pH; - const uint32_t C_in = args->pCin; - const uint32_t C_out = args->N; - - uint32_t h_str = args->stride_h; - uint32_t w_str = args->stride_w; - uint32_t Lpad = args->Lpad; - 
uint32_t Rpad = args->Rpad; - uint32_t Upad = args->Upad; - uint32_t Dpad = args->Dpad; - - const uint32_t H_out = (H_in - pH + Upad + Dpad) / h_str + 1; - const uint32_t W_out = (W_in - pW + Lpad + Rpad) / w_str + 1; - - const uint32_t blockSize = (C_out + NUM_CORES - 1) / NUM_CORES; - const uint32_t start = pi_core_id() * blockSize; - const uint32_t stop = start + blockSize > C_out ? C_out : start + blockSize; - - const uint32_t HWC = args->HWC; - - int padding = Lpad + Rpad + Upad + Dpad; - // Perform simple matrix multiplication #ifndef OPTIMIZE mm(args); @@ -787,36 +770,41 @@ void im2col_conv2d_param_grad_kernel(void *void_args) { #endif // Handle biases - if (USE_BIASES == 1 && HWC == 0) { - for (uint32_t co = start; co < stop; co++) { - float temp = 0; - for (uint32_t ho = 0; ho < H_out; ho++) { - for (uint32_t wo = 0; wo < W_out; wo++) { - temp += outDiff[wo + ho * H_out + co * H_out * W_out]; - } - } - biasDiff[co] = temp; - } - } + if (USE_BIASES == 1) { + float *__restrict__ outDiff = args->A; + float *__restrict__ biasDiff = args->bias; + + const uint32_t H_in = args->H; + const uint32_t W_in = args->W; + const uint32_t pW = args->pW; + const uint32_t pH = args->pH; + const uint32_t C_out = args->N; + + uint32_t h_str = args->stride_h; + uint32_t w_str = args->stride_w; + uint32_t Lpad = args->Lpad; + uint32_t Rpad = args->Rpad; + uint32_t Upad = args->Upad; + uint32_t Dpad = args->Dpad; + + const uint32_t H_out = (H_in - pH + Upad + Dpad) / h_str + 1; + const uint32_t W_out = (W_in - pW + Lpad + Rpad) / w_str + 1; + + const uint32_t blockSize = (C_out + NUM_CORES - 1) / NUM_CORES; + const uint32_t start = pi_core_id() * blockSize; + const uint32_t stop = start + blockSize > C_out ? 
C_out : start + blockSize; - else if (USE_BIASES == 1 && HWC == 1) { for (uint32_t co = start; co < stop; co++) { float temp = 0; for (uint32_t ho = 0; ho < H_out; ho++) { for (uint32_t wo = 0; wo < W_out; wo++) { - temp += outDiff[wo * C_out + ho * C_out * W_out + co]; + temp += outDiff[wo + ho * W_out + co * H_out * W_out]; } } biasDiff[co] = temp; } } - if (HWC != 0 && HWC != 1) { - // Unsupported layout - printf("[im2col_conv2d_param_grad_kernel:] Invalid selection of the HWC layout (1 for HWC, 0 for CHW). Actual value: %d. Biases not used, even if provided!\n", - HWC); - } - if (USE_BIASES != 0 && USE_BIASES != 1) { printf("[im2col_conv2d_param_grad_kernel:] Invalid selection of the bias option (1 or 0 - use biases or not). Actual value: %d. Biases not used, even if provided!\n", USE_BIASES); diff --git a/lib/sources/pulp_conv_naive_fp32.c b/lib/sources/pulp_conv_naive_fp32.c index 9930bb16..b7c97e79 100644 --- a/lib/sources/pulp_conv_naive_fp32.c +++ b/lib/sources/pulp_conv_naive_fp32.c @@ -376,17 +376,18 @@ void naive_conv2d_param_grad_kernel_CHW(void *matMul_args) { // Pad conditions int pad_cond_h = h_str * ho + hk - Upad; int pad_cond_w = w_str * wo + wk - Lpad; + int out_idx = wo + ho * W_out + co * H_out * W_out; if ((pad_cond_h >= 0) && (pad_cond_w >= 0) && (pad_cond_h < H_in) && (pad_cond_w < W_in)) { - int out_idx = wo + ho * W_out + co * H_out * W_out; int in_idx = (w_str * wo + wk - Lpad) + (h_str * ho + hk - Upad) * W_in + ci * H_in * W_in; temp += outDiff[out_idx] * inData[in_idx]; - if (USE_BIASES == 1) bias_temp += outDiff[out_idx]; } + + if (USE_BIASES == 1) bias_temp += outDiff[out_idx]; } } coeffDiff[wk + hk * pW + ci * pH * pW + co * pH * pW * C_in] = temp; diff --git a/lib/sources/pulp_linear_fp32.c b/lib/sources/pulp_linear_fp32.c index 69c5b88a..99b5e72b 100644 --- a/lib/sources/pulp_linear_fp32.c +++ b/lib/sources/pulp_linear_fp32.c @@ -217,18 +217,18 @@ void pulp_linear_fp32_fw_cl_kernel( void * man_args ) { mm_manager(manager_args); 
#endif - // pi_cl_team_barrier(); - - // if (USE_BIASES == 1) { - // const uint32_t outputSize = N * M; - // const uint32_t blockSize = (outputSize + NUM_CORES - 1) / NUM_CORES; - // const uint32_t start = pi_core_id() * blockSize; - // const uint32_t stop = start + blockSize > outputSize ? outputSize : start + blockSize; - - // for (uint32_t i = start; i < stop; i++) { - // outData[i] += biasData[i % M]; - // } - // } + pi_cl_team_barrier(); + + if (USE_BIASES == 1) { + const uint32_t outputSize = N * M; + const uint32_t blockSize = (outputSize + NUM_CORES - 1) / NUM_CORES; + const uint32_t start = pi_core_id() * blockSize; + const uint32_t stop = start + blockSize > outputSize ? outputSize : start + blockSize; + + for (uint32_t i = start; i < stop; i++) { + outData[i] += biasData[i]; + } + } } void pulp_linear_fp32_bw_param_grads_cl_kernel( void * man_args ) diff --git a/unittests/project.yml b/project.yml similarity index 97% rename from unittests/project.yml rename to project.yml index b840aaa1..742001f6 100644 --- a/unittests/project.yml +++ b/project.yml @@ -97,14 +97,14 @@ # see documentation for the many options for specifying this. :paths: :test: - - +:generic/** - - -:generic/support + - +:unittests/generic/** + - -:unittests/generic/support :source: - - ../lib/sources/** + - lib/sources/** :include: - - ../lib/include/** + - lib/include/** :support: - - generic/support + - unittests/generic/support # You can even specify specific files to add or remove from your test # and release collections. Usually it's better to use paths and let @@ -127,6 +127,16 @@ # Enable to inject name of a test as a unique compilation symbol into its respective executable build. :use_test_definition: FALSE +# Enable address sanitizer +#:flags: +# :test: +# :compile: +# - -fsanitize=address +# - -fno-omit-frame-pointer +# - -g +# :link: +# - -fsanitize=address + # Configuration Options specific to CMock. 
See CMock docs for details :cmock: # Core conffiguration @@ -187,6 +197,7 @@ # Configuration options specific to Unity. :unity: :defines: + :use_param_tests: true # You can optionally have ceedling create environment variables for you before # performing the rest of its tasks. @@ -203,6 +214,7 @@ # These libraries are automatically injected into the build process. Those specified as # common will be used in all types of builds. Otherwise, libraries can be injected in just # tests or releases. These options are MERGED with the options in supplemental yaml files. + :libraries: :placement: :end :flag: "-l${1}" diff --git a/unittests/README.md b/unittests/README.md index 0802323b..86a19e76 100644 --- a/unittests/README.md +++ b/unittests/README.md @@ -18,6 +18,8 @@ pip install gcovr Ceedling takes care of building, running and test coverage report generation. +NOTE: all cmds below must be executed from the root directory of this git repository. + Running all tests: ``` @@ -48,4 +50,4 @@ ceedling test:test_pulp_matmul_fp32 gdb --tui --args build/test/out/test_pulp_matmul_fp32/test_pulp_matmul_fp32.out ``` -Once in the debugger set for example a breakpoint (`b test_pulp_matmul_fp32.c:306`) and hit `r`. +Once in the debugger set for example a breakpoint (`b test_pulp_matmul_fp32_mm_M_u2_transp`) and hit `r`. 
diff --git a/unittests/generic/support/pmsis.h b/unittests/generic/support/pmsis.h index 561b3ddd..1817807b 100644 --- a/unittests/generic/support/pmsis.h +++ b/unittests/generic/support/pmsis.h @@ -2,8 +2,9 @@ #define PMSIS_H #include +#include -/* +/* * typedefs */ typedef uint16_t float16alt; @@ -17,7 +18,7 @@ typedef struct { uint32_t ext; uint32_t loc; - uint32_t id; + uint32_t id; uint16_t size; pi_cl_dma_dir_e dir; \ uint8_t merge; diff --git a/unittests/generic/test_pulp_conv2d_fp32.c b/unittests/generic/test_pulp_conv2d_fp32.c new file mode 100644 index 00000000..f2fe03cb --- /dev/null +++ b/unittests/generic/test_pulp_conv2d_fp32.c @@ -0,0 +1,374 @@ +#ifdef TEST + +#include +#include +#include "unity.h" + +#include "pmsis.h" +#include "pulp_train_defines.h" +#include "pulp_train_utils_fp32.h" +#include "pulp_conv2d_fp32.h" +#include "pulp_matmul_fp32.h" +#include "pulp_conv_naive_fp32.h" +#include "pulp_im2col_fp32.h" + +#define DELTA 1e-12 + +// known parameters +static float WEIGHTS[] = {0.0f, 0.009999999776482582f, 0.019999999552965164f, 0.009999999776482582f, 0.019999999552965164f, 0.029999999329447746f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.009999999776482582f, 0.019999999552965164f, 0.029999999329447746f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.009999999776482582f, 0.019999999552965164f, 0.029999999329447746f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.03999999910593033f, 0.05000000074505806f, 
0.05999999865889549f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.05999999865889549f, 0.07000000029802322f, 0.07999999821186066f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.05999999865889549f, 0.07000000029802322f, 0.07999999821186066f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.05999999865889549f, 0.07000000029802322f, 0.07999999821186066f, 0.07000000029802322f, 0.07999999821186066f, 0.09000000357627869f, }; +static float BIASES[] = {0.0f, 0.009999999776482582f, 0.019999999552965164f, 0.029999999329447746f, }; + +// known input and output gradient data +static float INPUT_HWC[] = {0.0010000000474974513f, 0.0010100000072270632f, 0.0010400000028312206f, 0.0009900000877678394f, 0.0010000000474974513f, 0.0010300000431016088f, 0.0009600000339560211f, 0.0009700000518932939f, 0.0010000000474974513f, 0.000910000060684979f, 0.0009200000204145908f, 0.0009500000742264092f, 0.0010100000072270632f, 0.0010400000028312206f, 0.0010900000343099236f, 0.0010000000474974513f, 0.0010300000431016088f, 
0.0010800000745803118f, 0.0009700000518932939f, 0.0010000000474974513f, 0.0010500000789761543f, 0.0009200000204145908f, 0.0009500000742264092f, 0.0010000000474974513f, 0.0010400000028312206f, 0.0010900000343099236f, 0.0011599999852478504f, 0.0010300000431016088f, 0.0010800000745803118f, 0.0011500000255182385f, 0.0010000000474974513f, 0.0010500000789761543f, 0.001120000029914081f, 0.0009500000742264092f, 0.0010000000474974513f, 0.001069999998435378f, 0.0010900000343099236f, 0.0011599999852478504f, 0.0012500000884756446f, 0.0010800000745803118f, 0.0011500000255182385f, 0.0012400000123307109f, 0.0010500000789761543f, 0.001120000029914081f, 0.0012100000167265534f, 0.0010000000474974513f, 0.001069999998435378f, 0.0011599999852478504f, }; +static float INPUT_CHW[] = {0.0010000000474974513f, 0.0009900000877678394f, 0.0009600000339560211f, 0.000910000060684979f, 0.0010100000072270632f, 0.0010000000474974513f, 0.0009700000518932939f, 0.0009200000204145908f, 0.0010400000028312206f, 0.0010300000431016088f, 0.0010000000474974513f, 0.0009500000742264092f, 0.0010900000343099236f, 0.0010800000745803118f, 0.0010500000789761543f, 0.0010000000474974513f, 0.0010100000072270632f, 0.0010000000474974513f, 0.0009700000518932939f, 0.0009200000204145908f, 0.0010400000028312206f, 0.0010300000431016088f, 0.0010000000474974513f, 0.0009500000742264092f, 0.0010900000343099236f, 0.0010800000745803118f, 0.0010500000789761543f, 0.0010000000474974513f, 0.0011599999852478504f, 0.0011500000255182385f, 0.001120000029914081f, 0.001069999998435378f, 0.0010400000028312206f, 0.0010300000431016088f, 0.0010000000474974513f, 0.0009500000742264092f, 0.0010900000343099236f, 0.0010800000745803118f, 0.0010500000789761543f, 0.0010000000474974513f, 0.0011599999852478504f, 0.0011500000255182385f, 0.001120000029914081f, 0.001069999998435378f, 0.0012500000884756446f, 0.0012400000123307109f, 0.0012100000167265534f, 0.0011599999852478504f, }; +static float OUTPUT_GRAD_HWC_PAD0[] = {-0.1248936876654625f, 
-0.12360870093107224f, -0.12232371419668198f, -0.12103872746229172f, -0.12502218782901764f, -0.12373820692300797f, -0.12245423346757889f, -0.12117025256156921f, -0.12488772720098495f, -0.1236010491847992f, -0.12231437861919403f, -0.12102770060300827f, -0.1250162124633789f, -0.12373055517673492f, -0.12244489043951035f, -0.12115923315286636f, }; +static float OUTPUT_GRAD_CHW_PAD0[] = {-0.1248936876654625f, -0.12502218782901764f, -0.12488772720098495f, -0.1250162124633789f, -0.12360870093107224f, -0.12373820692300797f, -0.1236010491847992f, -0.12373055517673492f, -0.12232371419668198f, -0.12245423346757889f, -0.12231437861919403f, -0.12244489043951035f, -0.12103872746229172f, -0.12117025256156921f, -0.12102770060300827f, -0.12115923315286636f, }; +static float OUTPUT_GRAD_HWC_PAD1[] = {-0.031234506517648697f, -0.03091815672814846f, -0.030601806938648224f, -0.030285457149147987f, -0.03126119077205658f, -0.03094298020005226f, -0.030624769628047943f, -0.030306560918688774f, -0.031293101608753204f, -0.030975062400102615f, -0.030657021328806877f, -0.03033898025751114f, -0.031332820653915405f, -0.031016694381833076f, -0.030700569972395897f, -0.030384443700313568f, -0.03122907504439354f, -0.030910678207874298f, -0.030592281371355057f, -0.030273884534835815f, -0.03125467151403427f, -0.030933426693081856f, -0.03061218000948429f, -0.030290933325886726f, -0.03128679469227791f, -0.03096579946577549f, -0.03064480610191822f, -0.030323810875415802f, -0.031329624354839325f, -0.031011562794446945f, -0.030693503096699715f, -0.030375445261597633f, -0.031227940693497658f, -0.030909262597560883f, -0.030590584501624107f, -0.030271906405687332f, -0.031253181397914886f, -0.030931513756513596f, -0.030609846115112305f, -0.030288176611065865f, -0.031285300850868225f, -0.03096388652920723f, -0.030642470344901085f, -0.03032105602324009f, -0.03132877126336098f, -0.031010428443551064f, -0.0306920874863863f, -0.030373748391866684f, -0.03123709373176098f, -0.030920369550585747f, 
-0.030603643506765366f, -0.030286919325590134f, -0.03126528486609459f, -0.030946513637900352f, -0.030627742409706116f, -0.03030896931886673f, -0.03129703179001808f, -0.030978428199887276f, -0.03065982460975647f, -0.030341221019625664f, -0.031335558742284775f, -0.0310190562158823f, -0.030702557414770126f, -0.030386056751012802f, }; +static float OUTPUT_GRAD_CHW_PAD1[] = {-0.031234506517648697f, -0.03126119077205658f, -0.031293101608753204f, -0.031332820653915405f, -0.03122907504439354f, -0.03125467151403427f, -0.03128679469227791f, -0.031329624354839325f, -0.031227940693497658f, -0.031253181397914886f, -0.031285300850868225f, -0.03132877126336098f, -0.03123709373176098f, -0.03126528486609459f, -0.03129703179001808f, -0.031335558742284775f, -0.03091815672814846f, -0.03094298020005226f, -0.030975062400102615f, -0.031016694381833076f, -0.030910678207874298f, -0.030933426693081856f, -0.03096579946577549f, -0.031011562794446945f, -0.030909262597560883f, -0.030931513756513596f, -0.03096388652920723f, -0.031010428443551064f, -0.030920369550585747f, -0.030946513637900352f, -0.030978428199887276f, -0.0310190562158823f, -0.030601806938648224f, -0.030624769628047943f, -0.030657021328806877f, -0.030700569972395897f, -0.030592281371355057f, -0.03061218000948429f, -0.03064480610191822f, -0.030693503096699715f, -0.030590584501624107f, -0.030609846115112305f, -0.030642470344901085f, -0.0306920874863863f, -0.030603643506765366f, -0.030627742409706116f, -0.03065982460975647f, -0.030702557414770126f, -0.030285457149147987f, -0.030306560918688774f, -0.03033898025751114f, -0.030384443700313568f, -0.030273884534835815f, -0.030290933325886726f, -0.030323810875415802f, -0.030375445261597633f, -0.030271906405687332f, -0.030288176611065865f, -0.03032105602324009f, -0.030373748391866684f, -0.030286919325590134f, -0.03030896931886673f, -0.030341221019625664f, -0.030386056751012802f, }; + +// expected output and expected weight/bias/input gradients +static float EXPECTED_OUTPUT_HWC_PAD0[] = 
{0.0008505000150762498f, 0.011130400002002716f, 0.021410299465060234f, 0.03169019892811775f, 0.0008226000354625285f, 0.011094399727880955f, 0.021366199478507042f, 0.031638000160455704f, 0.0008981999708339572f, 0.011191600002348423f, 0.021484998986124992f, 0.031778398901224136f, 0.0008703000494278967f, 0.011155599728226662f, 0.0214408989995718f, 0.03172620013356209f, }; +static float EXPECTED_OUTPUT_CHW_PAD0[] = {0.0008505000150762498f, 0.0008226000354625285f, 0.0008981999708339572f, 0.0008703000494278967f, 0.011130400002002716f, 0.011094399727880955f, 0.011191600002348423f, 0.011155599728226662f, 0.021410299465060234f, 0.021366199478507042f, 0.021484998986124992f, 0.0214408989995718f, 0.03169019892811775f, 0.031638000160455704f, 0.031778398901224136f, 0.03172620013356209f, }; +static float EXPECTED_OUTPUT_HWC_PAD1[] = {0.0004958000499755144f, 0.010618999600410461f, 0.020742200314998627f, 0.030865399166941643f, 0.0006419999990612268f, 0.010824699886143208f, 0.021007400006055832f, 0.03119009919464588f, 0.0006207000697031617f, 0.010797999799251556f, 0.020975299179553986f, 0.031152598559856415f, 0.0003498000151012093f, 0.010465799830853939f, 0.020581800490617752f, 0.03069780021905899f, 0.0006695999763906002f, 0.010858300141990185f, 0.021046999841928482f, 0.031235698610544205f, 0.0008505000732839108f, 0.011130400002002716f, 0.021410299465060234f, 0.03169019892811775f, 0.0008226001518778503f, 0.011094399727880955f, 0.021366199478507042f, 0.031638000160455704f, 0.00045210003736428916f, 0.010629999451339245f, 0.0208078995347023f, 0.030985798686742783f, 0.0007059000199660659f, 0.010903599672019482f, 0.021101299673318863f, 0.03129899874329567f, 0.000898200087249279f, 0.011191600002348423f, 0.021484998986124992f, 0.031778398901224136f, 0.0008702999912202358f, 0.011155599728226662f, 0.0214408989995718f, 0.03172620013356209f, 0.0004794000124093145f, 0.010666299611330032f, 0.020853199064731598f, 0.031040098518133163f, 0.0004130000015720725f, 0.010548199526965618f, 
0.020683400332927704f, 0.030818600207567215f, 0.0005108999903313816f, 0.010711600072681904f, 0.020912298932671547f, 0.031112998723983765f, 0.0004949999856762588f, 0.010690299794077873f, 0.02088559977710247f, 0.031080899760127068f, 0.0002621999883558601f, 0.010390199720859528f, 0.020518198609352112f, 0.030646199360489845f, }; +static float EXPECTED_OUTPUT_CHW_PAD1[] = {0.0004958000499755144f, 0.0006419999990612268f, 0.0006207000697031617f, 0.0003498000151012093f, 0.0006695999763906002f, 0.0008505000732839108f, 0.0008226001518778503f, 0.00045210003736428916f, 0.0007059000199660659f, 0.000898200087249279f, 0.0008702999912202358f, 0.0004794000124093145f, 0.0004130000015720725f, 0.0005108999903313816f, 0.0004949999856762588f, 0.0002621999883558601f, 0.010618999600410461f, 0.010824699886143208f, 0.010797999799251556f, 0.010465799830853939f, 0.010858300141990185f, 0.011130400002002716f, 0.011094399727880955f, 0.010629999451339245f, 0.010903599672019482f, 0.011191600002348423f, 0.011155599728226662f, 0.010666299611330032f, 0.010548199526965618f, 0.010711600072681904f, 0.010690299794077873f, 0.010390199720859528f, 0.020742200314998627f, 0.021007400006055832f, 0.020975299179553986f, 0.020581800490617752f, 0.021046999841928482f, 0.021410299465060234f, 0.021366199478507042f, 0.0208078995347023f, 0.021101299673318863f, 0.021484998986124992f, 0.0214408989995718f, 0.020853199064731598f, 0.020683400332927704f, 0.020912298932671547f, 0.02088559977710247f, 0.020518198609352112f, 0.030865399166941643f, 0.03119009919464588f, 0.031152598559856415f, 0.03069780021905899f, 0.031235698610544205f, 0.03169019892811775f, 0.031638000160455704f, 0.030985798686742783f, 0.03129899874329567f, 0.031778398901224136f, 0.03172620013356209f, 0.031040098518133163f, 0.030818600207567215f, 0.031112998723983765f, 0.031080899760127068f, 0.030646199360489845f, }; +static float EXPECTED_WEIGHT_GRAD_HWC_PAD0[] = {-0.0004998184740543365f, -0.0005098147667013109f, -0.0005298074684105814f, 
-0.0004898195038549602f, -0.0004998157965019345f, -0.000519808498211205f, -0.00046982415369711816f, -0.00047982041724026203f, -0.0004998131189495325f, -0.0005098147667013109f, -0.0005298074684105814f, -0.0005597965209744871f, -0.0004998157965019345f, -0.000519808498211205f, -0.0005497975507751107f, -0.00047982041724026203f, -0.0004998131189495325f, -0.0005298021715134382f, -0.0005298074684105814f, -0.0005597965209744871f, -0.0005997819826006889f, -0.000519808498211205f, -0.0005497975507751107f, -0.0005897830124013126f, -0.0004998131189495325f, -0.0005298021715134382f, -0.0005697876331396401f, -0.0004946771659888327f, -0.0005045706056989729f, -0.0005243575433269143f, -0.00048478099051862955f, -0.0004946744302287698f, -0.0005144614260643721f, -0.0004649912880267948f, -0.0004748846695292741f, -0.0004946716944687068f, -0.0005045706056989729f, -0.0005243575433269143f, -0.0005540381534956396f, -0.0004946744302287698f, -0.0005144614260643721f, -0.0005441419780254364f, -0.0004748846695292741f, -0.0004946716944687068f, -0.0005243522464297712f, -0.0005243575433269143f, -0.0005540381534956396f, -0.000593612261582166f, -0.0005144614260643721f, -0.0005441419780254364f, -0.0005837160861119628f, -0.0004946716944687068f, -0.0005243522464297712f, -0.0005639263545162976f, -0.0004895358579233289f, -0.0004993263864889741f, -0.0005189076764509082f, -0.0004797424771822989f, -0.000489533063955605f, -0.0005091143539175391f, -0.00046015839325264096f, -0.00046994895092211664f, -0.0004895302699878812f, -0.0004993263864889741f, -0.0005189076764509082f, -0.0005482797278091311f, -0.000489533063955605f, -0.0005091143539175391f, -0.0005384864052757621f, -0.00046994895092211664f, -0.0004895302699878812f, -0.0005189022631384432f, -0.0005189076764509082f, -0.0005482797278091311f, -0.000587442540563643f, -0.0005091143539175391f, -0.0005384864052757621f, -0.0005776492180302739f, -0.0004895302699878812f, -0.0005189022631384432f, -0.0005580650758929551f, -0.0004843945207539946f, -0.0004940821672789752f, 
-0.0005134578095749021f, -0.00047470396384596825f, -0.0004843916685786098f, -0.0005037672817707062f, -0.00045532549847848713f, -0.0004650132032111287f, -0.00048438881640322506f, -0.0004940821672789752f, -0.0005134578095749021f, -0.0005425213603302836f, -0.0004843916685786098f, -0.0005037672817707062f, -0.0005328307743184268f, -0.0004650132032111287f, -0.00048438881640322506f, -0.0005134523380547762f, -0.0005134578095749021f, -0.0005425213603302836f, -0.00058127281954512f, -0.0005037672817707062f, -0.0005328307743184268f, -0.0005715822335332632f, -0.00048438881640322506f, -0.0005134523380547762f, -0.0005522037972696126f, }; +static float EXPECTED_WEIGHT_GRAD_CHW_PAD0[] = {-0.0004998184740543365f, -0.0004898195038549602f, -0.00046982415369711816f, -0.0005098147667013109f, -0.0004998157965019345f, -0.00047982041724026203f, -0.0005298074684105814f, -0.000519808498211205f, -0.0004998131189495325f, -0.0005098147667013109f, -0.0004998157965019345f, -0.00047982041724026203f, -0.0005298074684105814f, -0.000519808498211205f, -0.0004998131189495325f, -0.0005597965209744871f, -0.0005497975507751107f, -0.0005298021715134382f, -0.0005298074684105814f, -0.000519808498211205f, -0.0004998131189495325f, -0.0005597965209744871f, -0.0005497975507751107f, -0.0005298021715134382f, -0.0005997819826006889f, -0.0005897830124013126f, -0.0005697876331396401f, -0.0004946771659888327f, -0.00048478099051862955f, -0.0004649912880267948f, -0.0005045706056989729f, -0.0004946744302287698f, -0.0004748846695292741f, -0.0005243575433269143f, -0.0005144614260643721f, -0.0004946716944687068f, -0.0005045706056989729f, -0.0004946744302287698f, -0.0004748846695292741f, -0.0005243575433269143f, -0.0005144614260643721f, -0.0004946716944687068f, -0.0005540381534956396f, -0.0005441419780254364f, -0.0005243522464297712f, -0.0005243575433269143f, -0.0005144614260643721f, -0.0004946716944687068f, -0.0005540381534956396f, -0.0005441419780254364f, -0.0005243522464297712f, -0.000593612261582166f, 
-0.0005837160861119628f, -0.0005639263545162976f, -0.0004895358579233289f, -0.0004797424771822989f, -0.00046015839325264096f, -0.0004993263864889741f, -0.000489533063955605f, -0.00046994895092211664f, -0.0005189076764509082f, -0.0005091143539175391f, -0.0004895302699878812f, -0.0004993263864889741f, -0.000489533063955605f, -0.00046994895092211664f, -0.0005189076764509082f, -0.0005091143539175391f, -0.0004895302699878812f, -0.0005482797278091311f, -0.0005384864052757621f, -0.0005189022631384432f, -0.0005189076764509082f, -0.0005091143539175391f, -0.0004895302699878812f, -0.0005482797278091311f, -0.0005384864052757621f, -0.0005189022631384432f, -0.000587442540563643f, -0.0005776492180302739f, -0.0005580650758929551f, -0.0004843945207539946f, -0.00047470396384596825f, -0.00045532549847848713f, -0.0004940821672789752f, -0.0004843916685786098f, -0.0004650132032111287f, -0.0005134578095749021f, -0.0005037672817707062f, -0.00048438881640322506f, -0.0004940821672789752f, -0.0004843916685786098f, -0.0004650132032111287f, -0.0005134578095749021f, -0.0005037672817707062f, -0.00048438881640322506f, -0.0005425213603302836f, -0.0005328307743184268f, -0.0005134523380547762f, -0.0005134578095749021f, -0.0005037672817707062f, -0.00048438881640322506f, -0.0005425213603302836f, -0.0005328307743184268f, -0.0005134523380547762f, -0.00058127281954512f, -0.0005715822335332632f, -0.0005522037972696126f, }; +static float EXPECTED_WEIGHT_GRAD_HWC_PAD1[] = {-0.00028163238312117755f, -0.0002900820109061897f, -0.00030416433583013713f, -0.00036843473208136857f, -0.0003796953533310443f, -0.00039846255094744265f, -0.00027288994169794023f, -0.0002813306055031717f, -0.0002953980292659253f, -0.0003824028535746038f, -0.00039742409717291594f, -0.00041995575884357095f, -0.0005004326812922955f, -0.0005204511107876897f, -0.0005504785222001374f, -0.0003707402211148292f, -0.00038574551581405103f, -0.0004082533123437315f, -0.0002900689432863146f, -0.00030414978391490877f, -0.00032386320526711643f, 
-0.00037967925891280174f, -0.00039844459388405085f, -0.0004247162432875484f, -0.00028131791623309255f, -0.0002953837683890015f, -0.000315076089464128f, -0.00027875672094523907f, -0.0002871202304959297f, -0.00030105889891274273f, -0.000364675564924255f, -0.0003758214006666094f, -0.00039439729880541563f, -0.00027010016492567956f, -0.0002784546813927591f, -0.0002923783322330564f, -0.00037850451190024614f, -0.0003933725238312036f, -0.00041567449807189405f, -0.0004953343886882067f, -0.0005151488003320992f, -0.0005448703304864466f, -0.00036695622839033604f, -0.0003818082623183727f, -0.0004040862841065973f, -0.0002871080650947988f, -0.00030104504548944533f, -0.0003205570683348924f, -0.0003758064704015851f, -0.00039438018575310707f, -0.0004203836724627763f, -0.0002784428361337632f, -0.0002923647698480636f, -0.00031185566331259906f, -0.00027588108787313104f, -0.00028415845008566976f, -0.0002979534910991788f, -0.000360916368663311f, -0.0003719475062098354f, -0.0003903320466633886f, -0.00026731035904958844f, -0.00027557872817851603f, -0.00028935872251167893f, -0.00037460619932971895f, -0.0003893210960086435f, -0.00041139329550787807f, -0.0004902360960841179f, -0.0005098464898765087f, -0.000539262022357434f, -0.000363172177458182f, -0.0003778710088226944f, -0.00039991919766180217f, -0.0002841472451109439f, -0.0002979403070639819f, -0.0003172509605064988f, -0.000371933652786538f, -0.0003903157194145024f, -0.0004160511016380042f, -0.0002755677269306034f, -0.0002893457130994648f, -0.00030863520805723965f, -0.00027300542569719255f, -0.00028119664057157934f, -0.0002948480541817844f, -0.0003571571141947061f, -0.0003680735535454005f, -0.00038626688183285296f, -0.0002645205822773278f, -0.000272702774964273f, -0.000286339083686471f, -0.00037070788675919175f, -0.00038526958087459207f, -0.0004071120056323707f, -0.00048513777437619865f, -0.0005045442376285791f, -0.0005336537724360824f, -0.0003593881556298584f, -0.0003739337553270161f, -0.0003957521403208375f, -0.0002811863669194281f, 
-0.00029483556863851845f, -0.00031394485267810524f, -0.0003680608351714909f, -0.0003862513112835586f, -0.00041171853081323206f, -0.00027269264683127403f, -0.0002863266854546964f, -0.0003054147236980498f, }; +static float EXPECTED_WEIGHT_GRAD_CHW_PAD1[] = {-0.00028163238312117755f, -0.00036843473208136857f, -0.00027288994169794023f, -0.0003824028535746038f, -0.0005004326812922955f, -0.0003707402211148292f, -0.0002900689432863146f, -0.00037967925891280174f, -0.00028131791623309255f, -0.0002900820109061897f, -0.0003796953533310443f, -0.0002813306055031717f, -0.00039742409717291594f, -0.0005204511107876897f, -0.00038574551581405103f, -0.00030414978391490877f, -0.00039844459388405085f, -0.0002953837683890015f, -0.00030416433583013713f, -0.00039846255094744265f, -0.0002953980292659253f, -0.00041995575884357095f, -0.0005504785222001374f, -0.0004082533123437315f, -0.00032386320526711643f, -0.0004247162432875484f, -0.000315076089464128f, -0.00027875672094523907f, -0.000364675564924255f, -0.00027010016492567956f, -0.00037850451190024614f, -0.0004953343886882067f, -0.00036695622839033604f, -0.0002871080650947988f, -0.0003758064704015851f, -0.0002784428361337632f, -0.0002871202304959297f, -0.0003758214006666094f, -0.0002784546813927591f, -0.0003933725238312036f, -0.0005151488003320992f, -0.0003818082623183727f, -0.00030104504548944533f, -0.00039438018575310707f, -0.0002923647698480636f, -0.00030105889891274273f, -0.00039439729880541563f, -0.0002923783322330564f, -0.00041567449807189405f, -0.0005448703304864466f, -0.0004040862841065973f, -0.0003205570683348924f, -0.0004203836724627763f, -0.00031185566331259906f, -0.00027588108787313104f, -0.000360916368663311f, -0.00026731035904958844f, -0.00037460619932971895f, -0.0004902360960841179f, -0.000363172177458182f, -0.0002841472451109439f, -0.000371933652786538f, -0.0002755677269306034f, -0.00028415845008566976f, -0.0003719475062098354f, -0.00027557872817851603f, -0.0003893210960086435f, -0.0005098464898765087f, 
-0.0003778710088226944f, -0.0002979403070639819f, -0.0003903157194145024f, -0.0002893457130994648f, -0.0002979534910991788f, -0.0003903320466633886f, -0.00028935872251167893f, -0.00041139329550787807f, -0.000539262022357434f, -0.00039991919766180217f, -0.0003172509605064988f, -0.0004160511016380042f, -0.00030863520805723965f, -0.00027300542569719255f, -0.0003571571141947061f, -0.0002645205822773278f, -0.00037070788675919175f, -0.00048513777437619865f, -0.0003593881556298584f, -0.0002811863669194281f, -0.0003680608351714909f, -0.00027269264683127403f, -0.00028119664057157934f, -0.0003680735535454005f, -0.000272702774964273f, -0.00038526958087459207f, -0.0005045442376285791f, -0.0003739337553270161f, -0.00029483556863851845f, -0.0003862513112835586f, -0.0002863266854546964f, -0.0002948480541817844f, -0.00038626688183285296f, -0.000286339083686471f, -0.0004071120056323707f, -0.0005336537724360824f, -0.0003957521403208375f, -0.00031394485267810524f, -0.00041171853081323206f, -0.0003054147236980498f, }; +static float EXPECTED_BIAS_GRAD_PAD0[] = {-0.499819815158844f, -0.4946784973144531f, -0.48953720927238464f, -0.48439592123031616f, }; +static float EXPECTED_BIAS_GRAD_PAD1[] = {-0.5004519820213318f, -0.4953538179397583f, -0.4902556836605072f, -0.4851575791835785f, }; +static float EXPECTED_INPUT_GRAD_HWC_PAD0[] = {-0.0073137227445840836f, -0.012232370674610138f, -0.01715102046728134f, -0.01955394446849823f, -0.02939644455909729f, -0.03923893719911575f, -0.02939644455909729f, -0.03923893719911575f, -0.04908143728971481f, -0.017169270664453506f, -0.022093120962381363f, -0.02701696939766407f, -0.019545499235391617f, -0.029382457956671715f, -0.039219412952661514f, -0.04894886165857315f, -0.06863316893577576f, -0.08831749111413956f, -0.06863316893577576f, -0.08831749111413956f, -0.10800180584192276f, -0.039261117577552795f, -0.04910847544670105f, -0.058955833315849304f, -0.029382457956671715f, -0.039219412952661514f, -0.04905637353658676f, -0.06863316893577576f, 
-0.08831749111413956f, -0.10800180584192276f, -0.08831749111413956f, -0.10800180584192276f, -0.12768611311912537f, -0.04910847544670105f, -0.058955833315849304f, -0.06880319118499756f, -0.017149746417999268f, -0.022068055346608162f, -0.026986364275217056f, -0.039236053824424744f, -0.049077872186899185f, -0.05891969054937363f, -0.049077872186899185f, -0.05891969054937363f, -0.06876150518655777f, -0.027015015482902527f, -0.031938523054122925f, -0.03686203435063362f, }; +static float EXPECTED_INPUT_GRAD_CHW_PAD0[] = {-0.0073137227445840836f, -0.01955394446849823f, -0.02939644455909729f, -0.017169270664453506f, -0.019545499235391617f, -0.04894886165857315f, -0.06863316893577576f, -0.039261117577552795f, -0.029382457956671715f, -0.06863316893577576f, -0.08831749111413956f, -0.04910847544670105f, -0.017149746417999268f, -0.039236053824424744f, -0.049077872186899185f, -0.027015015482902527f, -0.012232370674610138f, -0.02939644455909729f, -0.03923893719911575f, -0.022093120962381363f, -0.029382457956671715f, -0.06863316893577576f, -0.08831749111413956f, -0.04910847544670105f, -0.039219412952661514f, -0.08831749111413956f, -0.10800180584192276f, -0.058955833315849304f, -0.022068055346608162f, -0.049077872186899185f, -0.05891969054937363f, -0.031938523054122925f, -0.01715102046728134f, -0.03923893719911575f, -0.04908143728971481f, -0.02701696939766407f, -0.039219412952661514f, -0.08831749111413956f, -0.10800180584192276f, -0.058955833315849304f, -0.04905637353658676f, -0.10800180584192276f, -0.12768611311912537f, -0.06880319118499756f, -0.026986364275217056f, -0.05891969054937363f, -0.06876150518655777f, -0.03686203435063362f, }; +static float EXPECTED_INPUT_GRAD_HWC_PAD1[] = {-0.012242590077221394f, -0.01716531626880169f, -0.02208804152905941f, -0.02206328697502613f, -0.02945086546242237f, -0.03683844581246376f, -0.02208544686436653f, -0.029481014236807823f, -0.036876581609249115f, -0.01720142923295498f, -0.022134728729724884f, -0.02706803008913994f, -0.022054532542824745f, 
-0.02943808026611805f, -0.0368216335773468f, -0.03863302618265152f, -0.049713555723428726f, -0.06079408526420593f, -0.038672227412462234f, -0.04976480081677437f, -0.0608573742210865f, -0.029500313103199005f, -0.03689979016780853f, -0.04429926723241806f, -0.022053170949220657f, -0.029436931014060974f, -0.03682069107890129f, -0.03863108903169632f, -0.04971195012331009f, -0.060792818665504456f, -0.03867059573531151f, -0.0497635118663311f, -0.060856424272060394f, -0.029499433934688568f, -0.03689911961555481f, -0.04429881274700165f, -0.017164483666419983f, -0.022087272256612778f, -0.027010060846805573f, -0.029449626803398132f, -0.03683731332421303f, -0.04422499239444733f, -0.0294799767434597f, -0.036875661462545395f, -0.04427134618163109f, -0.022134138271212578f, -0.027067512273788452f, -0.032000888139009476f, }; +static float EXPECTED_INPUT_GRAD_CHW_PAD1[] = {-0.012242590077221394f, -0.02206328697502613f, -0.02208544686436653f, -0.01720142923295498f, -0.022054532542824745f, -0.03863302618265152f, -0.038672227412462234f, -0.029500313103199005f, -0.022053170949220657f, -0.03863108903169632f, -0.03867059573531151f, -0.029499433934688568f, -0.017164483666419983f, -0.029449626803398132f, -0.0294799767434597f, -0.022134138271212578f, -0.01716531626880169f, -0.02945086546242237f, -0.029481014236807823f, -0.022134728729724884f, -0.02943808026611805f, -0.049713555723428726f, -0.04976480081677437f, -0.03689979016780853f, -0.029436931014060974f, -0.04971195012331009f, -0.0497635118663311f, -0.03689911961555481f, -0.022087272256612778f, -0.03683731332421303f, -0.036875661462545395f, -0.027067512273788452f, -0.02208804152905941f, -0.03683844581246376f, -0.036876581609249115f, -0.02706803008913994f, -0.0368216335773468f, -0.06079408526420593f, -0.0608573742210865f, -0.04429926723241806f, -0.03682069107890129f, -0.060792818665504456f, -0.060856424272060394f, -0.04429881274700165f, -0.027010060846805573f, -0.04422499239444733f, -0.04427134618163109f, -0.032000888139009476f, }; + + 
+struct TestVector +{ + struct blob in; // in + expected in grad + struct blob out; // out_grad + expected out + struct blob weight; // weight + expected weight grad + struct blob bias; // bias + expected bias grad +}; + +static struct TestVector test_vectors[] = { + { // HWC = 0, PAD = 0 + .in = { + .data = INPUT_CHW, + .diff = EXPECTED_INPUT_GRAD_CHW_PAD0, + .H = 4, .W = 4, .C = 3 + }, + .out = { + .diff = OUTPUT_GRAD_CHW_PAD0, + .data = EXPECTED_OUTPUT_CHW_PAD0, + .H = 2, .W = 2, .C = 4 + }, + .weight = { + .data = WEIGHTS, + .diff = EXPECTED_WEIGHT_GRAD_CHW_PAD0, + .H = 3, .W = 3, .C = 3 + }, + .bias = { + .data = BIASES, + .diff = EXPECTED_BIAS_GRAD_PAD0, + .dim = 4 + } + }, + { // HWC = 1, PAD = 0 + .in = { + .data = INPUT_HWC, + .diff = EXPECTED_INPUT_GRAD_HWC_PAD0, + .H = 4, .W = 4, .C = 3 + }, + .out = { + .diff = OUTPUT_GRAD_HWC_PAD0, + .data = EXPECTED_OUTPUT_HWC_PAD0, + .H = 2, .W = 2, .C = 4 + }, + .weight = { + .data = WEIGHTS, + .diff = EXPECTED_WEIGHT_GRAD_HWC_PAD0, + .H = 3, .W = 3, .C = 3 + }, + .bias = { + .data = BIASES, + .diff = EXPECTED_BIAS_GRAD_PAD0, + .dim = 4 + } + }, + { // HWC = 0, PAD = 1 + .in = { + .data = INPUT_CHW, + .diff = EXPECTED_INPUT_GRAD_CHW_PAD1, + .H = 4, .W = 4, .C = 3 + }, + .out = { + .diff = OUTPUT_GRAD_CHW_PAD1, + .data = EXPECTED_OUTPUT_CHW_PAD1, + .H = 4, .W = 4, .C = 4 + }, + .weight = { + .data = WEIGHTS, + .diff = EXPECTED_WEIGHT_GRAD_CHW_PAD1, + .H = 3, .W = 3, .C = 3 + }, + .bias = { + .data = BIASES, + .diff = EXPECTED_BIAS_GRAD_PAD1, + .dim = 4 + } + }, + { // HWC = 1, PAD = 1 + .in = { + .data = INPUT_HWC, + .diff = EXPECTED_INPUT_GRAD_HWC_PAD1, + .H = 4, .W = 4, .C = 3 + }, + .out = { + .diff = OUTPUT_GRAD_HWC_PAD1, + .data = EXPECTED_OUTPUT_HWC_PAD1, + .H = 4, .W = 4, .C = 4 + }, + .weight = { + .data = WEIGHTS, + .diff = EXPECTED_WEIGHT_GRAD_HWC_PAD1, + .H = 3, .W = 3, .C = 3 + }, + .bias = { + .data = BIASES, + .diff = EXPECTED_BIAS_GRAD_PAD1, + .dim = 4 + } + } +}; + +void set_array(float *array, size_t 
size, float value) +{ + for (int i = 0; i < size; i++) { + array[i] = value; + } +} + +// create a deep copy of a test vector +void copy_test_vector(const struct TestVector *src, struct TestVector* dst) +{ + *dst = *src; + + dst->in.data = malloc(src->in.dim * sizeof(float)); + dst->in.diff = malloc(src->in.dim * sizeof(float)); + dst->out.data = malloc(src->out.dim * sizeof(float)); + dst->out.diff = malloc(src->out.dim * sizeof(float)); + dst->weight.data = malloc(src->weight.dim * sizeof(float)); + dst->weight.diff = malloc(src->weight.dim * sizeof(float)); + dst->bias.data = malloc(src->bias.dim * sizeof(float)); + dst->bias.diff = malloc(src->bias.dim * sizeof(float)); + memcpy(dst->in.data, src->in.data, src->in.dim * sizeof(float)); + memcpy(dst->in.diff, src->in.diff, src->in.dim * sizeof(float)); + memcpy(dst->out.diff, src->out.diff, src->out.dim * sizeof(float)); + memcpy(dst->out.data, src->out.data, src->out.dim * sizeof(float)); + memcpy(dst->weight.data, src->weight.data, src->weight.dim * sizeof(float)); + memcpy(dst->weight.diff, src->weight.diff, src->weight.dim * sizeof(float)); + memcpy(dst->bias.data, src->bias.data, src->bias.dim * sizeof(float)); + memcpy(dst->bias.diff, src->bias.diff, src->bias.dim * sizeof(float)); +} + +// free a copied test vector +void free_test_vector(struct TestVector *v) +{ + free(v->in.data); + free(v->in.diff); + free(v->out.data); + free(v->out.diff); + free(v->weight.data); + free(v->weight.diff); + free(v->bias.data); + free(v->bias.diff); +} + +void create_test_vectors(int hwc, int use_im2col, int padding, struct Conv2D_args* args, struct TestVector* expected) +{ + int idx = hwc + 2*padding; + static struct TestVector a; + struct TestVector v = test_vectors[idx]; + + // populate .dim fields + v.in.dim = v.in.C * v.in.H * v.in.W; + v.out.dim = v.out.C * v.out.H * v.out.W; + v.weight.dim = v.weight.C * v.weight.H * v.weight.W * v.out.C; + + // create two deep copies of the test vector so that we don't overwrite 
the + // original one. One copy to populate args and one as expected values + copy_test_vector(&v, expected); + copy_test_vector(&v, &a); + + // set some buffers to zero for the argument blobs + set_array(a.in.diff, a.in.dim, 0.0); + set_array(a.out.data, a.out.dim, 0.0); + set_array(a.weight.diff, a.weight.dim, 0.0); + set_array(a.bias.diff, a.bias.dim, 0.0); + + // allocate workspace buffers in case needed + int im2col_buffer_size = v.in.dim * v.out.C * v.weight.W * v.weight.H; + int bt_buffer_size = (v.weight.H * v.weight.W * v.weight.C * v.out.C) > + (v.out.H * v.out.W * v.out.C) ? + (v.weight.H * v.weight.W * v.weight.C * v.out.C) : + (v.out.H * v.out.W * v.out.C); + float *im2col_buffer = calloc(im2col_buffer_size, sizeof(float)); + float *bt_buffer = calloc(bt_buffer_size, sizeof(float)); + + // populate conv2d parameter struct + args->input = &a.in; + args->coeff = &a.weight; + args->bias = &a.bias; + args->output = &a.out; + args->Lpad = padding; + args->Rpad = padding; + args->Upad = padding; + args->Dpad = padding; + args->stride_h = 1; + args->stride_w = 1; + args->i2c_buffer = im2col_buffer; + args->bt_buffer = bt_buffer; + args->skip_wg_grad = 0; + args->skip_in_grad = 0; + args->HWC = hwc; + args->opt_matmul_type_fw = 0; + args->opt_matmul_type_wg = 0; + args->opt_matmul_type_ig = 0; + args->USE_IM2COL = use_im2col; + args->USE_DMA_IM2COL = 0; + args->USE_BIASES = 1; +} + +void free_test_vectors(struct Conv2D_args* args, struct TestVector* expected) +{ + free(args->input->data); + free(args->input->diff); + free(args->output->data); + free(args->output->diff); + free(args->coeff->data); + free(args->coeff->diff); + free(args->bias->data); + free(args->bias->diff); + free(args->i2c_buffer); + free(args->bt_buffer); + free_test_vector(expected); +} + +// called before each test +void setUp(void) +{ +} + +// called after each test +void tearDown(void) +{ +} + +TEST_CASE(1, 1, 0) // HWC, im2col, no padding +TEST_CASE(0, 1, 0) // CHW, im2col, no padding 
+TEST_CASE(0, 0, 0) // CHW, naive, no padding +TEST_CASE(1, 0, 0) // HWC, naive, no padding +//TEST_CASE(1, 1, 1) // HWC, im2col, same padding --> fails: Padding not implemented for HWC im2col without DMA! +TEST_CASE(0, 1, 1) // CHW, im2col, same padding +TEST_CASE(0, 0, 1) // CHW, naive, same padding +TEST_CASE(1, 0, 1) // HWC, naive, same padding +void test_pulp_conv2d_fp32_fw_cl(int hwc, int use_im2col, int padding) +{ + struct Conv2D_args args; + struct TestVector expected; + create_test_vectors(hwc, use_im2col, padding, &args, &expected); + + // for the naive HWC case, we haven't implemented it yet so expect zeros + if (hwc == 1 && use_im2col == 0) { + set_array(expected.out.data, expected.out.dim, 0.0); + } + + pulp_conv2d_fp32_fw_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.out.data, args.output->data, args.output->dim); + + free_test_vectors(&args, &expected); +} + +TEST_CASE(1, 1, 0) // HWC, im2col, no padding +TEST_CASE(0, 1, 0) // CHW, im2col, no padding +TEST_CASE(0, 0, 0) // CHW, naive, no padding +TEST_CASE(1, 0, 0) // HWC, naive, no padding +//TEST_CASE(1, 1, 1) // HWC, im2col, same padding --> fails: Padding not implemented for HWC im2col without DMA! 
+TEST_CASE(0, 1, 1) // CHW, im2col, same padding +TEST_CASE(0, 0, 1) // CHW, naive, same padding +TEST_CASE(1, 0, 1) // HWC, naive, same padding +void test_pulp_conv2d_fp32_bw_param_grads_cl(int hwc, int use_im2col, int padding) +{ + struct Conv2D_args args; + struct TestVector expected; + create_test_vectors(hwc, use_im2col, padding, &args, &expected); + + // for the naive HWC case, we haven't implemented it yet so expect zeros + if (hwc == 1 && use_im2col == 0) { + set_array(expected.weight.diff, expected.weight.dim, 0.0); + set_array(expected.bias.diff, expected.bias.dim, 0.0); + } + + pulp_conv2d_fp32_bw_param_grads_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); + + free_test_vectors(&args, &expected); +} + +TEST_CASE(1, 1, 0) // HWC, im2col, no padding +TEST_CASE(0, 1, 0) // CHW, im2col, no padding +TEST_CASE(0, 0, 0) // CHW, naive, no padding +TEST_CASE(1, 0, 0) // HWC, naive, no padding +//TEST_CASE(1, 1, 1) // HWC, im2col, same padding --> fails +//TEST_CASE(0, 1, 1) // CHW, im2col, same padding --> fails +TEST_CASE(0, 0, 1) // CHW, naive, same padding +TEST_CASE(1, 0, 1) // HWC, naive, same padding +void test_pulp_conv2d_fp32_bw_input_grads_cl(int hwc, int use_im2col, int padding) +{ + struct Conv2D_args args; + struct TestVector expected; + create_test_vectors(hwc, use_im2col, padding, &args, &expected); + + // for the naive HWC case, we haven't implemented it yet so expect zeros + if (hwc == 1 && use_im2col == 0) { + set_array(expected.in.diff, expected.in.dim, 0.0); + } + + pulp_conv2d_fp32_bw_input_grads_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); + + free_test_vectors(&args, &expected); +} + +TEST_CASE(0, 0) // calculate both weight and input gradients +TEST_CASE(1, 0) // skip weight gradient calculation +TEST_CASE(0, 1) // skip input 
gradient calculation +void test_pulp_conv2d_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) +{ + struct Conv2D_args args; + struct TestVector expected; + create_test_vectors(0, 0, 0, &args, &expected); + + // test skip grad calculations + args.skip_wg_grad = skip_wg_grad; + args.skip_in_grad = skip_in_grad; + + if (skip_wg_grad) { + set_array(expected.weight.diff, expected.weight.dim, 0); + set_array(expected.bias.diff, expected.bias.dim, 0); + } + if (skip_in_grad) { + set_array(expected.in.diff, expected.in.dim, 0); + } + + pulp_conv2d_fp32_bw_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); + + free_test_vectors(&args, &expected); +} + +#endif // TEST From 660226e9a18c5c1c8806f5d60283b03868c5392c Mon Sep 17 00:00:00 2001 From: Maarten Vandersteegen Date: Tue, 10 Mar 2026 10:56:12 +0100 Subject: [PATCH 3/4] Added unit tests for pulp_linear_fp32 Also introduced test_utils.c for common unittest functions Refactored pulp_matmul_fp32 tests with conditional test cases --- unittests/generic/support/test_utils.c | 10 + unittests/generic/support/test_utils.h | 8 + unittests/generic/test_pulp_conv2d_fp32.c | 30 +- unittests/generic/test_pulp_linear_fp32.c | 219 +++++++++ unittests/generic/test_pulp_matmul_fp32.c | 521 ++++++++-------------- 5 files changed, 447 insertions(+), 341 deletions(-) create mode 100644 unittests/generic/support/test_utils.c create mode 100644 unittests/generic/support/test_utils.h create mode 100644 unittests/generic/test_pulp_linear_fp32.c diff --git a/unittests/generic/support/test_utils.c b/unittests/generic/support/test_utils.c new file mode 100644 index 00000000..a0c97ab5 --- /dev/null +++ b/unittests/generic/support/test_utils.c @@ -0,0 +1,10 @@ +#include "test_utils.h" + + +void 
set_array_fp32(float *array, size_t size, float value) +{ + for (int i = 0; i < size; i++) { + array[i] = value; + } +} + diff --git a/unittests/generic/support/test_utils.h b/unittests/generic/support/test_utils.h new file mode 100644 index 00000000..16786cb8 --- /dev/null +++ b/unittests/generic/support/test_utils.h @@ -0,0 +1,8 @@ +#ifndef TEST_UTILS_H +#define TEST_UTILS_H + +#include + +void set_array_fp32(float *array, size_t size, float value); + +#endif /* TEST_UTILS_H */ diff --git a/unittests/generic/test_pulp_conv2d_fp32.c b/unittests/generic/test_pulp_conv2d_fp32.c index f2fe03cb..63487054 100644 --- a/unittests/generic/test_pulp_conv2d_fp32.c +++ b/unittests/generic/test_pulp_conv2d_fp32.c @@ -3,6 +3,7 @@ #include #include #include "unity.h" +#include "test_utils.h" #include "pmsis.h" #include "pulp_train_defines.h" @@ -142,13 +143,6 @@ static struct TestVector test_vectors[] = { } }; -void set_array(float *array, size_t size, float value) -{ - for (int i = 0; i < size; i++) { - array[i] = value; - } -} - // create a deep copy of a test vector void copy_test_vector(const struct TestVector *src, struct TestVector* dst) { @@ -202,10 +196,10 @@ void create_test_vectors(int hwc, int use_im2col, int padding, struct Conv2D_arg copy_test_vector(&v, &a); // set some buffers to zero for the argument blobs - set_array(a.in.diff, a.in.dim, 0.0); - set_array(a.out.data, a.out.dim, 0.0); - set_array(a.weight.diff, a.weight.dim, 0.0); - set_array(a.bias.diff, a.bias.dim, 0.0); + set_array_fp32(a.in.diff, a.in.dim, 0.0); + set_array_fp32(a.out.data, a.out.dim, 0.0); + set_array_fp32(a.weight.diff, a.weight.dim, 0.0); + set_array_fp32(a.bias.diff, a.bias.dim, 0.0); // allocate workspace buffers in case needed int im2col_buffer_size = v.in.dim * v.out.C * v.weight.W * v.weight.H; @@ -281,7 +275,7 @@ void test_pulp_conv2d_fp32_fw_cl(int hwc, int use_im2col, int padding) // for the naive HWC case, we haven't implemented it yet so expect zeros if (hwc == 1 && use_im2col 
== 0) { - set_array(expected.out.data, expected.out.dim, 0.0); + set_array_fp32(expected.out.data, expected.out.dim, 0.0); } pulp_conv2d_fp32_fw_cl(&args); @@ -306,8 +300,8 @@ void test_pulp_conv2d_fp32_bw_param_grads_cl(int hwc, int use_im2col, int paddin // for the naive HWC case, we haven't implemented it yet so expect zeros if (hwc == 1 && use_im2col == 0) { - set_array(expected.weight.diff, expected.weight.dim, 0.0); - set_array(expected.bias.diff, expected.bias.dim, 0.0); + set_array_fp32(expected.weight.diff, expected.weight.dim, 0.0); + set_array_fp32(expected.bias.diff, expected.bias.dim, 0.0); } pulp_conv2d_fp32_bw_param_grads_cl(&args); @@ -333,7 +327,7 @@ void test_pulp_conv2d_fp32_bw_input_grads_cl(int hwc, int use_im2col, int paddin // for the naive HWC case, we haven't implemented it yet so expect zeros if (hwc == 1 && use_im2col == 0) { - set_array(expected.in.diff, expected.in.dim, 0.0); + set_array_fp32(expected.in.diff, expected.in.dim, 0.0); } pulp_conv2d_fp32_bw_input_grads_cl(&args); @@ -356,11 +350,11 @@ void test_pulp_conv2d_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) args.skip_in_grad = skip_in_grad; if (skip_wg_grad) { - set_array(expected.weight.diff, expected.weight.dim, 0); - set_array(expected.bias.diff, expected.bias.dim, 0); + set_array_fp32(expected.weight.diff, expected.weight.dim, 0); + set_array_fp32(expected.bias.diff, expected.bias.dim, 0); } if (skip_in_grad) { - set_array(expected.in.diff, expected.in.dim, 0); + set_array_fp32(expected.in.diff, expected.in.dim, 0); } pulp_conv2d_fp32_bw_cl(&args); diff --git a/unittests/generic/test_pulp_linear_fp32.c b/unittests/generic/test_pulp_linear_fp32.c new file mode 100644 index 00000000..c526dd51 --- /dev/null +++ b/unittests/generic/test_pulp_linear_fp32.c @@ -0,0 +1,219 @@ +#ifdef TEST + +#include +#include +#include "unity.h" +#include "test_utils.h" + +#include "pmsis.h" +#include "pulp_train_defines.h" +#include "pulp_train_utils_fp32.h" +#include "pulp_linear_fp32.h" 
+#include "pulp_matmul_fp32.h" + +#define DELTA 1e-12 + + +// known parameters +static float WEIGHTS[] = {0.009999999776482582f, 0.019999999552965164f, 0.029999999329447746f, 0.03999999910593033f, 0.05000000074505806f, 0.05999999865889549f, 0.07000000029802322f, 0.07999999821186066f, 0.09000000357627869f, 0.10000000149011612f, 0.10999999940395355f, 0.11999999731779099f, 0.12999999523162842f, 0.14000000059604645f, 0.15000000596046448f, 0.1599999964237213f, 0.17000000178813934f, 0.18000000715255737f, 0.1899999976158142f, 0.20000000298023224f, 0.20999999344348907f, 0.2199999988079071f, 0.23000000417232513f, 0.23999999463558197f, 0.25f, 0.25999999046325684f, 0.27000001072883606f, 0.2800000011920929f, 0.28999999165534973f, 0.30000001192092896f, 0.3100000023841858f, 0.3199999928474426f, 0.33000001311302185f, 0.3400000035762787f, 0.3499999940395355f, 0.36000001430511475f, 0.3700000047683716f, 0.3799999952316284f, 0.38999998569488525f, 0.4000000059604645f, 0.4099999964237213f, 0.41999998688697815f, 0.4300000071525574f, 0.4399999976158142f, 0.44999998807907104f, 0.46000000834465027f, 0.4699999988079071f, 0.47999998927116394f, 0.49000000953674316f, 0.5f, 0.5099999904632568f, 0.5199999809265137f, 0.5299999713897705f, 0.5400000214576721f, 0.550000011920929f, 0.5600000023841858f, 0.5699999928474426f, 0.5799999833106995f, 0.5899999737739563f, 0.6000000238418579f, 0.6100000143051147f, 0.6200000047683716f, 0.6299999952316284f, 0.6399999856948853f}; +static float BIASES[] = {0.5f, 1.0f, 1.5f, 2.0f, 2.5f, 3.0f, 3.5f, 4.0f}; + +// known input and output gradient data +static float INPUT[] = {0.6613521575927734f, 0.266924113035202f, 0.06167725846171379f, 0.6213173270225525f, -0.4519059658050537f, -0.16613022983074188f, -1.522768497467041f, 0.38168391585350037f}; +static float OUTPUT_GRAD[] = {-0.14249178767204285f, -0.02044878900051117f, 0.10159420967102051f, 0.223637193441391f, 0.34568023681640625f, 0.4677232503890991f, 0.5897662043571472f, 0.7118092179298401f}; + +// expected output 
and expected weight/bias/input gradients +static float EXPECTED_OUTPUT[] = {0.4300328195095062f, 0.9182048439979553f, 1.406376838684082f, 1.894548773765564f, 2.382720947265625f, 2.8708930015563965f, 3.359064817428589f, 3.8472368717193604f}; +static float EXPECTED_WEIGHT_GRAD[] = {-0.09423725306987762f, -0.03803449496626854f, -0.008788502775132656f, -0.08853261917829514f, 0.06439288705587387f, 0.0236721932888031f, 0.21698200702667236f, -0.05438682436943054f, -0.013523850589990616f, -0.005458274856209755f, -0.0012612252030521631f, -0.012705187313258648f, 0.00924092996865511f, 0.0033971620723605156f, 0.031138772144913673f, -0.007804973982274532f, 0.06718955188989639f, 0.02711794339120388f, 0.00626605236902833f, 0.06312224268913269f, -0.0459110289812088f, -0.01687786914408207f, -0.15470446646213531f, 0.03877687454223633f, 0.1479029357433319f, 0.059694159775972366f, 0.013793328776955605f, 0.1389496624469757f, -0.10106298327445984f, -0.03715289756655693f, -0.34054768085479736f, 0.08535871654748917f, 0.2286163717508316f, 0.0922703891992569f, 0.021320609375834465f, 0.2147771269083023f, -0.15621496737003326f, -0.057427939027547836f, -0.5263909697532654f, 0.1319405883550644f, 0.3093297779560089f, 0.12484661489725113f, 0.028847888112068176f, 0.2906045615673065f, -0.2113669216632843f, -0.07770296931266785f, -0.7122342586517334f, 0.17852243781089783f, 0.39004313945770264f, 0.15742282569408417f, 0.03637516126036644f, 0.36643195152282715f, -0.26651886105537415f, -0.09797799587249756f, -0.8980773687362671f, 0.22510427236557007f, 0.47075656056404114f, 0.1899990439414978f, 0.0439024418592453f, 0.44225940108299255f, -0.3216708302497864f, -0.11825302988290787f, -1.0839205980300903f, 0.2716861367225647f}; +static float EXPECTED_BIAS_GRAD[] = {-0.14249178767204285f, -0.02044878900051117f, 0.10159420967102051f, 0.223637193441391f, 0.34568023681640625f, 0.4677232503890991f, 0.5897662043571472f, 0.7118092179298401f}; +static float EXPECTED_INPUT_GRAD[] = {1.0704727172851562f, 
1.0932453870773315f, 1.1160180568695068f, 1.1387908458709717f, 1.161563515663147f, 1.1843361854553223f, 1.2071088552474976f, 1.2298816442489624f}; + +struct TestVector +{ + struct blob in; // in + expected in grad + struct blob out; // out_grad + expected out + struct blob weight; // weight + expected weight grad + struct blob bias; // bias + expected bias grad +}; + +static struct TestVector test_vectors[] = { + { // in = 8, out = 8 + .in = { + .data = INPUT, + .diff = EXPECTED_INPUT_GRAD, + .dim = 8 + }, + .out = { + .diff = OUTPUT_GRAD, + .data = EXPECTED_OUTPUT, + .dim = 8 + }, + .weight = { + .data = WEIGHTS, + .diff = EXPECTED_WEIGHT_GRAD, + .dim = 8*8 + }, + .bias = { + .data = BIASES, + .diff = EXPECTED_BIAS_GRAD, + .dim = 8 + } + }, +}; + +// create a deep copy of a test vector +void copy_test_vector(const struct TestVector *src, struct TestVector* dst) +{ + *dst = *src; + + dst->in.data = malloc(src->in.dim * sizeof(float)); + dst->in.diff = malloc(src->in.dim * sizeof(float)); + dst->out.data = malloc(src->out.dim * sizeof(float)); + dst->out.diff = malloc(src->out.dim * sizeof(float)); + dst->weight.data = malloc(src->weight.dim * sizeof(float)); + dst->weight.diff = malloc(src->weight.dim * sizeof(float)); + dst->bias.data = malloc(src->bias.dim * sizeof(float)); + dst->bias.diff = malloc(src->bias.dim * sizeof(float)); + memcpy(dst->in.data, src->in.data, src->in.dim * sizeof(float)); + memcpy(dst->in.diff, src->in.diff, src->in.dim * sizeof(float)); + memcpy(dst->out.diff, src->out.diff, src->out.dim * sizeof(float)); + memcpy(dst->out.data, src->out.data, src->out.dim * sizeof(float)); + memcpy(dst->weight.data, src->weight.data, src->weight.dim * sizeof(float)); + memcpy(dst->weight.diff, src->weight.diff, src->weight.dim * sizeof(float)); + memcpy(dst->bias.data, src->bias.data, src->bias.dim * sizeof(float)); + memcpy(dst->bias.diff, src->bias.diff, src->bias.dim * sizeof(float)); +} + +// free a copied test vector +void free_test_vector(struct 
TestVector *v) +{ + free(v->in.data); + free(v->in.diff); + free(v->out.data); + free(v->out.diff); + free(v->weight.data); + free(v->weight.diff); + free(v->bias.data); + free(v->bias.diff); +} + +void create_test_vectors(struct Linear_args* args, struct TestVector* expected) +{ + static struct TestVector a; + struct TestVector v = test_vectors[0]; + + // create two deep copies of the test vector so that we don't overwrite the + // original one. One copy to populate args and one as expected values + copy_test_vector(&v, expected); + copy_test_vector(&v, &a); + + // set some buffers to zero for the argument blobs + set_array_fp32(a.in.diff, a.in.dim, 0.0); + set_array_fp32(a.out.data, a.out.dim, 0.0); + set_array_fp32(a.weight.diff, a.weight.dim, 0.0); + set_array_fp32(a.bias.diff, a.bias.dim, 0.0); + + // potulate linear parameter struct + args->input = &a.in; + args->coeff = &a.weight; + args->bias = &a.bias; + args->output = &a.out; + args->skip_wg_grad = 0; + args->skip_in_grad = 0; + args->opt_matmul_type_fw = 0; + args->opt_matmul_type_wg = 0; + args->opt_matmul_type_ig = 0; + args->use_biases = 1; +} + +void free_test_vectors(struct Linear_args* args, struct TestVector* expected) +{ + free(args->input->data); + free(args->input->diff); + free(args->output->data); + free(args->output->diff); + free(args->coeff->data); + free(args->coeff->diff); + free(args->bias->data); + free(args->bias->diff); + free_test_vector(expected); +} + +// called before each test +void setUp(void) +{ +} + +// called after each test +void tearDown(void) +{ +} + +void test_pulp_linear_fp32_fw_cl(void) +{ + struct Linear_args args; + struct TestVector expected; + create_test_vectors(&args, &expected); + + pulp_linear_fp32_fw_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.out.data, args.output->data, args.output->dim); + + free_test_vectors(&args, &expected); +} + +void test_pulp_linear_fp32_bw_param_grads_cl(void) +{ + struct Linear_args args; + struct TestVector 
expected; + create_test_vectors(&args, &expected); + + pulp_linear_fp32_bw_param_grads_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); + + free_test_vectors(&args, &expected); +} + +void test_pulp_linear_fp32_bw_input_grads_cl(void) +{ + struct Linear_args args; + struct TestVector expected; + create_test_vectors(&args, &expected); + + pulp_linear_fp32_bw_input_grads_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); + + free_test_vectors(&args, &expected); +} + +TEST_CASE(0, 0) // calculate both weight and input gradients +TEST_CASE(1, 0) // skip weight gradient calculation +TEST_CASE(0, 1) // skip input gradient calculation +void test_pulp_linear_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) +{ + struct Linear_args args; + struct TestVector expected; + create_test_vectors(&args, &expected); + + // test skip grad calculations + args.skip_wg_grad = skip_wg_grad; + args.skip_in_grad = skip_in_grad; + + if (skip_wg_grad) { + set_array_fp32(expected.weight.diff, expected.weight.dim, 0); + set_array_fp32(expected.bias.diff, expected.bias.dim, 0); + } + if (skip_in_grad) { + set_array_fp32(expected.in.diff, expected.in.dim, 0); + } + + pulp_linear_fp32_bw_cl(&args); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); + + free_test_vectors(&args, &expected); +} + + +#endif // TEST diff --git a/unittests/generic/test_pulp_matmul_fp32.c b/unittests/generic/test_pulp_matmul_fp32.c index 6079af30..74290b68 100644 --- a/unittests/generic/test_pulp_matmul_fp32.c +++ b/unittests/generic/test_pulp_matmul_fp32.c @@ -1,6 +1,8 @@ #ifdef TEST 
+#include #include "unity.h" +#include "test_utils.h" #include "pmsis.h" #include "pulp_train_defines.h" @@ -9,447 +11,320 @@ #define DELTA 1e-12 -#define IN_CH 9 -#define MID_CH 9 -#define OUT_CH 9 - -static float A[IN_CH*MID_CH] = { 1.0, 4.3, 2.1, 0.9, -1.5, 2.9, -1.2, 7.8, 9.3, -2.3, 5.8, 0.6, 1.4, 8.5, -8.6, -8.3, -9.6, 6.7, 5.6, 7.4, 9.6, 6.0, -0.8, 5.6, -7.6, 2.8, -7.1, 8.9, 0.4, -1.7, -4.7, 5.5, -0.9, 1.4, -9.6, 2.4, 2.2, 2.3, 8.9, 3.6, -2.8, -1.3, 4.0, -8.8, 3.3, 3.4, -5.8, -7.4, -3.7, -2.7, 1.4, -1.2, 9.8, -8.0, -5.8, -6.8, 3.1, -4.9, -0.7, -5.1, -6.8, -7.8, 3.1, -7.2, -6.1, -2.6, 6.4, -8.1, 6.8, -8.1, 9.5, -0.6, 9.5, 2.1, 4.8, -9.2, -4.3, -7.6, -4.1, -7.6, -3.6 }; -static float B[MID_CH*OUT_CH] = { -1.7, -8.7, 3.8, 1.3, -4.7, 0.5, -8.1, 1.5, 8.6, -3.6, 3.3, -7.4, 4.3, -4.2, -6.3, 1.7, -9.6, 6.6, -9.9, 3.6, -4.6, 4.7, 9.2, -5.0, 1.5, 1.8, 1.4, -5.5, 9.1, -1.1, 6.9, 4.0, -4.1, 6.3, -2.1, 7.6, 1.6, 7.6, 3.9, 4.5, 0.0, 9.1, 2.9, -1.5, 2.1, -9.6, -4.0, 3.2, -4.2, 2.4, -1.4, -7.3, -4.0, 1.4, 1.8, 1.5, 3.1, 3.0, -1.4, 7.9, -2.6, -1.3, 7.8, 6.1, 4.1, -8.0, 8.4, 4.3, 10.0, -7.0, 7.4, -6.8, 2.3, -7.5, 7.0, 6.1, 1.4, -1.9, -8.6, 3.9, -0.9 }; -static float Bt[MID_CH*OUT_CH] = { -1.7, -3.6, -9.9, -5.5, 1.6, -9.6, 1.8, 6.1, 2.3, -8.7, 3.3, 3.6, 9.1, 7.6, -4.0, 1.5, 4.1, -7.5, 3.8, -7.4, -4.6, -1.1, 3.9, 3.2, 3.1, -8.0, 7.0, 1.3, 4.3, 4.7, 6.9, 4.5, -4.2, 3.0, 8.4, 6.1, -4.7, -4.2, 9.2, 4.0, 0.0, 2.4, -1.4, 4.3, 1.4, 0.5, -6.3, -5.0, -4.1, 9.1, -1.4, 7.9, 10.0, -1.9, -8.1, 1.7, 1.5, 6.3, 2.9, -7.3, -2.6, -7.0, -8.6, 1.5, -9.6, 1.8, -2.1, -1.5, -4.0, -1.3, 7.4, 3.9, 8.6, 6.6, 1.4, 7.6, 2.1, 1.4, 7.8, -6.8, -0.9 }; -static float C[IN_CH*OUT_CH] = { -6.35, -41.33, -36.26, 135.59, 55.36, -7.64, -148.95, 48.31, -23.1, 7.46, 50.99, 47.64, 44.13, -43.35, -131.34, 156.8, -73.46, 30.3, -232.17, 89.71, -165.1, 55.91, 81.1, -150.09, 37.44, -76.25, 90.27, -6.97, -134.86, 160.36, -60.4, -119.99, -8.49, -38.13, -51.83, 125.41, -150.82, -17.66, 37.26, 30.49, 34.34, -158.98, 46.31, 
-58.37, 154.29, 130.17, -36.77, -50.22, -68.94, -38.52, 167.6, -86.54, 96.56, -120.03, 25.75, -55.76, 63.38, -104.76, 32.82, -92.31, 90.02, 57.11, -139.06, -11.51, 33.93, -92.44, -16.82, 128.81, -29.49, -29.04, 93.08, -191.91, -16.57, -154.75, -9.6, -105.07, -96.33, -124.8, 13.69, -6.05, 35.63 }; - -static float A_k1[IN_CH*MID_CH] = { 4.4, 7.3, 9.5, 7.1, -9.8, -2.8, 4.6, -6.6, 0.4 }; -static float B_k1[MID_CH*OUT_CH] = { -8.9, -6.0, -9.6, 5.9, -5.5, -3.1, 8.6, 4.1, -9.4 }; -static float C_k1[IN_CH*OUT_CH] = { -39.16, -26.4, -42.24, 25.96, -24.2, -13.64, 37.84, 18.04, -41.36, -64.97, -43.8, -70.08, 43.07, -40.15, -22.63, 62.78, 29.93, -68.62, -84.55, -57.0, -91.2, 56.05, -52.25, -29.45, 81.7, 38.95, -89.3, -63.19, -42.6, -68.16, 41.89, -39.05, -22.01, 61.06, 29.11, -66.74, 87.22, 58.8, 94.08, -57.82, 53.9, 30.38, -84.28, -40.18, 92.12, 24.92, 16.8, 26.88, -16.52, 15.4, 8.68, -24.08, -11.48, 26.32, -40.94, -27.6, -44.16, 27.14, -25.3, -14.26, 39.56, 18.86, -43.24, 58.74, 39.6, 63.36, -38.94, 36.3, 20.46, -56.76, -27.06, 62.04, -3.56, -2.4, -3.84, 2.36, -2.2, -1.24, 3.44, 1.64, -3.76 }; +static float A[] = { 1.0, 4.3, 2.1, 0.9, -1.5, 2.9, -1.2, 7.8, 9.3, -2.3, 5.8, 0.6, 1.4, 8.5, -8.6, -8.3, -9.6, 6.7, 5.6, 7.4, 9.6, 6.0, -0.8, 5.6, -7.6, 2.8, -7.1, 8.9, 0.4, -1.7, -4.7, 5.5, -0.9, 1.4, -9.6, 2.4, 2.2, 2.3, 8.9, 3.6, -2.8, -1.3, 4.0, -8.8, 3.3, 3.4, -5.8, -7.4, -3.7, -2.7, 1.4, -1.2, 9.8, -8.0, -5.8, -6.8, 3.1, -4.9, -0.7, -5.1, -6.8, -7.8, 3.1, -7.2, -6.1, -2.6, 6.4, -8.1, 6.8, -8.1, 9.5, -0.6, 9.5, 2.1, 4.8, -9.2, -4.3, -7.6, -4.1, -7.6, -3.6 }; +static float B[] = { -1.7, -8.7, 3.8, 1.3, -4.7, 0.5, -8.1, 1.5, 8.6, -3.6, 3.3, -7.4, 4.3, -4.2, -6.3, 1.7, -9.6, 6.6, -9.9, 3.6, -4.6, 4.7, 9.2, -5.0, 1.5, 1.8, 1.4, -5.5, 9.1, -1.1, 6.9, 4.0, -4.1, 6.3, -2.1, 7.6, 1.6, 7.6, 3.9, 4.5, 0.0, 9.1, 2.9, -1.5, 2.1, -9.6, -4.0, 3.2, -4.2, 2.4, -1.4, -7.3, -4.0, 1.4, 1.8, 1.5, 3.1, 3.0, -1.4, 7.9, -2.6, -1.3, 7.8, 6.1, 4.1, -8.0, 8.4, 4.3, 10.0, -7.0, 7.4, -6.8, 2.3, -7.5, 7.0, 
6.1, 1.4, -1.9, -8.6, 3.9, -0.9 }; +static float Bt[] = { -1.7, -3.6, -9.9, -5.5, 1.6, -9.6, 1.8, 6.1, 2.3, -8.7, 3.3, 3.6, 9.1, 7.6, -4.0, 1.5, 4.1, -7.5, 3.8, -7.4, -4.6, -1.1, 3.9, 3.2, 3.1, -8.0, 7.0, 1.3, 4.3, 4.7, 6.9, 4.5, -4.2, 3.0, 8.4, 6.1, -4.7, -4.2, 9.2, 4.0, 0.0, 2.4, -1.4, 4.3, 1.4, 0.5, -6.3, -5.0, -4.1, 9.1, -1.4, 7.9, 10.0, -1.9, -8.1, 1.7, 1.5, 6.3, 2.9, -7.3, -2.6, -7.0, -8.6, 1.5, -9.6, 1.8, -2.1, -1.5, -4.0, -1.3, 7.4, 3.9, 8.6, 6.6, 1.4, 7.6, 2.1, 1.4, 7.8, -6.8, -0.9 }; +static float C[] = { -6.35, -41.33, -36.26, 135.59, 55.36, -7.64, -148.95, 48.31, -23.1, 7.46, 50.99, 47.64, 44.13, -43.35, -131.34, 156.8, -73.46, 30.3, -232.17, 89.71, -165.1, 55.91, 81.1, -150.09, 37.44, -76.25, 90.27, -6.97, -134.86, 160.36, -60.4, -119.99, -8.49, -38.13, -51.83, 125.41, -150.82, -17.66, 37.26, 30.49, 34.34, -158.98, 46.31, -58.37, 154.29, 130.17, -36.77, -50.22, -68.94, -38.52, 167.6, -86.54, 96.56, -120.03, 25.75, -55.76, 63.38, -104.76, 32.82, -92.31, 90.02, 57.11, -139.06, -11.51, 33.93, -92.44, -16.82, 128.81, -29.49, -29.04, 93.08, -191.91, -16.57, -154.75, -9.6, -105.07, -96.33, -124.8, 13.69, -6.05, 35.63 }; + +static float A_k1[] = { 4.4, 7.3, 9.5, 7.1, -9.8, -2.8, 4.6, -6.6, 0.4 }; +static float B_k1[] = { -8.9, -6.0, -9.6, 5.9, -5.5, -3.1, 8.6, 4.1, -9.4 }; +static float C_k1[] = { -39.16, -26.4, -42.24, 25.96, -24.2, -13.64, 37.84, 18.04, -41.36, -64.97, -43.8, -70.08, 43.07, -40.15, -22.63, 62.78, 29.93, -68.62, -84.55, -57.0, -91.2, 56.05, -52.25, -29.45, 81.7, 38.95, -89.3, -63.19, -42.6, -68.16, 41.89, -39.05, -22.01, 61.06, 29.11, -66.74, 87.22, 58.8, 94.08, -57.82, 53.9, 30.38, -84.28, -40.18, 92.12, 24.92, 16.8, 26.88, -16.52, 15.4, 8.68, -24.08, -11.48, 26.32, -40.94, -27.6, -44.16, 27.14, -25.3, -14.26, 39.56, 18.86, -43.24, 58.74, 39.6, 63.36, -38.94, 36.3, 20.46, -56.76, -27.06, 62.04, -3.56, -2.4, -3.84, 2.36, -2.2, -1.24, 3.44, 1.64, -3.76 }; + +static struct matMul_args test_vectors[] = { + { // 9x9 x 9x9 + .A = A, + .B = B, + 
.C = C, + .N = 9, + .K = 9, + .M = 9, + .trans_B = 0 + }, + { // 9x9 x 9x9 transposed + .A = A, + .B = Bt, + .C = C, + .N = 9, + .K = 9, + .M = 9, + .trans_B = 1 + }, + { // 9x1 x 1x9 + .A = A_k1, + .B = B_k1, + .C = C_k1, + .N = 9, + .K = 1, + .M = 9, + .trans_B = 0 + }, + { // 9x1 x 1x9 transposed + .A = A_k1, + .B = B_k1, + .C = C_k1, + .N = 9, + .K = 1, + .M = 9, + .trans_B = 1 + } +}; static struct matMul_args mm_args; -static struct matMul_args mm_args_k1; -static float result[IN_CH*OUT_CH]; -static float result_k1[IN_CH*OUT_CH]; +static struct matMul_args mm_expected; + +void create_test_vectors(int transp, int use_k1) +{ + int idx = transp + use_k1*2; + mm_expected = test_vectors[idx]; + mm_args = mm_expected; + mm_args.C = malloc(mm_args.N * mm_args.M * sizeof(float)); + set_array_fp32(mm_args.C, mm_args.N * mm_args.M, 0.0); +} -void set_array(float *array, size_t size, float value) +void free_test_vectors(struct matMul_args* args) { - for (int i = 0; i < size; i++) { - array[i] = value; + if (args->C != NULL) { + free(args->C); + args->C = NULL; } } // called before each test void setUp(void) { - mm_args.A = A; - mm_args.B = B; - mm_args.C = result; - mm_args.N = IN_CH; - mm_args.K = MID_CH; - mm_args.M = OUT_CH; - mm_args.trans_B = 0; - - mm_args_k1.A = A_k1; - mm_args_k1.B = B_k1; - mm_args_k1.C = result_k1; - mm_args_k1.N = IN_CH; - mm_args_k1.K = 1; - mm_args_k1.M = OUT_CH; - mm_args_k1.trans_B = 0; - - /* make sure the result buffers always start from a known state */ - set_array(result, IN_CH*OUT_CH, 0.0); - set_array(result_k1, OUT_CH, 0.0); } // called after each test void tearDown(void) { + free_test_vectors(&mm_args); } -void test_pulp_matmul_fp32_mm(void) +TEST_CASE(0,0) +TEST_CASE(1,0) +TEST_CASE(0,1) +TEST_CASE(1,1) +void test_pulp_matmul_fp32_mm(int transp, int use_k1) { + create_test_vectors(transp, use_k1); pi_cl_team_fork(NUM_CORES, mm, &mm_args); - TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, C, result, IN_CH*OUT_CH); - - 
pi_cl_team_fork(NUM_CORES, mm, &mm_args_k1); - TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, C_k1, result_k1, IN_CH*OUT_CH); -} - -void test_pulp_matmul_fp32_mm_transp(void) -{ - mm_args.trans_B = 1; - mm_args.B = Bt; - pi_cl_team_fork(NUM_CORES, mm, &mm_args); - TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, C, result, IN_CH*OUT_CH); - - mm_args_k1.trans_B = 1; - pi_cl_team_fork(NUM_CORES, mm, &mm_args_k1); - TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, C_k1, result_k1, IN_CH*OUT_CH); -} - -void test_pulp_matmul_fp32_mm_u2(void) -{ - pi_cl_team_fork(NUM_CORES, mm_u2, &mm_args); - TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, C, result, IN_CH*OUT_CH); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, mm_expected.C, mm_args.C, mm_args.N*mm_args.M); } -void test_pulp_matmul_fp32_mm_u2_transp(void) +TEST_CASE(0,0) +TEST_CASE(1,0) +void test_pulp_matmul_fp32_mm_u2(int transp, int use_k1) { - mm_args.trans_B = 1; - mm_args.B = Bt; + create_test_vectors(transp, use_k1); pi_cl_team_fork(NUM_CORES, mm_u2, &mm_args); - TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, C, result, IN_CH*OUT_CH); + TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, mm_expected.C, mm_args.C, mm_args.N*mm_args.M); } -void test_pulp_matmul_fp32_mm_add(void) +TEST_CASE(0,0) +TEST_CASE(1,0) +TEST_CASE(0,1) +TEST_CASE(1,1) +void test_pulp_matmul_fp32_mm_add(int transp, int use_k1) { - float expected[IN_CH*OUT_CH]; + create_test_vectors(transp, use_k1); + set_array_fp32(mm_args.C, mm_args.N * mm_args.M, 1.0); - set_array(result, IN_CH*OUT_CH, 1.0); // set result buffer to non-zero - for (int i=0; i Date: Tue, 10 Mar 2026 11:18:42 +0100 Subject: [PATCH 4/4] Simplified conv2d_fp32 and linear_fp32 unittests Moved test vectors to static global variables, which shortens the test cases itself --- unittests/generic/test_pulp_conv2d_fp32.c | 102 ++++++++++------------ unittests/generic/test_pulp_linear_fp32.c | 73 ++++++---------- unittests/generic/test_pulp_matmul_fp32.c | 10 +-- 3 files changed, 77 insertions(+), 108 deletions(-) diff --git 
a/unittests/generic/test_pulp_conv2d_fp32.c b/unittests/generic/test_pulp_conv2d_fp32.c index 63487054..69f40490 100644 --- a/unittests/generic/test_pulp_conv2d_fp32.c +++ b/unittests/generic/test_pulp_conv2d_fp32.c @@ -143,6 +143,9 @@ static struct TestVector test_vectors[] = { } }; +static struct Conv2D_args args; +static struct TestVector expected; + // create a deep copy of a test vector void copy_test_vector(const struct TestVector *src, struct TestVector* dst) { @@ -179,7 +182,7 @@ void free_test_vector(struct TestVector *v) free(v->bias.diff); } -void create_test_vectors(int hwc, int use_im2col, int padding, struct Conv2D_args* args, struct TestVector* expected) +void create_test_vectors(int hwc, int use_im2col, int padding) { int idx = hwc + 2*padding; static struct TestVector a; @@ -190,9 +193,9 @@ void create_test_vectors(int hwc, int use_im2col, int padding, struct Conv2D_arg v.out.dim = v.out.C * v.out.H * v.out.W; v.weight.dim = v.weight.C * v.weight.H * v.weight.W * v.out.C; - // create two deep copies of the test vector so that we don't overwrite the - // original one. One copy to populate args and one as expected values - copy_test_vector(&v, expected); + // create two deep copies of the test vector so that the original one + // cannot be overwritten. 
One copy to populate args and one as expected values + copy_test_vector(&v, &expected); copy_test_vector(&v, &a); // set some buffers to zero for the argument blobs @@ -211,42 +214,42 @@ void create_test_vectors(int hwc, int use_im2col, int padding, struct Conv2D_arg float *bt_buffer = calloc(bt_buffer_size, sizeof(float)); // potulate conv2d parameter struct - args->input = &a.in; - args->coeff = &a.weight; - args->bias = &a.bias; - args->output = &a.out; - args->Lpad = padding; - args->Rpad = padding; - args->Upad = padding; - args->Dpad = padding; - args->stride_h = 1; - args->stride_w = 1; - args->i2c_buffer = im2col_buffer; - args->bt_buffer = bt_buffer; - args->skip_wg_grad = 0; - args->skip_in_grad = 0; - args->HWC = hwc; - args->opt_matmul_type_fw = 0; - args->opt_matmul_type_wg = 0; - args->opt_matmul_type_ig = 0; - args->USE_IM2COL = use_im2col; - args->USE_DMA_IM2COL = 0; - args->USE_BIASES = 1; + args.input = &a.in; + args.coeff = &a.weight; + args.bias = &a.bias; + args.output = &a.out; + args.Lpad = padding; + args.Rpad = padding; + args.Upad = padding; + args.Dpad = padding; + args.stride_h = 1; + args.stride_w = 1; + args.i2c_buffer = im2col_buffer; + args.bt_buffer = bt_buffer; + args.skip_wg_grad = 0; + args.skip_in_grad = 0; + args.HWC = hwc; + args.opt_matmul_type_fw = 0; + args.opt_matmul_type_wg = 0; + args.opt_matmul_type_ig = 0; + args.USE_IM2COL = use_im2col; + args.USE_DMA_IM2COL = 0; + args.USE_BIASES = 1; } -void free_test_vectors(struct Conv2D_args* args, struct TestVector* expected) +void free_test_vectors(void) { - free(args->input->data); - free(args->input->diff); - free(args->output->data); - free(args->output->diff); - free(args->coeff->data); - free(args->coeff->diff); - free(args->bias->data); - free(args->bias->diff); - free(args->i2c_buffer); - free(args->bt_buffer); - free_test_vector(expected); + free(args.input->data); + free(args.input->diff); + free(args.output->data); + free(args.output->diff); + free(args.coeff->data); 
+ free(args.coeff->diff); + free(args.bias->data); + free(args.bias->diff); + free(args.i2c_buffer); + free(args.bt_buffer); + free_test_vector(&expected); } // called before each test @@ -257,6 +260,7 @@ void setUp(void) // called after each test void tearDown(void) { + free_test_vectors(); } TEST_CASE(1, 1, 0) // HWC, im2col, no padding @@ -269,9 +273,7 @@ TEST_CASE(0, 0, 1) // CHW, naive, same padding TEST_CASE(1, 0, 1) // HWC, naive, same padding void test_pulp_conv2d_fp32_fw_cl(int hwc, int use_im2col, int padding) { - struct Conv2D_args args; - struct TestVector expected; - create_test_vectors(hwc, use_im2col, padding, &args, &expected); + create_test_vectors(hwc, use_im2col, padding); // for the naive HWC case, we haven't implemented it yet so expect zeros if (hwc == 1 && use_im2col == 0) { @@ -280,8 +282,6 @@ void test_pulp_conv2d_fp32_fw_cl(int hwc, int use_im2col, int padding) pulp_conv2d_fp32_fw_cl(&args); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.out.data, args.output->data, args.output->dim); - - free_test_vectors(&args, &expected); } TEST_CASE(1, 1, 0) // HWC, im2col, no padding @@ -294,9 +294,7 @@ TEST_CASE(0, 0, 1) // CHW, naive, same padding TEST_CASE(1, 0, 1) // HWC, naive, same padding void test_pulp_conv2d_fp32_bw_param_grads_cl(int hwc, int use_im2col, int padding) { - struct Conv2D_args args; - struct TestVector expected; - create_test_vectors(hwc, use_im2col, padding, &args, &expected); + create_test_vectors(hwc, use_im2col, padding); // for the naive HWC case, we haven't implemented it yet so expect zeros if (hwc == 1 && use_im2col == 0) { @@ -307,8 +305,6 @@ void test_pulp_conv2d_fp32_bw_param_grads_cl(int hwc, int use_im2col, int paddin pulp_conv2d_fp32_bw_param_grads_cl(&args); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); - - free_test_vectors(&args, &expected); } TEST_CASE(1, 1, 0) // HWC, 
im2col, no padding @@ -321,9 +317,7 @@ TEST_CASE(0, 0, 1) // CHW, naive, same padding TEST_CASE(1, 0, 1) // HWC, naive, same padding void test_pulp_conv2d_fp32_bw_input_grads_cl(int hwc, int use_im2col, int padding) { - struct Conv2D_args args; - struct TestVector expected; - create_test_vectors(hwc, use_im2col, padding, &args, &expected); + create_test_vectors(hwc, use_im2col, padding); // for the naive HWC case, we haven't implemented it yet so expect zeros if (hwc == 1 && use_im2col == 0) { @@ -332,8 +326,6 @@ void test_pulp_conv2d_fp32_bw_input_grads_cl(int hwc, int use_im2col, int paddin pulp_conv2d_fp32_bw_input_grads_cl(&args); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); - - free_test_vectors(&args, &expected); } TEST_CASE(0, 0) // calculate both weight and input gradients @@ -341,9 +333,7 @@ TEST_CASE(1, 0) // skip weight gradient calculation TEST_CASE(0, 1) // skip input gradient calculation void test_pulp_conv2d_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) { - struct Conv2D_args args; - struct TestVector expected; - create_test_vectors(0, 0, 0, &args, &expected); + create_test_vectors(0, 0, 0); // test skip grad calculations args.skip_wg_grad = skip_wg_grad; @@ -361,8 +351,6 @@ void test_pulp_conv2d_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); - - free_test_vectors(&args, &expected); } #endif // TEST diff --git a/unittests/generic/test_pulp_linear_fp32.c b/unittests/generic/test_pulp_linear_fp32.c index c526dd51..ff8c3c37 100644 --- a/unittests/generic/test_pulp_linear_fp32.c +++ b/unittests/generic/test_pulp_linear_fp32.c @@ -61,6 +61,9 @@ static struct TestVector test_vectors[] = { }, }; +static struct Linear_args args; +static 
struct TestVector expected; + // create a deep copy of a test vector void copy_test_vector(const struct TestVector *src, struct TestVector* dst) { @@ -97,14 +100,14 @@ void free_test_vector(struct TestVector *v) free(v->bias.diff); } -void create_test_vectors(struct Linear_args* args, struct TestVector* expected) +void create_test_vectors(void) { static struct TestVector a; struct TestVector v = test_vectors[0]; // create two deep copies of the test vector so that we don't overwrite the // original one. One copy to populate args and one as expected values - copy_test_vector(&v, expected); + copy_test_vector(&v, &expected); copy_test_vector(&v, &a); // set some buffers to zero for the argument blobs @@ -114,76 +117,60 @@ void create_test_vectors(struct Linear_args* args, struct TestVector* expected) set_array_fp32(a.bias.diff, a.bias.dim, 0.0); // potulate linear parameter struct - args->input = &a.in; - args->coeff = &a.weight; - args->bias = &a.bias; - args->output = &a.out; - args->skip_wg_grad = 0; - args->skip_in_grad = 0; - args->opt_matmul_type_fw = 0; - args->opt_matmul_type_wg = 0; - args->opt_matmul_type_ig = 0; - args->use_biases = 1; + args.input = &a.in; + args.coeff = &a.weight; + args.bias = &a.bias; + args.output = &a.out; + args.skip_wg_grad = 0; + args.skip_in_grad = 0; + args.opt_matmul_type_fw = 0; + args.opt_matmul_type_wg = 0; + args.opt_matmul_type_ig = 0; + args.use_biases = 1; } -void free_test_vectors(struct Linear_args* args, struct TestVector* expected) +void free_test_vectors(void) { - free(args->input->data); - free(args->input->diff); - free(args->output->data); - free(args->output->diff); - free(args->coeff->data); - free(args->coeff->diff); - free(args->bias->data); - free(args->bias->diff); - free_test_vector(expected); + free(args.input->data); + free(args.input->diff); + free(args.output->data); + free(args.output->diff); + free(args.coeff->data); + free(args.coeff->diff); + free(args.bias->data); + free(args.bias->diff); + 
free_test_vector(&expected); } // called before each test void setUp(void) { + create_test_vectors(); } // called after each test void tearDown(void) { + free_test_vectors(); } void test_pulp_linear_fp32_fw_cl(void) { - struct Linear_args args; - struct TestVector expected; - create_test_vectors(&args, &expected); - pulp_linear_fp32_fw_cl(&args); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.out.data, args.output->data, args.output->dim); - - free_test_vectors(&args, &expected); } void test_pulp_linear_fp32_bw_param_grads_cl(void) { - struct Linear_args args; - struct TestVector expected; - create_test_vectors(&args, &expected); - pulp_linear_fp32_bw_param_grads_cl(&args); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); - - free_test_vectors(&args, &expected); } void test_pulp_linear_fp32_bw_input_grads_cl(void) { - struct Linear_args args; - struct TestVector expected; - create_test_vectors(&args, &expected); - pulp_linear_fp32_bw_input_grads_cl(&args); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); - - free_test_vectors(&args, &expected); } TEST_CASE(0, 0) // calculate both weight and input gradients @@ -191,10 +178,6 @@ TEST_CASE(1, 0) // skip weight gradient calculation TEST_CASE(0, 1) // skip input gradient calculation void test_pulp_linear_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) { - struct Linear_args args; - struct TestVector expected; - create_test_vectors(&args, &expected); - // test skip grad calculations args.skip_wg_grad = skip_wg_grad; args.skip_in_grad = skip_in_grad; @@ -211,8 +194,6 @@ void test_pulp_linear_fp32_bw_cl(int skip_wg_grad, int skip_in_grad) TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.in.diff, args.input->diff, args.input->dim); TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.weight.diff, args.coeff->diff, args.coeff->dim); 
TEST_ASSERT_FLOAT_ARRAY_WITHIN(DELTA, expected.bias.diff, args.bias->diff, args.bias->dim); - - free_test_vectors(&args, &expected); } diff --git a/unittests/generic/test_pulp_matmul_fp32.c b/unittests/generic/test_pulp_matmul_fp32.c index 74290b68..7b6c91f3 100644 --- a/unittests/generic/test_pulp_matmul_fp32.c +++ b/unittests/generic/test_pulp_matmul_fp32.c @@ -72,11 +72,11 @@ void create_test_vectors(int transp, int use_k1) set_array_fp32(mm_args.C, mm_args.N * mm_args.M, 0.0); } -void free_test_vectors(struct matMul_args* args) +void free_test_vectors(void) { - if (args->C != NULL) { - free(args->C); - args->C = NULL; + if (mm_args.C != NULL) { + free(mm_args.C); + mm_args.C = NULL; } } @@ -88,7 +88,7 @@ void setUp(void) // called after each test void tearDown(void) { - free_test_vectors(&mm_args); + free_test_vectors(); } TEST_CASE(0,0)