From 86392ca75840378c782d7a90bf71693168c07751 Mon Sep 17 00:00:00 2001 From: Moritz Scherer Date: Fri, 4 Apr 2025 15:07:15 +0200 Subject: [PATCH 1/6] Fix pmsis dependency --- CMakeLists.txt | 137 ++++++++++++++++++++++++++--------------- inc/pulp_nnx_neureka.h | 1 - neureka/hal/neureka.h | 1 + 3 files changed, 89 insertions(+), 50 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ccc952d..324a091 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,70 +1,109 @@ cmake_minimum_required(VERSION 3.18) project(pulp-nnx - VERSION 0.3.0 - DESCRIPTION "Kernel library for PULP-based NN accelerators." - LANGUAGES C) + VERSION 0.3.0 + DESCRIPTION "Kernel library for PULP-based NN accelerators." + LANGUAGES C) -add_library(pulp-nnx STATIC) +add_library(pulp-nnx INTERFACE) -target_sources(pulp-nnx PRIVATE util/pulp_nnx_util.c util/hwpe.c) -target_include_directories(pulp-nnx PUBLIC inc util) +add_library(pulp-nnx-hal STATIC) + +target_sources(pulp-nnx-hal PRIVATE util/pulp_nnx_util.c util/hwpe.c) +target_include_directories(pulp-nnx-hal PUBLIC inc util) option(USE_NE16 "Use the NE16 accelerator.") option(USE_NEUREKA "Use the N-EUREKA accelerator.") option(USE_NEUREKA_V2 "Use the N-EUREKA v2 accelerator.") +option(ENABLE_BSP "Enable the build of the BSP for your chosen accelerator. Requires the PULP-SDK.") if (NOT ${USE_NE16} AND NOT ${USE_NEUREKA} AND NOT ${USE_NEUREKA_V2}) - message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_ option.") + message(FATAL_ERROR "[PULP-NNX] No accelerator in use. Please set an appropriate USE_ option.") +endif() + +if(${ENABLE_BSP}) + add_library(pulp-nnx-bsp STATIC) endif() if (${USE_NE16}) - message(STATUS "[PULP-NNX] Using the NE16 accelerator.") - target_sources(pulp-nnx - PRIVATE - ne16/bsp/ne16_pulp_bsp.c - ne16/hal/ne16.c - ne16/hal/ne16_task.c - src/pulp_nnx_ne16.c - ) - target_include_directories(pulp-nnx - PUBLIC - ne16/bsp - ne16/hal - ne16/gvsoc - ) + message(STATUS "[PULP-NNX] Using the NE16 accelerator.") + target_sources(pulp-nnx-hal + PRIVATE + ne16/hal/ne16.c + ne16/hal/ne16_task.c + ) + target_include_directories(pulp-nnx-hal + PUBLIC + ne16/hal + ne16/gvsoc + ) + if(${ENABLE_BSP}) + target_sources(pulp-nnx-bsp + PRIVATE + ne16/bsp/ne16_pulp_bsp.c + src/pulp_nnx_ne16.c + ) + target_include_directories(pulp-nnx-bsp + PUBLIC + ne16/bsp + ) + endif() + endif() if (${USE_NEUREKA}) - message(STATUS "[PULP-NNX] Using the N-EUREKA accelerator.") - target_sources(pulp-nnx - PRIVATE - neureka/bsp/neureka_siracusa_bsp.c - neureka/hal/neureka.c - neureka/hal/neureka_task.c - src/pulp_nnx_neureka.c - ) - target_include_directories(pulp-nnx - PUBLIC - neureka/bsp - neureka/hal - neureka/gvsoc - ) + message(STATUS "[PULP-NNX] Using the N-EUREKA accelerator.") + target_sources(pulp-nnx-hal + PRIVATE + neureka/hal/neureka.c + neureka/hal/neureka_task.c + ) + target_include_directories(pulp-nnx-hal + PUBLIC + neureka/hal + neureka/gvsoc + ) + if(${ENABLE_BSP}) + target_sources(pulp-nnx-bsp + PRIVATE + neureka/bsp/neureka_siracusa_bsp.c + src/pulp_nnx_neureka.c + ) + target_include_directories(pulp-nnx-bsp + PUBLIC + neureka/bsp + ) + endif() + endif() if (${USE_NEUREKA_V2}) - message(STATUS "[PULP-NNX] Using the N-EUREKA v2 accelerator.") - target_sources(pulp-nnx - PRIVATE - neureka_v2/bsp/neureka_v2_siracusa_bsp.c - neureka_v2/hal/neureka_v2.c - neureka_v2/hal/neureka_v2_task.c - src/pulp_nnx_neureka_v2.c - ) - target_include_directories(pulp-nnx - PUBLIC - neureka_v2/bsp - neureka_v2/hal - neureka_v2/gvsoc - ) + message(STATUS "[PULP-NNX] Using the N-EUREKA v2 accelerator.") + target_sources(pulp-nnx-hal + PRIVATE + neureka_v2/hal/neureka_v2.c + neureka_v2/hal/neureka_v2_task.c + ) + target_include_directories(pulp-nnx-hal + PUBLIC + neureka_v2/hal + neureka_v2/gvsoc + ) + if(${ENABLE_BSP}) + target_sources(pulp-nnx-bsp + PRIVATE + neureka_v2/bsp/neureka_v2_pulp_bsp.c + src/pulp_nnx_neureka_v2.c + ) + target_include_directories(pulp-nnx-bsp + PUBLIC + neureka_v2/bsp + ) + endif() + +endif() + +target_link_libraries(pulp-nnx INTERFACE pulp-nnx-hal) +if(${ENABLE_BSP}) + target_link_libraries(pulp-nnx INTERFACE pulp-nnx-bsp) endif() diff --git a/inc/pulp_nnx_neureka.h b/inc/pulp_nnx_neureka.h index fea4bb4..0ddef59 100644 --- a/inc/pulp_nnx_neureka.h +++ b/inc/pulp_nnx_neureka.h @@ -19,7 +19,6 @@ */ #include "neureka.h" -#include "neureka_siracusa_bsp.h" #include "neureka_task.h" #include diff --git a/neureka/hal/neureka.h b/neureka/hal/neureka.h index b17c8b5..4db09f2 100644 --- a/neureka/hal/neureka.h +++ b/neureka/hal/neureka.h @@ -30,6 +30,7 @@ typedef struct neureka_dev_t { hwpe_dev_t hwpe_dev; /* Implements the HWPE device interface */ } neureka_dev_t; + int neureka_task_queue_tasks_in_flight(const neureka_dev_t *dev); int neureka_task_queue_empty(const neureka_dev_t *dev); int neureka_task_queue_full(const neureka_dev_t *dev); From f4e6286bb1dff22ccf358acca08d5c73557af69f Mon Sep 17 00:00:00 2001 From: georg Date: Tue, 8 Apr 2025 19:04:43 +0200 Subject: [PATCH 2/6] fix neureka_task logic and parameters for our architecture --- neureka/hal/neureka_task.c | 15 +++++++++------ neureka/hal/neureka_task_defs.h | 22 ++++++++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c index 9a311dc..b024dd2 100644 --- a/neureka/hal/neureka_task.c +++ b/neureka/hal/neureka_task.c @@ -165,15 +165,18 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in, .d2 = h_out_stride}; task->data.cfg.output_stride = output_stride; - task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES; if (task->kernel_shape == 1) { // 1x1 - task->data.cfg.weights_stride.d1 = - NEUREKA_WEIGHT_BANDWIDTH_BYTES * num_k_in; + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1; + task->data.cfg.weights_stride.d1 = + (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in; } else if (!task->depthwise) { // 3x3 - task->data.cfg.weights_stride.d1 = - NEUREKA_WEIGHT_BANDWIDTH_BYTES * task->qw * num_k_in; + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; + task->data.cfg.weights_stride.d1 = + NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in; + } else { // 3x3 depthwise - task->data.cfg.weights_stride.d1 = 0; + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; + task->data.cfg.weights_stride.d1 = 0; } task->data.cfg.weights_stride.d2 = 0; } diff --git a/neureka/hal/neureka_task_defs.h b/neureka/hal/neureka_task_defs.h index fa08289..ffc0c79 100644 --- a/neureka/hal/neureka_task_defs.h +++ b/neureka/hal/neureka_task_defs.h @@ -23,20 +23,26 @@ /* ARHITECTURE */ -#define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (6) -#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (6) +#define NNX_NEUREKA_PE_H (4) +#define NNX_NEUREKA_PE_W (4) +#define NNX_NEUREKA_BANDWIDTH_1x1 (256) +#define NNX_NEUREKA_BANDWIDTH_3x3 (288) + +#define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H) +#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W) #define NEUREKA_SUBTILE_INPUT_CHANNEL_1x1 (32) -#define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (8) -#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (8) -#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (28) +#define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (NNX_NEUREKA_PE_H+2) +#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W+2) +#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (32) -#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (6) -#define NEUREKA_SUBTILE_OUTPUT_WIDTH (6) +#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (NNX_NEUREKA_PE_H) +#define NEUREKA_SUBTILE_OUTPUT_WIDTH (NNX_NEUREKA_PE_W) #define NEUREKA_SUBTILE_OUTPUT_CHANNEL (32) #define NEUREKA_OUTPUT_BANDWIDTH_BYTES (32) -#define NEUREKA_WEIGHT_BANDWIDTH_BYTES (32) +#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1/8) +#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3/8) /* TASK REGISTERS */ From 7bec621d369bec6330bce061416d836d888fbfaf Mon Sep 17 00:00:00 2001 From: georg Date: Wed, 9 Apr 2025 15:11:49 +0200 Subject: [PATCH 3/6] fix compiler warnings about const pointers --- util/hwpe.c | 22 +++++++++++----------- util/hwpe.h | 22 +++++++++++----------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/util/hwpe.c b/util/hwpe.c index 0430081..730e033 100644 --- a/util/hwpe.c +++ b/util/hwpe.c @@ -30,33 +30,33 @@ #define HWPE_SWSYNC 6 #define HWPE_TASK_REG_OFFSET 8 -inline void hwpe_reg_write(hwpe_dev_t *dev, int reg, uint32_t value) { +inline void hwpe_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value) { dev->base_addr[reg] = value; } -inline uint32_t hwpe_reg_read(hwpe_dev_t *dev, int reg) { +inline uint32_t hwpe_reg_read(const hwpe_dev_t *dev, int reg) { return dev->base_addr[reg]; } -inline void hwpe_task_reg_write(hwpe_dev_t *dev, int reg, uint32_t value) { +inline void hwpe_task_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value) { hwpe_reg_write(dev, HWPE_TASK_REG_OFFSET + reg, value); } -inline uint32_t hwpe_task_reg_read(hwpe_dev_t *dev, int reg) { +inline uint32_t hwpe_task_reg_read(const hwpe_dev_t *dev, int reg) { return hwpe_reg_read(dev, HWPE_TASK_REG_OFFSET + reg); } -void hwpe_soft_clear(hwpe_dev_t *dev) { +void hwpe_soft_clear(const hwpe_dev_t *dev) { hwpe_reg_write(dev, HWPE_SOFT_CLEAR, 0); for (volatile int i = 0; i < 10; i++) ; } -uint32_t hwpe_task_queue_status(hwpe_dev_t *dev) { +uint32_t hwpe_task_queue_status(const hwpe_dev_t *dev) { return hwpe_reg_read(dev, HWPE_STATUS); } -int hwpe_task_queue_acquire_task(hwpe_dev_t *dev, uint8_t *id) { +int hwpe_task_queue_acquire_task(const hwpe_dev_t *dev, uint8_t *id) { uint32_t read_value = (int32_t)hwpe_reg_read(dev, HWPE_ACQUIRE); if (read_value >= 256) { return 1; @@ -66,20 +66,20 @@ int hwpe_task_queue_acquire_task(hwpe_dev_t *dev, uint8_t *id) { } } -void hwpe_task_queue_write_task(hwpe_dev_t *dev, uint32_t *data, int len) { +void hwpe_task_queue_write_task(const hwpe_dev_t *dev, uint32_t *data, int len) { for (int i = 0; i < len; i++) { hwpe_task_reg_write(dev, i, data[i]); } } -void hwpe_task_queue_release_and_run(hwpe_dev_t *dev) { +void hwpe_task_queue_release_and_run(const hwpe_dev_t *dev) { hwpe_reg_write(dev, HWPE_TRIGGER, 0); } -void hwpe_task_queue_release(hwpe_dev_t *dev) { +void hwpe_task_queue_release(const hwpe_dev_t *dev) { hwpe_reg_write(dev, HWPE_TRIGGER, 1); } -uint8_t hwpe_last_task_id(hwpe_dev_t *dev) { +uint8_t hwpe_last_task_id(const hwpe_dev_t *dev) { return (uint8_t)hwpe_reg_read(dev, HWPE_RUNNING_JOB); } diff --git a/util/hwpe.h b/util/hwpe.h index 52bf912..7b7f65c 100644 --- a/util/hwpe.h +++ b/util/hwpe.h @@ -28,16 +28,16 @@ typedef struct hwpe_dev_t { volatile uint32_t *base_addr; } hwpe_dev_t; -void hwpe_reg_write(hwpe_dev_t *dev, int reg, uint32_t value); -uint32_t hwpe_reg_read(hwpe_dev_t *dev, int reg); -void hwpe_task_reg_write(hwpe_dev_t *dev, int reg, uint32_t value); -uint32_t hwpe_task_reg_read(hwpe_dev_t *dev, int reg); -void hwpe_soft_clear(hwpe_dev_t *dev); -uint32_t hwpe_task_queue_status(hwpe_dev_t *dev); -int hwpe_task_queue_acquire_task(hwpe_dev_t *dev, uint8_t *id); -void hwpe_task_queue_write_task(hwpe_dev_t *dev, uint32_t *data, int len); -void hwpe_task_queue_release_and_run(hwpe_dev_t *dev); -void hwpe_task_queue_release(hwpe_dev_t *dev); -uint8_t hwpe_last_task_id(hwpe_dev_t *dev); +void hwpe_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value); +uint32_t hwpe_reg_read(const hwpe_dev_t *dev, int reg); +void hwpe_task_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value); +uint32_t hwpe_task_reg_read(const hwpe_dev_t *dev, int reg); +void hwpe_soft_clear(const hwpe_dev_t *dev); +uint32_t hwpe_task_queue_status(const hwpe_dev_t *dev); +int hwpe_task_queue_acquire_task(const hwpe_dev_t *dev, uint8_t *id); +void hwpe_task_queue_write_task(const hwpe_dev_t *dev, uint32_t *data, int len); +void hwpe_task_queue_release_and_run(const hwpe_dev_t *dev); +void hwpe_task_queue_release(const hwpe_dev_t *dev); +uint8_t hwpe_last_task_id(const hwpe_dev_t *dev); #endif // !__HWPE_H__ From 7a65b7e62590bedd9d5904cd6b945f85452a9f87 Mon Sep 17 00:00:00 2001 From: georg Date: Wed, 9 Apr 2025 16:54:07 +0200 Subject: [PATCH 4/6] wrap Neureka architecture parameters in #ifndefs --- neureka/hal/neureka_task_defs.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/neureka/hal/neureka_task_defs.h b/neureka/hal/neureka_task_defs.h index ffc0c79..3c5f689 100644 --- a/neureka/hal/neureka_task_defs.h +++ b/neureka/hal/neureka_task_defs.h @@ -21,20 +21,29 @@ #ifndef __NEUREKA_DEFS_H__ #define __NEUREKA_DEFS_H__ -/* ARHITECTURE */ - -#define NNX_NEUREKA_PE_H (4) -#define NNX_NEUREKA_PE_W (4) +/* ARCHITECTURE */ +// The definitions wrapped in #ifndefs can be overwritten with compiler flags for +// different parametrizations of the Neureka architecture +#ifndef NNX_NEUREKA_PE_H +#define NNX_NEUREKA_PE_H (6) +#endif +#ifndef NNX_NEUREKA_PE_W +#define NNX_NEUREKA_PE_W (6) +#endif +#ifndef NNX_NEUREKA_BANDWIDTH_1x1 #define NNX_NEUREKA_BANDWIDTH_1x1 (256) -#define NNX_NEUREKA_BANDWIDTH_3x3 (288) +#endif +#define NNX_NEUREKA_BANDWIDTH_3x3 (256) #define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H) #define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W) #define NEUREKA_SUBTILE_INPUT_CHANNEL_1x1 (32) #define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (NNX_NEUREKA_PE_H+2) -#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W+2) -#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (32) +#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W + 2) +#ifndef NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 +#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (28) +#endif #define NEUREKA_SUBTILE_OUTPUT_HEIGHT (NNX_NEUREKA_PE_H) #define NEUREKA_SUBTILE_OUTPUT_WIDTH (NNX_NEUREKA_PE_W) From 7be571b0fec29961ea30d2f07fc2fca9d538f146 Mon Sep 17 00:00:00 2001 From: georg Date: Thu, 10 Apr 2025 17:42:28 +0200 Subject: [PATCH 5/6] fix linting --- neureka/hal/neureka.h | 1 - neureka/hal/neureka_task.c | 16 ++++++++-------- neureka/hal/neureka_task_defs.h | 12 ++++++------ util/hwpe.c | 6 ++++-- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/neureka/hal/neureka.h b/neureka/hal/neureka.h index 4db09f2..b17c8b5 100644 --- a/neureka/hal/neureka.h +++ b/neureka/hal/neureka.h @@ -30,7 +30,6 @@ typedef struct neureka_dev_t { hwpe_dev_t hwpe_dev; /* Implements the HWPE device interface */ } neureka_dev_t; - int neureka_task_queue_tasks_in_flight(const neureka_dev_t *dev); int neureka_task_queue_empty(const neureka_dev_t *dev); int neureka_task_queue_full(const neureka_dev_t *dev); diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c index b024dd2..18939b8 100644 --- a/neureka/hal/neureka_task.c +++ b/neureka/hal/neureka_task.c @@ -166,17 +166,17 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in, task->data.cfg.output_stride = output_stride; if (task->kernel_shape == 1) { // 1x1 - task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1; - task->data.cfg.weights_stride.d1 = - (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in; + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1; + task->data.cfg.weights_stride.d1 = + (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in; } else if (!task->depthwise) { // 3x3 - task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; - task->data.cfg.weights_stride.d1 = - NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in; + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; + task->data.cfg.weights_stride.d1 = + NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in; } else { // 3x3 depthwise - task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; - task->data.cfg.weights_stride.d1 = 0; + task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; + task->data.cfg.weights_stride.d1 = 0; } task->data.cfg.weights_stride.d2 = 0; } diff --git a/neureka/hal/neureka_task_defs.h b/neureka/hal/neureka_task_defs.h index 3c5f689..0b9a9a4 100644 --- a/neureka/hal/neureka_task_defs.h +++ b/neureka/hal/neureka_task_defs.h @@ -22,8 +22,8 @@ #define __NEUREKA_DEFS_H__ /* ARCHITECTURE */ -// The definitions wrapped in #ifndefs can be overwritten with compiler flags for -// different parametrizations of the Neureka architecture +// The definitions wrapped in #ifndefs can be overwritten with compiler flags +// for different parametrizations of the Neureka architecture #ifndef NNX_NEUREKA_PE_H #define NNX_NEUREKA_PE_H (6) #endif @@ -36,10 +36,10 @@ #define NNX_NEUREKA_BANDWIDTH_3x3 (256) #define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H) -#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W) +#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W) #define NEUREKA_SUBTILE_INPUT_CHANNEL_1x1 (32) -#define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (NNX_NEUREKA_PE_H+2) +#define NEUREKA_SUBTILE_INPUT_HEIGHT_3x3 (NNX_NEUREKA_PE_H + 2) #define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W + 2) #ifndef NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 #define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (28) @@ -50,8 +50,8 @@ #define NEUREKA_SUBTILE_OUTPUT_CHANNEL (32) #define NEUREKA_OUTPUT_BANDWIDTH_BYTES (32) -#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1/8) -#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3/8) +#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1 / 8) +#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3 / 8) /* TASK REGISTERS */ diff --git a/util/hwpe.c b/util/hwpe.c index 730e033..1f99c1f 100644 --- a/util/hwpe.c +++ b/util/hwpe.c @@ -38,7 +38,8 @@ inline uint32_t hwpe_reg_read(const hwpe_dev_t *dev, int reg) { return dev->base_addr[reg]; } -inline void hwpe_task_reg_write(const hwpe_dev_t *dev, int reg, uint32_t value) { +inline void hwpe_task_reg_write(const hwpe_dev_t *dev, int reg, + uint32_t value) { hwpe_reg_write(dev, HWPE_TASK_REG_OFFSET + reg, value); } @@ -66,7 +67,8 @@ int hwpe_task_queue_acquire_task(const hwpe_dev_t *dev, uint8_t *id) { } } -void hwpe_task_queue_write_task(const hwpe_dev_t *dev, uint32_t *data, int len) { +void hwpe_task_queue_write_task(const hwpe_dev_t *dev, uint32_t *data, + int len) { for (int i = 0; i < len; i++) { hwpe_task_reg_write(dev, i, data[i]); } From fbba46f90d35b4fd387b0eecc945b0787032b773 Mon Sep 17 00:00:00 2001 From: georg Date: Fri, 11 Apr 2025 17:52:27 +0200 Subject: [PATCH 6/6] fix wrong bandwidth parameter wrapped in #ifndef --- neureka/hal/neureka_task_defs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/neureka/hal/neureka_task_defs.h b/neureka/hal/neureka_task_defs.h index 0b9a9a4..b48f78e 100644 --- a/neureka/hal/neureka_task_defs.h +++ b/neureka/hal/neureka_task_defs.h @@ -30,10 +30,11 @@ #ifndef NNX_NEUREKA_PE_W #define NNX_NEUREKA_PE_W (6) #endif -#ifndef NNX_NEUREKA_BANDWIDTH_1x1 #define NNX_NEUREKA_BANDWIDTH_1x1 (256) -#endif + +#ifndef NNX_NEUREKA_BANDWIDTH_3x3 #define NNX_NEUREKA_BANDWIDTH_3x3 (256) +#endif #define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H) #define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W)