diff --git a/aws-neuronx-dkms-mkdeb/debian/prerm b/aws-neuronx-dkms-mkdeb/debian/prerm index f42d293..654f5fc 100755 --- a/aws-neuronx-dkms-mkdeb/debian/prerm +++ b/aws-neuronx-dkms-mkdeb/debian/prerm @@ -8,6 +8,15 @@ set -e case "$1" in remove|upgrade|deconfigure) if [ "`dkms status -m $NAME`" ]; then + # Check if module is loaded before attempting removal + if lsmod | grep -q "^neuron "; then + echo "Neuron module is currently loaded. Attempting to unload..." + if ! rmmod neuron 2>/dev/null; then + echo "ERROR: Cannot unload neuron module - it is currently in use." >&2 + echo "Please stop all processes using the neuron module before uninstalling." >&2 + exit 1 + fi + fi dkms remove -m $NAME -v $VERSION --all fi ;; diff --git a/aws-neuronx-dkms-mkrpm.spec b/aws-neuronx-dkms-mkrpm.spec index c0b59b7..a01839a 100644 --- a/aws-neuronx-dkms-mkrpm.spec +++ b/aws-neuronx-dkms-mkrpm.spec @@ -69,6 +69,15 @@ exit 1 %preun echo -e echo -e "Uninstall of %{module_name} module (version %{version}) beginning:" +# Check if module is loaded before attempting removal +if lsmod | grep -q "^neuron "; then + echo "Neuron module is currently loaded. Attempting to unload..." + if ! rmmod neuron 2>/dev/null; then + echo "ERROR: Cannot unload neuron module - it is currently in use." + echo "Please stop all processes using the neuron module before uninstalling." 
+ exit 1 + fi +fi dkms remove -m %{module_name} -v %{version} --all --rpm_safe_upgrade exit 0 diff --git a/dkms.conf b/dkms.conf index 03f894c..24de583 100644 --- a/dkms.conf +++ b/dkms.conf @@ -1,5 +1,5 @@ PACKAGE_NAME=aws-neuronx -PACKAGE_VERSION=2.25.4.0 +PACKAGE_VERSION=2.26.5.0 BUILT_MODULE_NAME[0]="neuron" MAKE[0]="make -C ${kernel_source_dir} M=${dkms_tree}/${PACKAGE_NAME}/${PACKAGE_VERSION}/build" CLEAN="make -C ${kernel_source_dir} M=${dkms_tree}/${PACKAGE_NAME}/${PACKAGE_VERSION}/build clean" diff --git a/neuron_cdev.c b/neuron_cdev.c index b8c3458..ad4e3d7 100644 --- a/neuron_cdev.c +++ b/neuron_cdev.c @@ -188,7 +188,8 @@ static int ncdev_dma_queue_init_batch(struct neuron_device *nd, void *param) ret = neuron_copy_from_user(__func__, arg, (struct neuron_ioctl_dma_queue_init_batch *)param, sizeof(struct neuron_ioctl_dma_queue_init_batch)); if (ret) { - return -EACCES; + ret = -EACCES; + goto done; } if (arg->count >= MAX_DMA_QUEUE_INIT_BATCH) { @@ -1200,7 +1201,7 @@ static int ncdev_mem_buf_zerocopy64(struct neuron_device *nd, unsigned int cmd, IS_ALIGNED(offset, 4); // For smallish transfers, just do "copy from" directly to bar4 - // simulation (inkling) does not have bar4 mapped to the actual memory, don't do it + // simulation does not have bar4 mapped to the actual memory, don't do it if (use_bar4_wr) { u64 cpy_offset; ndhal->ndhal_mmap.mmap_get_bar4_offset(mc->pa + offset, size, &cpy_offset); @@ -1371,7 +1372,7 @@ static int ncdev_mem_buf_zerocopy64_batch(struct neuron_device *nd, void *param) } // For smallish transfers, just do "copy from" directly to bar4 - // simulation (inkling) does not have bar4 mapped to the actual memory, don't do it + // simulation does not have bar4 mapped to the actual memory, don't do it if (use_bar4_wr) { for (j = 0; j < batch->num_ops; j++) { const nrt_tensor_batch_op_t op = batch->ops_ptr[j]; @@ -1397,12 +1398,14 @@ static int ncdev_mem_buf_zerocopy64_batch(struct neuron_device *nd, void *param) if 
(!ndmar_qid_valid(qid)) { pr_err("nd%02d: invalid h2t queue index %d", nd->device_index, qid); - return -ENOENT; + ret = -ENOENT; + goto cleanup; } if (!ndma_zerocopy_supported()) { pr_err_once("nd%02d: zero copy is not supported for architectures requiring DMA retry", nd->device_index); - return -EINVAL; + ret = -EINVAL; + goto cleanup; } // use the zero-copy batch function for ops within a single batch @@ -1480,9 +1483,25 @@ static long ncdev_bar_read(struct neuron_device *nd, u8 bar, u64 *reg_addresses, int i; if (bar == 0 || bar == 2) { u32 *data = NULL; + + for (i = 0; i < data_count; i++) { + u64 bar_addr = (u64)nd->npdev.bar0; + u64 bar_size = nd->npdev.bar0_size; + + // From V2 arch onwards, nd->npdev.bar2 (axi_bar) is no longer initialized + // TODO: Remove bar2 fields + + // nd->npdev.bar0 is initialized to APB bar + // On V2 arch, APB bar is 0 usually, but 2 in case of QEMU + + if ((reg_addresses[i] < bar_addr) || (reg_addresses[i] >= bar_addr + bar_size)) { + return -EINVAL; + } + } data = kmalloc(data_size, GFP_KERNEL); if (data == NULL) return -ENOMEM; + ret = ndhal->ndhal_reg_access.reg_read32_array((void **)reg_addresses, data, data_count); if (ret) { kfree(data); @@ -1541,8 +1560,10 @@ static int ncdev_bar_write_data(struct neuron_device *nd, u8 bar, u64 *reg_addre if (bar == 0) { int i; for (i = 0; i < data_count; i++) { - u64 off = reg_addresses[i] - (u64)nd->npdev.bar0; - if (off > nd->npdev.bar0_size) { + u64 bar_addr = (u64)nd->npdev.bar0; + u64 bar_size = nd->npdev.bar0_size; + u64 off = reg_addresses[i] - bar_addr; + if ((reg_addresses[i] < bar_addr) || (reg_addresses[i] >= bar_addr + bar_size)) { return -EINVAL; } if (ndhal->ndhal_ndma.ndma_is_bar0_write_blocked(off)) { @@ -2999,6 +3020,39 @@ static int ncdev_power_profile_set(struct neuron_device *nd, void *param) return ndhal->ndhal_perf.perf_set_profile(nd, arg.profile); } +static int ncdev_throttling_notifications_set(struct neuron_device *nd, void *param) +{ + struct 
neuron_ioctl_throttling_notifications arg; + int ret; + + ret = neuron_copy_from_user(__func__, &arg, (struct neuron_ioctl_throttling_notifications*) param, sizeof(arg)); + if (ret) + return ret; + + return fw_io_enable_throttling_notifications(nd->fw_io_ctx, arg.enable ? true : false); +} + +static int ncdev_get_va_placement(void *param) +{ + struct neuron_ioctl_get_va_placement arg; + int ret, unused; + int device_index, hbm_index; + ret = neuron_copy_from_user(__func__, &arg, (struct neuron_ioctl_get_va_placement*) param, sizeof(arg)); + if (ret) + return ret; + + ret = nmmap_get_va_placement((void*)arg.va, &device_index, &hbm_index); + if (!ret) { + arg.device_index = device_index; + arg.hbm_index = hbm_index; + } else { + arg.device_index = -1; + arg.hbm_index = -1; + } + unused = copy_to_user(param, &arg, sizeof(arg)); + return ret; +} + inline static long ncdev_misc_ioctl(struct file *filep, unsigned int cmd, unsigned long param) { if ((cmd == NEURON_IOCTL_CRWL_NC_RANGE_MARK) || (cmd == NEURON_IOCTL_CRWL_NC_RANGE_MARK_EXT0)) { return ncdev_crwl_nc_range_mark(filep, cmd, (void *)param); @@ -3031,6 +3085,8 @@ inline static long ncdev_misc_ioctl(struct file *filep, unsigned int cmd, unsign return ncdev_pod_status(cmd, (void *)param); } else if (_IOC_NR(cmd) == _IOC_NR(NEURON_IOCTL_POD_CTRL)) { return ncdev_pod_ctrl(filep, cmd, (void *)param); + } else if (_IOC_NR(cmd) == _IOC_NR(NEURON_IOCTL_GET_VA_PLACEMENT)) { + return ncdev_get_va_placement((void *)param); } pr_err("invalid misc IOCTL %d (dir=%d, type=%d, nr=%d, size=%d)\n", cmd, _IOC_DIR(cmd), @@ -3226,6 +3282,8 @@ static long ncdev_ioctl(struct file *filep, unsigned int cmd, unsigned long para return ncdev_h2t_dma_free_queues(nd, cmd, (void*)param); } else if (cmd == NEURON_IOCTL_POWER_PROFILE) { return ncdev_power_profile_set(nd, (void*)param); + } else if (cmd == NEURON_IOCTL_THROTTLING_NOTIFICATIONS) { + return ncdev_throttling_notifications_set(nd, (void*)param); } // B/W compatibility @@ -3500,6 
+3558,8 @@ static inline int ncdev_init_device_node(struct ncdev *devnode, const char *dev_ return ret; } devnode->device = device; + devnode->minor = minor; + devnode->ndev = ndev; ret = sysfs_create_group(&(device->kobj), &attr_group); if (ret) { @@ -3518,9 +3578,6 @@ static inline int ncdev_init_device_node(struct ncdev *devnode, const char *dev_ return -1; } - devnode->minor = minor; - devnode->ndev = ndev; - return 0; } diff --git a/neuron_dhal.h b/neuron_dhal.h index bbdbbe5..ab34019 100644 --- a/neuron_dhal.h +++ b/neuron_dhal.h @@ -50,7 +50,8 @@ struct ndhal_address_map { uint64_t event_count; uint32_t ts_per_device; int dma_eng_per_nc; - int dma_eng_per_nd; + int seng_dma_eng_per_nd; + int h2d_dma_eng_per_nd; int dram_channels; }; @@ -280,4 +281,4 @@ int ndhal_register_funcs_v2(void); int ndhal_register_funcs_v3(void); int ndhal_register_funcs_v4(void); -#endif +#endif // #ifndef NEURON_DHAL_H diff --git a/neuron_dma.c b/neuron_dma.c index 5f7cbc0..8258605 100644 --- a/neuron_dma.c +++ b/neuron_dma.c @@ -986,6 +986,7 @@ static int ndma_build_n_issue_zc_descs( struct ndma_h2t_zcdma_context * dma_ctx) break; } contig_size += PAGE_SIZE; + tmp = next; } if (dma_ctx->direction) { // write to device diff --git a/neuron_fw_io.c b/neuron_fw_io.c index 8632d04..4222309 100644 --- a/neuron_fw_io.c +++ b/neuron_fw_io.c @@ -233,14 +233,18 @@ static const u32 fw_io_cmd_timeout_tbl[FW_IO_CMD_MAX] = { 0, // cmd 0 (1000 * 1000 * 1), // cmd 1 (FW_IO_CMD_READ) (1000 * 1000 * 1), // cmd 2 (FW_IO_CMD_POST_TO_CW) - (1000 * 1000 * 60) // cmd 3 (FW_IO_CMD_SET_POWER_PROFILE) + (1000 * 1000 * 60), // cmd 3 (FW_IO_CMD_SET_POWER_PROFILE) + (1000 * 1000 * 1), // cmd 4 (FW_IO_CMD_GET_DATA) + (1000 * 1000 * 60), // cmd 5 (FW_IO_CMD_SET_FEATURE) }; static const u32 fw_io_cmd_retry_tbl[FW_IO_CMD_MAX] = { 0, // cmd 0 15, // cmd 1 (FW_IO_CMD_READ) 15, // cmd 2 (FW_IO_CMD_POST_TO_CW) - 3 // cmd 3 (FW_IO_CMD_SET_POWER_PROFILE) + 3, // cmd 3 (FW_IO_CMD_SET_POWER_PROFILE) + 3, // cmd 4 
(FW_IO_CMD_GET_DATA) + 3, // cmd 5 (FW_IO_CMD_SET_FEATURE) }; static u32 crc32c(const u8 *hdr, const u8 *data, size_t len) @@ -352,7 +356,7 @@ int fw_io_execute_request_new(struct fw_io_ctx *ctx, u8 command_id, const u8 *re ret = fw_io_api_version_read(ctx->bar0, &api_version_num); if ((ret != 0) || (api_version_num < FW_IO_NEW_READLESS_READ_MIN_API_VERSION)) { - pr_info_once("Pacific version %d, using legacy Pacific/Runtime comm framework", api_version_num); + pr_info_once("Firmware version %d, using legacy Firmware/Runtime comm framework", api_version_num); return -ENOTSUPP; } @@ -657,6 +661,7 @@ struct fw_io_ctx *fw_io_setup(void __iomem *bar0, u64 bar0_size, ctx->next_seq_num = 1; mutex_init(&ctx->lock); + ctx->request_response_size = FW_IO_MAX_SIZE; ctx->request = kmalloc(FW_IO_MAX_SIZE, GFP_ATOMIC); if (ctx->request == NULL) { pr_err("memory allocation failed\n"); @@ -705,12 +710,28 @@ void fw_io_destroy(struct fw_io_ctx *ctx) kfree(ctx); } -uint32_t fw_io_get_total_uecc_err_count(void *bar0) { +static inline uint32_t uncorrectable_ecc_err_count(uint32_t api_version, uint32_t ecc_err_count) { + // API Version<6: bitfield[15:0] Uncorrectable Errors + // API Version>=6: bitfield[15:12] Uncorrectable Errors + return (api_version >= 6) ? ((ecc_err_count >> 12) & 0xf) : (ecc_err_count & 0xffff); +} + +static inline uint32_t repairable_ecc_err_count(uint32_t api_version, uint32_t ecc_err_count) { + // API Version<6: N/A + // API Version>=6: bitfield[11:0] Repairable Errors + return (api_version >= 6) ? 
(ecc_err_count & 0xfff) : 0; +} + +void fw_io_get_total_ecc_err_counts(void *bar0, uint32_t *ue_ecc_count, uint32_t *repairable_ecc_count) { uint32_t total_uncorrected_ecc_err_count = 0; + uint32_t total_repairable_ecc_err_count = 0; uint32_t channel = 0; uint32_t ecc_err_count = 0; uint64_t ecc_offset = 0; + uint32_t api_version; + fw_io_api_version_read(bar0, &api_version); + for (channel = 0; channel < ndhal->ndhal_address_map.dram_channels; channel++) { ecc_offset = FW_IO_REG_HBM0_ECC_OFFSET + channel * sizeof(uint32_t); ecc_err_count = 0; @@ -718,11 +739,12 @@ uint32_t fw_io_get_total_uecc_err_count(void *bar0) { if (ret) { pr_err("sysfs failed to read ECC HBM%u error from FWIO\n", channel); } else if (ecc_err_count != 0xdeadbeef) { - // ue count is in the lowest 16 bits - total_uncorrected_ecc_err_count += (ecc_err_count & 0x0000ffff); + total_uncorrected_ecc_err_count += uncorrectable_ecc_err_count(api_version, ecc_err_count); + total_repairable_ecc_err_count += repairable_ecc_err_count(api_version, ecc_err_count); } } - return total_uncorrected_ecc_err_count; + *ue_ecc_count = total_uncorrected_ecc_err_count; + *repairable_ecc_count = total_repairable_ecc_err_count; } int fw_io_set_power_profile(struct fw_io_ctx *ctx, uint32_t profile) @@ -739,3 +761,27 @@ int fw_io_set_power_profile(struct fw_io_ctx *ctx, uint32_t profile) return fw_io_execute_request_new(ctx, FW_IO_CMD_SET_POWER_PROFILE, (u8 *)&data, sizeof(data), NULL, 0); } + +int fw_io_enable_throttling_notifications(struct fw_io_ctx *ctx, bool enable) +{ /* + * Note: + * This implementation assumes throttling notifications is the only feature. + * Current behavior sets features=0x01 (enable) or features=0x00 (disable all) + * When more features are added, we need to: + * 1. Add a get feature command to read current feature state + * 2. Cache the current features bitmap in fw_io_ctx + * 3. 
Set/clear individual feature bits + * This will avoid wiping out other enabled features when disabling one feature + */ + u8 features = 0; + + if (!ctx) { + return -EINVAL; + } + + if (enable) { + features = FW_IO_FEATURE_THROTTLING_NOTIFICATIONS; + } + + return fw_io_execute_request_new(ctx, FW_IO_CMD_SET_FEATURE, &features, sizeof(features), NULL, 0); +} \ No newline at end of file diff --git a/neuron_fw_io.h b/neuron_fw_io.h index 6c60d19..3faf65c 100644 --- a/neuron_fw_io.h +++ b/neuron_fw_io.h @@ -52,10 +52,29 @@ union fw_io_req_perfprofile_data { uint32_t raw[2]; }; +struct fw_io_get_data_request { + uint8_t type; // fw_io_data_request_type + uint8_t reserved[3]; // reserved for future use/alignment +}; + +// Feature bitmap for FW_IO_CMD_SET_FEATURE +// Each bit represents a different feature that can be enabled/disabled +enum fw_io_feature_bits { + FW_IO_FEATURE_THROTTLING_NOTIFICATIONS = (0x1 << 0), // bit 0: enable throttling notifications +}; + +/* + * Note: + * GET_DATA can retrieve information such as current power profile, + * but there's no corresponding GET_FEATURE to read current feature bitmap. + * This limitation affects fw_io_enable_throttling_notifications (see note). + */ enum { FW_IO_CMD_READ = 1, // read a register value FW_IO_CMD_POST_TO_CW = 2, // post given blob as metrics to CloudWatch - FW_IO_CMD_SET_POWER_PROFILE = 3 // set power profile + FW_IO_CMD_SET_POWER_PROFILE = 3, // set power profile + FW_IO_CMD_GET_DATA = 4, // get various FW data (see fw_io_get_data_request) + FW_IO_CMD_SET_FEATURE = 5 // set feature bitmap }; enum { @@ -146,9 +165,7 @@ struct fw_io_ctx { #define UINT64_LOW(x) ((u32)(((u64)(x)) & 0xffffffffULL)) #define UINT64_HIGH(x) ((u32)((x) >> 32)) -#define FW_IO_CMD_MAX 4 - -#define FW_IO_CMD_MAX 4 +#define FW_IO_CMD_MAX 6 // Wait up to 30 seconds in worst case. 
// Hardware can in some cases take longer to come out of reset but for some reads @@ -160,7 +177,7 @@ struct fw_io_ctx { // max number of registers can be read in single function call #define FW_IO_MAX_READLESS_READ_REGISTER_COUNT 100 -// Min Pacific API version for new readless read framework +// Min Firmware API version for new readless read framework #define FW_IO_NEW_READLESS_READ_MIN_API_VERSION 7 #define FW_IO_POWER_MIN_API_VERSION 3 @@ -371,11 +388,12 @@ int fw_io_ecc_read(void *bar0, uint64_t ecc_offset, uint32_t *ecc_err_count); int fw_io_serial_number_read(void *bar0, uint64_t *serial_number); /** - * fw_io_get_total_uecc_err_count() - Get UE ecc error count + * fw_io_get_total_ecc_err_counts() - Get UE ecc error count * @param bar0: from bar - * @return err count + * @param ue_ecc_count: Pointer to the ue counter + * @param repairable_err_count: Pointer to the repairable counter */ -uint32_t fw_io_get_total_uecc_err_count(void *bar0); +void fw_io_get_total_ecc_err_counts(void *bar0, uint32_t *ue_ecc_count, uint32_t *repairable_ecc_count); /** * fw_io_hbm_uecc_repair_state_read() - Get HBM UE ecc repair state @@ -414,4 +432,12 @@ int fw_io_execute_request_new(struct fw_io_ctx *ctx, u8 command_id, const u8 *re * @return 0 on success, negative on failure */ int fw_io_set_power_profile(struct fw_io_ctx *ctx, uint32_t profile); + +/** + * fw_io_enable_throttling_notifications() - Enable throttling notifications + * @param ctx: FWIO context + * @param enable: true to enable, false to disable + * @return 0 on success, negative on failure + */ +int fw_io_enable_throttling_notifications(struct fw_io_ctx *ctx, bool enable); #endif diff --git a/neuron_ioctl.h b/neuron_ioctl.h index 80989d9..bcc9c6e 100644 --- a/neuron_ioctl.h +++ b/neuron_ioctl.h @@ -628,6 +628,15 @@ struct neuron_ioctl_metrics_ctrl { __u32 mode; // [in] modifications to metric behavior (neuron_metrics_mode) }; +struct neuron_ioctl_throttling_notifications { + __u32 enable; // [in] 1 to enable, 0 
to disable +}; + +struct neuron_ioctl_get_va_placement { + __u64 va; // [in] virtual address of Neuron memory + __s32 device_index; // [out] Neuron device index (negative if VA does not represent Neuron memory) + __s32 hbm_index; // [out] HBM index +}; #define NEURON_IOCTL_BASE 'N' @@ -834,7 +843,11 @@ struct neuron_ioctl_metrics_ctrl { #define NEURON_IOCTL_MEM_BUF_ZEROCOPY64_BATCHES _IOWR(NEURON_IOCTL_BASE, 129, struct neuron_ioctl_mem_buf_copy64zc_batches) +#define NEURON_IOCTL_THROTTLING_NOTIFICATIONS _IOW(NEURON_IOCTL_BASE, 130, struct neuron_ioctl_throttling_notifications) + +#define NEURON_IOCTL_GET_VA_PLACEMENT _IOW(NEURON_IOCTL_BASE, 131, struct neuron_ioctl_get_va_placement) + // Note: 133 is taken by NEURON_IOCTL_DMA_QUEUE_INIT_BATCH -#define NEURON_IOCTL_MAX 130 +#define NEURON_IOCTL_MAX 132 #endif diff --git a/neuron_mempool.h b/neuron_mempool.h index 2ad7aa9..2fcdf10 100644 --- a/neuron_mempool.h +++ b/neuron_mempool.h @@ -224,10 +224,16 @@ int mc_dump_all_chunks(struct neuron_device *nd, u32 channel, u32 num_entries_in static inline bool mc_access_is_within_bounds(const struct mem_chunk *mc, u64 access_offset, u64 access_size) { + u64 allowed_range = 0; if (mc->alloc_type == NEURON_MEMALLOC_TYPE_CONTIGUOUS_SCRATCHPAD_DEVICE) { - return (mc->pa + access_offset + access_size <= mc->mp->main_pool_end_addr); + allowed_range = mc->mp->main_pool_end_addr - mc->pa; + } else { + allowed_range = mc->size; } - return access_offset + access_size <= mc->size; + + // Do NOT refactor to access_offset + access_size <= allowed_range, as the addition + // can overflow and wraparound to be less than allowed_range + return (access_size <= allowed_range && access_offset <= allowed_range - access_size); } #endif diff --git a/neuron_metrics.c b/neuron_metrics.c index 6fbc28d..b849262 100644 --- a/neuron_metrics.c +++ b/neuron_metrics.c @@ -40,72 +40,128 @@ extern const char driver_version[]; enum nmetric_cw_id { NMETRIC_CW_ID_UNUSED = 0, - NMETRIC_CW_ID_FW_IO_ERROR_COUNT = 
11, // internal driver fw_io error count. counted internally using a counter in fw_io_ctx struct - NMETRIC_CW_ID_INSTANCE_ID = 12, // instance id - NMETRIC_CW_ID_DRIVER_VERSION = 13, // driver version - - // Driver internal metics + // Total number of internal driver firmware I/O errors, counter appended on driver → hardware execution errors + NMETRIC_CW_ID_FW_IO_ERROR_COUNT = 11, + // EC2 Instance Identifier, read from DMI board asset tag during driver module initialization + NMETRIC_CW_ID_INSTANCE_ID = 12, + // Driver version string, initialized during driver build + NMETRIC_CW_ID_DRIVER_VERSION = 13, + + // Driver internal metics + // Maximum time taken for device reset operations across all neuron devices in the instance (ms) NMETRIC_CW_ID_MAX_DEVICE_RESET_TIME_MS = 50, + // Maximum time taken for TPB reset operations across all neuron devices in the instance (ms) NMETRIC_CW_ID_MAX_TPB_RESET_TIME_MS = 51, + // Average time for device reset operations (ms), calculated from DEVICE_RESET_FAILURE_COUNT and total reset time NMETRIC_CW_ID_AVG_DEVICE_RESET_TIME_MS = 52, + // Average time for TPB reset operations (ms), calculated from TPB_RESET_FAILURE_COUNT and total reset time NMETRIC_CW_ID_AVG_TPB_RESET_TIME_MS = 53, + // Count of failed device reset operations (timeouts from framework), max wait time in NR_RESET_INIT_MAX_TOTAL_WAIT_TIME_MS NMETRIC_CW_ID_DEVICE_RESET_FAILURE_COUNT = 54, + // Count of failed TPB reset operations (timeouts from framework), similar to device reset timeouts NMETRIC_CW_ID_TPB_RESET_FAILURE_COUNT = 55, + // Device performance profile identifier for power and performance characteristics, set via performance_profile_tool NMETRIC_CW_ID_PERFORMANCE_PROFILE_ID = 56, + // Ultraserver supported modes (only for ULTRASERVER/PDS platforms), values defined in neuron_ultraserver_mode enum + NMETRIC_CW_ID_ULTRASERVER_MODES_SUPPORTED = 57, + // Ultraserver mode configured on device (only for ULTRASERVER/PDS platforms), values defined in 
neuron_ultraserver_mode enum + NMETRIC_CW_ID_ULTRASERVER_MODE = 58, + + // Platform Utilization Metrics + // Percentage of time that the neuron device was executing NEFFs in a given interval, aggregated across NCs + // For example, a ND with full utilization of one core with the other idle, will be reported as 50% + NMETRIC_CW_ID_NC_UTILIZATION = 90, // Extra versions // extra space for reporting multiple versions of the same type in one post - NMETRIC_CW_ID_RT_VERSION_BASE = 180, // base id for rt version + // Most frequent Runtime version information across all devices, persisted during nrt_init in NDS data store + NMETRIC_CW_ID_RT_VERSION_BASE = 180, NMETRIC_CW_ID_RT_VERSION_0 = NMETRIC_CW_ID_RT_VERSION_BASE, + // Next most frequent runtime version info across all neuron devices of the instance NMETRIC_CW_ID_RT_VERSION_1, NMETRIC_CW_ID_RT_VERSION_LAST = NMETRIC_CW_ID_RT_VERSION_1, // inclusive of last version + // Framework version string provided by upstream consumer when calling nrt_init API NMETRIC_CW_ID_FW_VERSION_BASE = 190, NMETRIC_CW_ID_FW_VERSION_0 = NMETRIC_CW_ID_FW_VERSION_BASE, + // Framework type provided by upstream consumer during nrt_init, values defined in Runtime nrt_framework_type_t enum NMETRIC_CW_ID_FW_TYPE_0, + // Next most frequent framework version string across all neuron devices of the instance NMETRIC_CW_ID_FW_VERSION_1, + // Next most frequent framework type across all neuron devices of the instance NMETRIC_CW_ID_FW_TYPE_1, NMETRIC_CW_ID_FW_VERSION_LAST = NMETRIC_CW_ID_FW_TYPE_1, + // FAL (Framework Abstraction Layer) version string provided by upstream consumer when calling nrt_init API NMETRIC_CW_ID_FAL_VERSION_BASE = 195, NMETRIC_CW_ID_FAL_VERSION_0 = NMETRIC_CW_ID_FAL_VERSION_BASE, + // Next most frequent FAL version string across all neuron devices of the instance NMETRIC_CW_ID_FAL_VERSION_1, NMETRIC_CW_ID_FAL_VERSION_LAST = NMETRIC_CW_ID_FAL_VERSION_1, // Return codes - NMETRIC_CW_ID_NERR_OK = 200, // status ok - 
NMETRIC_CW_ID_NERR_FAIL = 201, // status fail + // Successful model load tracking, following NRT_SUCCESS runtime status + NMETRIC_CW_ID_NERR_OK = 200, + // Generic model load failure tracking, following NRT_FAILURE runtime status + NMETRIC_CW_ID_NERR_FAIL = 201, + // NRT_INVALID runtime status tracking (invalid NEFF, bad instruction, bad DMA descriptor etc.) NMETRIC_CW_ID_NERR_INVALID = 202, + // Resource allocation failures tracking NRT_RESOURCE runtime status errors NMETRIC_CW_ID_NERR_RESOURCE = 204, + // nrt_execute operation timeout tracking NRT_TIMEOUT status, max wait time set via NEURON_RT_EXEC_TIMEOUT NMETRIC_CW_ID_NERR_TIMEOUT = 205, + // Hardware failure count during runtime execution, tracking NRT_HW_ERROR status NMETRIC_CW_ID_NERR_HW_ERROR = 206, + // Async execution requests not queued due to queue overflow, queue size set via NEURON_RT_ASYNC_EXEC_MAX_INFLIGHT_REQUESTS NMETRIC_CW_ID_NERR_QUEUE_FULL = 207, + // Resource allocation failures when insufficient neuron cores available, tracks NRT_LOAD_NOT_ENOUGH_NC status NMETRIC_CW_ID_NERR_RESOURCE_NC = 208, + // Unsupported NEFF version model load failures, tracking NRT_UNSUPPORTED_NEFF_VERSION status NMETRIC_CW_ID_NERR_UNSUPPORTED_VERSION = 209, + // Incorrect input data failures leading to NRT_EXEC_BAD_INPUT during nrt_execute (legacy metric) NMETRIC_CW_ID_NERR_INFER_BAD_INPUT = 212, + // NEURON_ISA_TPB_ERROR_TYPE_FP_NAN TPB error notifications, enabled via NEURON_FAIL_ON_NAN environment variable NMETRIC_CW_ID_NERR_INFER_COMPLETED_WITH_NUM_ERR = 213, + // Generic TPB error notifications (MEMORY_ERROR, FAKE_ERROR, SEMAPHORE_ERROR, etc.), tracking NRT_EXEC_COMPLETED_WITH_ERR NMETRIC_CW_ID_NERR_INFER_COMPLETED_WITH_ERR = 214, + // Numerical computation errors during nrt_execute (deprecated from Runtime v2.25) NMETRIC_CW_ID_NERR_NUMERICAL_ERR = 215, + // Model load errors, unused in Runtime (deprecated from Runtime v2.25) NMETRIC_CW_ID_NERR_MODEL_ERR = 216, + // Transient SEQUENCER_NONFATAL TPB error 
notifications that may be retryable (deprecated from Runtime v2.20) NMETRIC_CW_ID_NERR_TRANSIENT_ERR = 217, + // Runtime specific errors (deprecated from Runtime v2.25) NMETRIC_CW_ID_NERR_RT_ERR = 218, - NMETRIC_CW_ID_NERR_GENERIC_TPB_ERR = 219, // generic notification error - // for reference look at "INFER_SUBTYPE_NONE" in - // Runtime repo "tdrv/infer_error_subtype_int.c" + // Generic TPB errors (FP_UNDERFLOW, FP_INF, FP_OVERFLOW notifications) (deprecated from Runtime v2.25) + NMETRIC_CW_ID_NERR_GENERIC_TPB_ERR = 219, + // Out-of-bounds access errors during execution, tracking NRT_EXEC_OOB Runtime status NMETRIC_CW_ID_NERR_OOB = 220, + // Collective operations errors leading to execution hangs, tracking NRT_EXEC_HW_ERR_COLLECTIVES Runtime status NMETRIC_CW_ID_NERR_HW_ERR_COLLECTIVES = 221, + // Total count of HBM Unrepairable Uncorrectable hardware errors across the instance NMETRIC_CW_ID_NERR_HW_ERR_HBM_UE = 222, + // Total count of Uncorrectable SRAM errors across the instance NMETRIC_CW_ID_NERR_HW_ERR_NC_UE = 223, + // Total count of DMA abort hardware errors across the instance NMETRIC_CW_ID_NERR_HW_ERR_DMA_ABORT = 224, + // Count of software semaphore errors NMETRIC_CW_ID_NERR_SW_SEMAPHORE_ERROR = 225, + // Count of software event handling errors NMETRIC_CW_ID_NERR_SW_EVENT_ERROR = 226, + // Software partial sum collision errors, tracking NEURON_ISA_TPB_ERROR_TYPE_PSUM_COLLISION TPB notifications NMETRIC_CW_ID_NERR_SW_PSUM_COLLISION = 227, + // Fatal software sequencer errors, tracking NEURON_ISA_TPB_ERROR_TYPE_SEQUENCER_FATAL TPB notifications NMETRIC_CW_ID_NERR_SW_SEQUENCER_FATAL = 228, + // Total count of HBM Repairable Uncorrectable hardware errors across the instance NMETRIC_CW_ID_NERR_HW_ERR_REPAIRABLE_HBM_UE = 229, + // Bitmap indicating enabled features on device (decimal format), aggregated via bitwise OR across all devices NMETRIC_CW_ID_FEATURE_BITMAP = 250, + // Bitmap indicating available sysfs metrics (currently NOT SET), posted on unprocessed 
cloudwatch id NMETRIC_CW_ID_SYSFS_METRIC_BITMAP = 251, + // Global communication identifier initialized by Collectives on all ranks NMETRIC_CW_ID_DEVICE_CLUSTER_ID = 252, + // Count of interrupt controller software notification queue overflow errors NMETRIC_CW_ID_NERR_SW_NQ_OVERFLOW = 253, }; @@ -157,6 +213,7 @@ static const nmetric_def_t nmetric_defs[] = { NMETRIC_COUNTER_DEF(26, POST_TIME_TICK_1, NMETRIC_CW_ID_NERR_SW_PSUM_COLLISION, NDS_EXT_NC_COUNTER_ERR_SW_PSUM_COLLISION), NMETRIC_COUNTER_DEF(27, POST_TIME_TICK_1, NMETRIC_CW_ID_NERR_SW_SEQUENCER_FATAL, NDS_EXT_NC_COUNTER_ERR_SW_SEQUENCER_FATAL), NMETRIC_COUNTER_DEF(28, POST_TIME_TICK_1, NMETRIC_CW_ID_NERR_HW_ERR_REPAIRABLE_HBM_UE, NDS_EXT_NC_COUNTER_HW_ERR_REPAIRABLE_HBM_UE), + NMETRIC_UTILIZATION_DEF(29, POST_TIME_ALWAYS, NMETRIC_CW_ID_NC_UTILIZATION, NDS_NC_COUNTER_TIME_IN_USE), // bitmap metrics NMETRIC_BITMAP_DEF(0, POST_TIME_TICK_1, NMETRIC_CW_ID_FEATURE_BITMAP, NDS_ND_COUNTER_FEATURE_BITMAP), @@ -172,6 +229,10 @@ static const nmetric_def_t nmetric_defs[] = { NMETRIC_DRIVER_DEF(NMETRIC_DRIVER_METRICS_IDX_AVG_TPB_RESET_TIME_MS, POST_TIME_TICK_1, NMETRIC_CW_ID_AVG_TPB_RESET_TIME_MS), NMETRIC_DRIVER_DEF(NMETRIC_DRIVER_METRICS_IDX_DEVICE_RESET_FAILURE_COUNT, POST_TIME_TICK_1, NMETRIC_CW_ID_DEVICE_RESET_FAILURE_COUNT), NMETRIC_DRIVER_DEF(NMETRIC_DRIVER_METRICS_IDX_TPB_RESET_FAILURE_COUNT, POST_TIME_TICK_1, NMETRIC_CW_ID_TPB_RESET_FAILURE_COUNT), + + // ultraserver metrics + NMETRIC_DRIVER_USERVER_DEF(0, POST_TIME_TICK_0, NMETRIC_CW_ID_ULTRASERVER_MODES_SUPPORTED), + NMETRIC_DRIVER_USERVER_DEF(1, POST_TIME_TICK_0, NMETRIC_CW_ID_ULTRASERVER_MODE), }; static const int nmetric_count = sizeof(nmetric_defs) / sizeof(nmetric_def_t); @@ -299,6 +360,7 @@ static void nmetric_aggregate_nd_counter_entry(struct neuron_device *nd, struct curr_metric->ds_id, &nd->metrics.component_versions[curr_metric->index]); break; + case NMETRIC_TYPE_UTILIZATION: case NMETRIC_TYPE_COUNTER: for (nc_id = 0; nc_id < 
ndhal->ndhal_address_map.nc_per_device; nc_id++) { if (((1 << nc_id) & ndhal->ndhal_address_map.dev_nc_map) == 0) { @@ -493,6 +555,49 @@ static inline int nmetric_post_version(struct nmetric_versions *versions, const return written_size; } +/** + * Function to post utilization stats from an NDS counter, which requires more transformations than a regular counter post + */ +static int nmetric_post_utilization(struct neuron_device *nd, u64 *curr_metrics, u64 *prev_metrics, + u64 *freed_metrics, const nmetric_def_t *metric, + struct nmetric_cw_metric *dest, int available_size) +{ + int metric_index = metric->index; + u64 crt_metric_value = curr_metrics[metric_index] + freed_metrics[metric_index] - prev_metrics[metric_index]; + u32 elapsed_jiffies = jiffies - nd->metrics.neuron_aggregation.last_logged_slow_tick_jiffies; + u64 nsecs_since_last_post = 0; + + if (elapsed_jiffies == 0) { // Be extra safe to avoid division by zero + return 0; + } + + switch (metric->cw_id) { + // The original crt_metric_value will be the aggregated time each core was executing, e.g. nc1 + nc2 ... ncN in picoseconds. So we need + // to first normalize this value by dividing by the number of cores to get the average duration a NC spent executing on this device. + // We then convert this to nanoseconds and take the ratio of this usage time to the elapsed time. The metric will be posted as + // an int representing the percentage of time the device was being used to execute a NEFF. 
+ case NMETRIC_CW_ID_NC_UTILIZATION: + nsecs_since_last_post = jiffies_to_nsecs(elapsed_jiffies); + crt_metric_value = crt_metric_value / 1000 / ndhal->ndhal_address_map.nc_per_device; + crt_metric_value = (crt_metric_value * 100) / nsecs_since_last_post; + break; + } + + // check if there is enough space in buffer (if there's not, skip, maybe the next one fits) + int expected_len = snprintf(NULL, 0, "%llu", crt_metric_value); + int metric_size = sizeof(struct nmetric_cw_metric) + expected_len; + if (available_size < metric_size) { + return 0; + } + + // save metrics to buffer + dest->id = metric->cw_id; + dest->len = expected_len; + snprintf(dest->data, expected_len + 1, "%llu", crt_metric_value); + + return metric_size; +} + static inline int nmetric_post_counter(u64 *curr_metrics, u64 *prev_metrics, u64 *freed_metrics, const nmetric_def_t *metric, struct nmetric_cw_metric *dest, int available_size) { @@ -619,6 +724,39 @@ static inline int nmetric_post_and_reset_driver_metrics(const nmetric_def_t *dri return nmetric_post_u64(driver_final_metric, metric_value, dest, available_size); } +static inline int nmetric_post_driver_userver_metrics(const nmetric_def_t *metric, struct nmetric_cw_metric *dest, int available_size) +{ + u8 pod_type, pod_id, pod_sz; + enum neuron_ultraserver_mode mode; + u32 modes_supported; + int supported_mode = 0; + int i; + int metric_value = 0; + + // Only post if npe_pod_info is available and succeeds + if (!ndhal->ndhal_npe.npe_pod_info || ndhal->ndhal_npe.npe_pod_info(&pod_type, &pod_id, &pod_sz, &mode, &modes_supported) != 0) { + return 0; + } + + if (pod_type == NEURON_POD_TYPE_NONE) { + return 0; + } + + if (metric->cw_id == NMETRIC_CW_ID_ULTRASERVER_MODES_SUPPORTED) { + for (i = NEURON_ULTRASERVER_MODE_X4; i <= NEURON_ULTRASERVER_MODE_X1; i++) { + if (modes_supported & (1 << i)) { + supported_mode = i; + break; + } + } + metric_value = supported_mode; + } else if (metric->cw_id == NMETRIC_CW_ID_ULTRASERVER_MODE) { + metric_value = 
mode; + } + + return nmetric_post_u64(metric, metric_value, dest, available_size); +} + /** * nmetric_post_metrics() * @@ -661,6 +799,10 @@ static void nmetric_post_metrics(struct neuron_device *nd, u64 *curr_metrics, u6 case NMETRIC_TYPE_VERSION: data_size += nmetric_post_version(versions, curr_metric, dest, available_size); break; + case NMETRIC_TYPE_UTILIZATION: + data_size += nmetric_post_utilization(nd, curr_metrics, prev_metrics, freed_metrics, + curr_metric, dest, available_size); + break; case NMETRIC_TYPE_COUNTER: case NMETRIC_TYPE_FW_IO_ERR: data_size += nmetric_post_counter(curr_metrics, prev_metrics, freed_metrics, @@ -672,9 +814,12 @@ static void nmetric_post_metrics(struct neuron_device *nd, u64 *curr_metrics, u6 case NMETRIC_TYPE_CONSTANT_U64: data_size += nmetric_post_constant_u64(curr_metric, dest, const_u64_metrics, freed_const_u64_metrics, available_size); break; - case NMETRIC_TYPE_DRIVER: + case NMETRIC_TYPE_DRIVER_RESET: data_size += nmetric_post_and_reset_driver_metrics(curr_metric, dest, &nd->metrics.driver_metrics, available_size); break; + case NMETRIC_TYPE_DRIVER_USERVER: + data_size += nmetric_post_driver_userver_metrics(curr_metric, dest, available_size); + break; } } @@ -729,6 +874,7 @@ static void nmetric_cache_shared_bufs(struct neuron_device *nd, u64 *freed_metri memset(&nd->metrics.component_versions[curr_metric->index], 0, sizeof(struct nmetric_versions)); break; case NMETRIC_TYPE_COUNTER: + case NMETRIC_TYPE_UTILIZATION: case NMETRIC_TYPE_FW_IO_ERR: nd->metrics.ds_freed_metrics_buf[curr_metric->index] = 0; break; @@ -772,6 +918,7 @@ static void nmetric_start_new_session(struct neuron_device *nd, u64 *curr_metric if (!nmetric_check_post_tick(tick, curr_metric)) continue; switch(curr_metric->type) { + case NMETRIC_TYPE_UTILIZATION: case NMETRIC_TYPE_COUNTER: prev_metrics[curr_metric->index] = curr_metrics[curr_metric->index]; break; @@ -874,6 +1021,11 @@ static int nmetric_thread_fn(void *arg) break; }; + // do not attempt to post 
metrics if the device isn't operational + if (nd->device_state != NEURON_DEVICE_STATE_READY) { + continue; + } + // There are some metrics that we sample at a relatively higher frequency. Do that here. nmetric_sample_high_freq(nd); @@ -904,6 +1056,7 @@ static int nmetric_thread_fn(void *arg) tick = (tick + 1) % POST_TICK_COUNT; } nd->metrics.neuron_aggregation.last_logged_slow_tick = current_slow_tick; + nd->metrics.neuron_aggregation.last_logged_slow_tick_jiffies = jiffies; } } diff --git a/neuron_metrics.h b/neuron_metrics.h index a0df6e4..60fbb61 100644 --- a/neuron_metrics.h +++ b/neuron_metrics.h @@ -26,7 +26,9 @@ #define NMETRIC_TYPE_FW_IO_ERR 0x3 #define NMETRIC_TYPE_BITMAP 0x4 #define NMETRIC_TYPE_CONSTANT_U64 0x5 -#define NMETRIC_TYPE_DRIVER 0x6 +#define NMETRIC_TYPE_DRIVER_RESET 0x6 +#define NMETRIC_TYPE_DRIVER_USERVER 0x7 +#define NMETRIC_TYPE_UTILIZATION 0x8 #define NMETRIC_FLAG_VERS_ALLOW_TYPE (1) @@ -73,7 +75,7 @@ struct nmetric_driver_metrics { #define NMETRIC_CONSTANTS_COUNT 3 // Number of metrics of type NMETRIC_TYPE_COUNTER + the special case (type NMETRIC_TYPE_FW_IO_ERR) -#define NMETRIC_COUNTER_COUNT 29 +#define NMETRIC_COUNTER_COUNT 30 // Number of metrics of type NMETRIC_TYPE_BITMAP #define NMETRIC_BITMAP_COUNT 1 @@ -97,9 +99,11 @@ typedef struct { #define NMETRIC_CONSTANT_DEF(idx, tick, cw_id) NMETRIC_DEF(idx, NMETRIC_TYPE_CONSTANT, 1, tick, cw_id, 0xFF, 0) #define NMETRIC_VERSION_DEF(idx, tick, cw_id, ds_id, flags) NMETRIC_DEF(idx, NMETRIC_TYPE_VERSION, NEURON_METRICS_VERSION_CAPACITY, tick, cw_id, ds_id, flags) #define NMETRIC_COUNTER_DEF(idx, tick, cw_id, ds_id) NMETRIC_DEF(idx, NMETRIC_TYPE_COUNTER, 1, tick, cw_id, ds_id, 0) +#define NMETRIC_UTILIZATION_DEF(idx, tick, cw_id, ds_id) NMETRIC_DEF(idx, NMETRIC_TYPE_UTILIZATION, 1, tick, cw_id, ds_id, 0) #define NMETRIC_BITMAP_DEF(idx, tick, cw_id, ds_id) NMETRIC_DEF(idx, NMETRIC_TYPE_BITMAP, 1, tick, cw_id, ds_id, 0) #define NMETRIC_CONSTANT_U64(idx, tick, cw_id, ds_id, flags) 
NMETRIC_DEF(idx, NMETRIC_TYPE_CONSTANT_U64, 1, tick, cw_id, ds_id, flags) -#define NMETRIC_DRIVER_DEF(idx, tick, cw_id) NMETRIC_DEF(idx, NMETRIC_TYPE_DRIVER, 1, tick, cw_id, 0xFF, 0) +#define NMETRIC_DRIVER_DEF(idx, tick, cw_id) NMETRIC_DEF(idx, NMETRIC_TYPE_DRIVER_RESET, 1, tick, cw_id, 0xFF, 0) +#define NMETRIC_DRIVER_USERVER_DEF(idx, tick, cw_id) NMETRIC_DEF(idx, NMETRIC_TYPE_DRIVER_USERVER, 1, tick, cw_id, 0xFF, 0) struct nmetric_versions { u32 version_usage_count[NEURON_METRICS_VERSION_MAX_CAPACITY]; @@ -117,7 +121,8 @@ struct nmetric_aggregation_thread { struct task_struct *thread; // aggregation thread that sends metrics every ~5 minutes wait_queue_head_t wait_queue; volatile enum nmetric_state state; - u64 last_logged_slow_tick; // when the last metric request was posted + u64 last_logged_slow_tick; // Tick count from the start to when the last metric was posted + u64 last_logged_slow_tick_jiffies; // Last time we posted metrics in jiffies u64 curr[NMETRIC_COUNTER_COUNT]; // metrics for the current session so far u64 prev[NMETRIC_COUNTER_COUNT]; // recorded metrics from the last post u64 freed[NMETRIC_COUNTER_COUNT]; // cache holding metrics that were freed before the posting period was reached diff --git a/neuron_mmap.c b/neuron_mmap.c index 38d3519..6a7dda8 100644 --- a/neuron_mmap.c +++ b/neuron_mmap.c @@ -9,6 +9,7 @@ #include #include #include "neuron_mmap.h" +#include "neuron_pci.h" #include "neuron_device.h" #include "neuron_dhal.h" @@ -126,7 +127,7 @@ static void nmmap_remove_node_rbtree(struct rb_root *root, struct nmmap_node *mm rb_erase(&mmap->node, root); } -void nmmap_create_node(struct neuron_device *nd, void *va, pid_t pid, u64 size, u64 pa) +void nmmap_create_node(struct neuron_device *nd, void *va, pid_t pid, u64 size, u64 pa, u64 neuron_pa) { // Now insert the va in rb tree int slot; @@ -150,6 +151,7 @@ void nmmap_create_node(struct neuron_device *nd, void *va, pid_t pid, u64 size, mmap->device_index = nd->device_index; mmap->free_callback 
= NULL; mmap->dmabuf_ref_cnt = 0; + mmap->neuron_pa = neuron_pa; write_lock(&nd->mpset.rbmmaplock); nmmap_insert_node_rbtree(&nd->mpset.mmap_root[slot], mmap); write_unlock(&nd->mpset.rbmmaplock); @@ -347,7 +349,7 @@ static int nmmap_dm_mc(struct neuron_device *nd, struct vm_area_struct *vma, str // Insert the virtual address into tree so that we can do search using VA nmmap_create_node(nd, (void *)vma->vm_start, task_tgid_nr(current), - (u64)(vma->vm_end - vma->vm_start), (bar4_offset + nd->npdev.bar4_pa)); + (u64)(vma->vm_end - vma->vm_start), (bar4_offset + nd->npdev.bar4_pa), pa); // set the vm ops to cleanup on unmap vma->vm_private_data = (void *)nd; @@ -404,7 +406,7 @@ static int nmap_dm_special(struct neuron_device *nd, struct vm_area_struct *vma) // Insert the virtual address into tree so that we can do search using VA nmmap_create_node(nd, (void *)vma->vm_start, task_tgid_nr(current), - size, (offset + bar_pa)); + size, (offset + bar_pa), (u64)-1); // set the vm ops to cleanup on unmap vma->vm_private_data = (void *)nd; @@ -457,9 +459,54 @@ int nmmap_mem(struct neuron_device *nd, struct vm_area_struct *vma) // Insert the virtual address into tree so that we can do search using VA nmmap_create_node(nd, (void *)vma->vm_start, task_tgid_nr(current), - (u64)(vma->vm_end - vma->vm_start), mc->pa); + (u64)(vma->vm_end - vma->vm_start), mc->pa, (u64)-1); // set the vm ops to cleanup on unmap vma->vm_private_data = (void *)nd; vma->vm_ops = &nmmap_dm_vm_ops; return 0; } + +/** + * slightly modified version of neuron_p2p_register_and_get_pa(), used to find Neuron device and its HBM index that VA + * pointing at + * + */ +int nmmap_get_va_placement(void *va, int *device_index, int *hbm_index) +{ + int i, hbm; + struct neuron_device *nd; + u64 hbm_pa = (u64)-1; + + for (i = 0; i < MAX_NEURON_DEVICE_COUNT; i++) { + nd = neuron_pci_get_device(i); + if (!nd) { + continue; + } + write_lock(&nd->mpset.rbmmaplock); // TODO read_lock + struct nmmap_node *mmap = 
nmmap_search_va(nd, va); + if (mmap != NULL) { + // note that we just want to use PA to identify HBM index, we don't need to + // adjust the offset if VA is pointing to the middle of mmap'ed region since + // it's not possible to have a single mmap that crosses to another HBM + hbm_pa = mmap->neuron_pa; + *device_index = i; + } + write_unlock(&nd->mpset.rbmmaplock); + if (hbm_pa != (u64)-1) { // found it, now find HBM + for (hbm = 0; hbm < ndhal->ndhal_address_map.dram_channels; hbm++) { + u64 start = ndhal->ndhal_mpset.device_dram_effective_base_addr[hbm]; + u64 end = ndhal->ndhal_mpset.device_dram_end_addr[hbm]; + if (hbm_pa >= start && hbm_pa < end) { + *hbm_index = hbm; + return 0; + } + } + // if we got here something is wrong, this is a valid VA but PA does not match any of the HBMs + pr_err("VA belongs to device: %d but PA %llx does not match any of the HBMs", i, hbm_pa); + return -ENXIO; + } + } + return -ENXIO; +} + + diff --git a/neuron_mmap.h b/neuron_mmap.h index 51fa87a..190e753 100644 --- a/neuron_mmap.h +++ b/neuron_mmap.h @@ -37,6 +37,7 @@ struct nmmap_node { pid_t pid; //pid that map'd this device memory u32 device_index; //device index to which the memory belongs to u64 size; //size of the device memory + u64 neuron_pa; // physical address in Neuron address space (only set when mmap'ed host allocated Neuron HBM) //call back routine that will be called when this device memory is freed //this will be used by efa or other utilities that are using this memory @@ -77,9 +78,10 @@ struct neuron_dm_special_mmap_ent { * @va: mapped virtual address * @pid_t: pid that has this virtual address * @size: size of the mapped memory - * @pa: physical address of the device memory + * @pa: physical address of the device memory in host address space + * @neuron_pa: physical address in Neuron address space */ -void nmmap_create_node(struct neuron_device *nd, void *va, pid_t pid, u64 size, u64 pa); +void nmmap_create_node(struct neuron_device *nd, void *va, pid_t pid, 
u64 size, u64 pa, u64 neuron_pa); /** * nmmap_delete_node - Deletes all mmap nodes for the current PID. If external drivers have any free function regsistered @@ -135,4 +137,15 @@ struct nmmap_node *nmmap_search_va(struct neuron_device *nd, void *va); */ struct mem_chunk *nmmap_get_mc_from_pa(struct neuron_device *nd, phys_addr_t pa); + +/** + * nmmap_get_va_placement() - returns Neuron device and HBM index that VA + * was allocated from + * + * @va: virtual address of Neuron memory + * @device_index: Neuron device the memory was allocated from + * @hbm_index: the index of the HBM the memory was allocated from + */ + +int nmmap_get_va_placement(void *va, int *device_index, int *hbm_index); #endif diff --git a/neuron_module.c b/neuron_module.c index e6eb69a..0b257b3 100644 --- a/neuron_module.c +++ b/neuron_module.c @@ -18,13 +18,13 @@ #include "neuron_cdev.h" #include "neuron_pci.h" -MODULE_DESCRIPTION("Neuron Driver, built from SHA: 5ebb67d2e5be7052dcf1774cff03c69ab40d21ee"); +MODULE_DESCRIPTION("Neuron Driver, built from SHA: 6670442319042643165ab7986e5184496ea4407c"); MODULE_LICENSE("GPL"); -MODULE_VERSION("2.25.4.0"); +MODULE_VERSION("2.26.5.0"); MODULE_ALIAS("pci:v00001d0fd00007064sv*sd*bc*sc*i*"); -const char driver_version[] = "2.25.4.0"; -const char driver_revision[] = "5ebb67d2e5be7052dcf1774cff03c69ab40d21ee"; +const char driver_version[] = "2.26.5.0"; +const char driver_revision[] = "6670442319042643165ab7986e5184496ea4407c"; #ifdef CONFIG_FAULT_INJECTION diff --git a/neuron_power.c b/neuron_power.c index 47f2d25..4fc0fce 100644 --- a/neuron_power.c +++ b/neuron_power.c @@ -58,7 +58,7 @@ bool npower_enabled_in_fw(struct neuron_device *nd) } // Just read the API version from firmware. We could try to be smart here and cache - // this, but we need to protect ourselves from rollbacks in the Pacific version or + // this, but we need to protect ourselves from rollbacks in the firmware version or // other changes. 
Plus, this is just a simple MMIO read, so it's cheap. ret = fw_io_api_version_read(nd->npdev.bar0, &api_version_num); if (ret != 0) { diff --git a/neuron_reset.c b/neuron_reset.c index 2c908e3..1794ba4 100644 --- a/neuron_reset.c +++ b/neuron_reset.c @@ -114,9 +114,10 @@ static int nr_reset_thread_fn(void *arg) ret = ndhal->ndhal_reset.nr_initiate_reset(nd, nc_map); if (ret) { + char *reason = (ret == -EINTR) ? "interrupted by driver unload" : "failed"; nr_call_post_reset_config(nd, nc_map, false); ITER_COAL_REQS(request_iter, first_request, last_request, - pr_info("nd%d: reset request %u failed\n", nd->device_index, request_iter->request_id);) + pr_info("nd%d: reset request %u %s\n", nd->device_index, request_iter->request_id, reason);) state = NEURON_RESET_STATE_FAILED; nsysfsmetric_inc_reset_fail_count(nd); } else { @@ -399,7 +400,7 @@ int nr_initiate_reset_via_fw(struct neuron_device *nd, uint32_t nc_map, uint32_t * the device completes the reset. Wait before next polling cycle. 
*/ if (nr_msleep_stoppable(nd, ndhal->ndhal_reset.reset_poll_interval)) { - return -1; + return -EINTR; } /* Poll to check if firmware has acknowledged the reset request */ @@ -409,7 +410,7 @@ if (reset_time > 0) { nmetric_set_reset_time_metrics(nd, reset_time, is_device_reset); } else { - return -1; + pr_warn_once("unexpected reset time value of %lldms\n", reset_time); } return 0; } @@ -428,5 +429,5 @@ int nr_initiate_reset_via_fw(struct neuron_device *nd, uint32_t nc_map, uint32_t /* Timeout reached - reset failed */ nmetric_increment_reset_failure_count(nd, is_device_reset); // Record the reset failure in metrics - return -1; + return -ETIMEDOUT; } diff --git a/neuron_ring.c b/neuron_ring.c index 165e0c9..0c8420c 100644 --- a/neuron_ring.c +++ b/neuron_ring.c @@ -26,6 +26,10 @@ int dev_nc_map = 1; module_param(dev_nc_map, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); MODULE_PARM_DESC(dev_nc_map, "Map of active neuron cores"); +int dma_teardown_on_exit = 1; +module_param(dma_teardown_on_exit, int, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP); +MODULE_PARM_DESC(dma_teardown_on_exit, "Reset the DMA state on user process exit"); + // forward static void ndmar_h2t_ring_free(struct ndma_ring *ring); @@ -208,9 +212,14 @@ void ndmar_handle_process_exit(struct neuron_device *nd, pid_t pid) { int ret, eng_id, qid; + // Skip resetting the DMAs on user process exit if module param is disabled + if (dma_teardown_on_exit == 0) { + return; + } + struct mem_chunk *mc = nd->ndma_q_dummy_mc; const int desc_count = NDMA_QUEUE_DUMMY_RING_DESC_COUNT; - for (eng_id = 0; eng_id < ndhal->ndhal_address_map.dma_eng_per_nd; eng_id++) { + for (eng_id = 0; eng_id < ndhal->ndhal_address_map.seng_dma_eng_per_nd; eng_id++) { for (qid = 0; qid < DMA_MAX_Q_MAX; qid++) { struct ndma_eng *eng = ndmar_acquire_engine_nl(nd, eng_id); struct ndma_queue *queue; @@ -283,8 +292,14 @@ int ndmar_ack_completed(struct neuron_device *nd, 
u32 eng_id, u32 qid, u32 count goto done; } - udma_cdesc_ack(rxq, count); - udma_cdesc_ack(txq, count); + ret = udma_cdesc_ack(rxq, count); + if (ret) { + goto done; + } + ret = udma_cdesc_ack(txq, count); + if (ret) { + goto done; + } done: ndmar_release_engine(eng); diff --git a/neuron_sysfs_metrics.c b/neuron_sysfs_metrics.c index 72f5943..2fd72b3 100644 --- a/neuron_sysfs_metrics.c +++ b/neuron_sysfs_metrics.c @@ -814,26 +814,6 @@ static int nsysfsmetric_init_and_add_root_node(struct nsysfsmetric_metrics *metr return 0; } -static void nsysfsmetric_destroy_nodes(struct nsysfsmetric_node *node) -{ - int i; - - if (node->child_node_num == 0) { - return; - } - - mutex_lock(&node->lock); - - for (i = 0; i < node->child_node_num; i++) { - struct nsysfsmetric_node *child_node = node->child_nodes[i]; - nsysfsmetric_destroy_nodes(child_node); // destroy node's children recursively - kobject_put(&child_node->kobj); - } - node->child_node_num = 0; - - mutex_unlock(&node->lock); -} - static int nsysfsmetric_get_metric_id(int metric_id_category, int id) { int metric_id; @@ -975,9 +955,47 @@ int nsysfsmetric_register(struct neuron_device *nd, struct kobject *neuron_devic return 0; } +static void nsysfsmetric_destroy_counters(struct nsysfsmetric_metrics *metrics) +{ + // cleanup sysfs counters + // zero them out and set the sysfs node they are pointing to as NULL + // preventing use after free scenarios caused by attempts to access + // sysfs after it is destroyed. 
+ memset(metrics->nrt_metrics, 0, sizeof(metrics->nrt_metrics)); + memset(metrics->nrt_nd_metrics, 0, sizeof(metrics->nrt_nd_metrics)); + memset(metrics->dev_metrics, 0, sizeof(metrics->dev_metrics)); +} + +static void nsysfsmetric_destroy_nodes(struct nsysfsmetric_node *node, bool acquire_lock) +{ + int i; + + if (node->child_node_num == 0) { + return; + } + + if (acquire_lock == true) { + mutex_lock(&node->lock); + } + + for (i = 0; i < node->child_node_num; i++) { + struct nsysfsmetric_node *child_node = node->child_nodes[i]; + nsysfsmetric_destroy_nodes(child_node, true); // destroy node's children recursively + kobject_put(&child_node->kobj); + } + node->child_node_num = 0; + + if (acquire_lock == true) { + mutex_unlock(&node->lock); + } +} + void nsysfsmetric_destroy(struct neuron_device *nd) { - nsysfsmetric_destroy_nodes(&nd->sysfs_metrics.root); + mutex_lock(&nd->sysfs_metrics.root.lock); + nsysfsmetric_destroy_counters(&nd->sysfs_metrics); + nsysfsmetric_destroy_nodes(&nd->sysfs_metrics.root, false); + mutex_unlock(&nd->sysfs_metrics.root.lock); } int nsysfsmetric_init_and_add_dynamic_counter_nodes(struct neuron_device *nd, uint64_t ds_val) diff --git a/udma/udma.h b/udma/udma.h index ce76549..484b82e 100644 --- a/udma/udma.h +++ b/udma/udma.h @@ -232,9 +232,8 @@ int udma_q_init(struct udma *udma, u32 qid, struct udma_q_params *q_params); * * @udma_q: udma queue data structure * - * Return: 0 on success, a negative error code otherwise. */ -int udma_q_pause(struct udma_q *udma_q); +void udma_q_pause(struct udma_q *udma_q); /** * udma_q_handle_get() - Return a pointer to a queue date structure. @@ -317,7 +316,10 @@ static inline u32 udma_available_get(struct udma_q *udma_q) // wraparound feature (h2t only at the moment). // Due to the wraparound logic using bitwise and as mod, // we need to check size is power of 2. 
- BUG_ON(IS_POWER_OF_TWO(udma_q->size) == false); + if (IS_POWER_OF_TWO(udma_q->size) == false) { + pr_err("Expected UDMA queue size to be power of 2"); + return 0; // No descriptors can be submitted + } u32 tmp = udma_q->next_cdesc_idx - (udma_q->next_desc_idx + UDMA_MAX_NUM_CDESC_PER_CACHE_LINE); tmp &= udma_q->size_mask; @@ -337,7 +339,6 @@ static inline union udma_desc *udma_desc_get(struct udma_q *udma_q) union udma_desc *desc; u32 next_desc_idx; - BUG_ON(udma_q == NULL); /* when setting up the queue caller might pass NULL for the queue base address. That means the caller is responsible for attaching the ring and managing it somehow. Should not be calling @@ -357,7 +358,10 @@ static inline union udma_desc *udma_desc_get(struct udma_q *udma_q) // wraparound feature (h2t only at the moment). // Due to the wraparound logic using bitwise and as mod, // we need to check size is power of 2. - BUG_ON(IS_POWER_OF_TWO(udma_q->size) == false); + if (IS_POWER_OF_TWO(udma_q->size) == false) { + pr_err("Expected UDMA queue size to be power of 2"); + return NULL; + } /* if reached end of queue, wrap around */ udma_q->next_desc_idx = next_desc_idx & udma_q->size_mask; @@ -378,8 +382,6 @@ static inline u32 udma_ring_id_get(struct udma_q *udma_q) { u32 ring_id; - BUG_ON(udma_q == NULL); - ring_id = udma_q->desc_ring_id; /* calculate the ring id of the next desc */ @@ -395,22 +397,26 @@ static inline u32 udma_ring_id_get(struct udma_q *udma_q) * @udma_q: queue handle * @num: number of descriptors to add to the queues ring */ -void udma_desc_action_add(struct udma_q *udma_q, u32 num); +int udma_desc_action_add(struct udma_q *udma_q, u32 num); /** * udma_cdesc_ack() - Acknowledge processing completion descriptors * * @udma_q: udma queue handle * @num: number of descriptors to acknowledge + * + * Return: 0 on success, other values for error */ -static inline void udma_cdesc_ack(struct udma_q *udma_q, u32 num) +static inline int udma_cdesc_ack(struct udma_q *udma_q, u32 num) { - 
BUG_ON(udma_q == NULL); // This function will only run on dma queues that use the // wraparound feature (h2t only at the moment). // Due to the wraparound logic using bitwise and as mod, // we need to check size is power of 2. - BUG_ON(IS_POWER_OF_TWO(udma_q->size) == false); + if (IS_POWER_OF_TWO(udma_q->size) == false) { + pr_err("Expected UDMA queue size to be power of 2"); + return -1; + } u32 cdesc_idx = udma_q->next_cdesc_idx; u32 next_cdesc_idx = (cdesc_idx + num) & udma_q->size_mask; @@ -419,6 +425,7 @@ static inline void udma_cdesc_ack(struct udma_q *udma_q, u32 num) cdesc_idx = udma_q->next_cdesc_idx; next_cdesc_idx = (cdesc_idx + num) & udma_q->size_mask; } + return 0; } #define UDMA_M2S_MAX_ALLOWED_DESCS_PER_PACKET_V4 128 diff --git a/udma/udma_m2m.c b/udma/udma_m2m.c index 0b0a4eb..37372d5 100644 --- a/udma/udma_m2m.c +++ b/udma/udma_m2m.c @@ -149,7 +149,6 @@ int udma_m2m_init_queue(struct udma *udma, int qid, u32 eng_id, u32 m2s_ring_siz int ret; struct udma_q_params qp; - BUG_ON(udma == NULL); /* the h/w only supports rings base addr and end addr that are 256 byte aligned, check both m2s & s2m */ if (m2s_ring != NULL && HAS_ALIGNMENT(m2s_ring->addr, UDMA_QUEUE_ADDR_BYTE_ALIGNMENT) == false) { pr_err("invalid m2s ring alignment. Start addr must be %u byte aligned. 
base addr: 0x%llx\n", UDMA_QUEUE_ADDR_BYTE_ALIGNMENT, m2s_ring->addr); @@ -370,7 +369,6 @@ int udma_m2m_copy_prepare_one(struct udma *udma, u32 qid, dma_addr_t s_addr, dma struct udma_q *rxq; int ret; - BUG_ON(udma == NULL); if (qid >= udma->num_of_queues) { return -1; } @@ -401,8 +399,13 @@ int udma_m2m_copy_prepare_one(struct udma *udma, u32 qid, dma_addr_t s_addr, dma union udma_desc *rx_desc = udma_desc_get(rxq); union udma_desc *tx_desc = udma_desc_get(txq); + + if (rx_desc == NULL || tx_desc == NULL) { + return -EINVAL; + } + return udma_m2m_build_descriptor(rx_desc, tx_desc, udma_ring_id_get(rxq), - udma_ring_id_get(txq), s_addr, d_addr, size, barrier_type, set_dst_int); + udma_ring_id_get(txq), s_addr, d_addr, size, barrier_type, set_dst_int); } /* Start DMA data transfer for m2s_count/s2m_count number or descriptors. @@ -415,8 +418,6 @@ int udma_m2m_copy_start(struct udma *udma, u32 qid, u32 m2s_count, u32 s2m_count struct udma_q *rxq; int ret; - BUG_ON(udma == NULL); - if (qid >= udma->num_of_queues) { return -1; } @@ -435,9 +436,15 @@ int udma_m2m_copy_start(struct udma *udma, u32 qid, u32 m2s_count, u32 s2m_count if (s2m_count > rxq->size) { return -1; } - udma_desc_action_add(rxq, s2m_count); + ret = udma_desc_action_add(rxq, s2m_count); + if (ret) { + return ret; + } if (m2s_count > 0) { - udma_desc_action_add(txq, m2s_count); + ret = udma_desc_action_add(txq, m2s_count); + if (ret) { + return ret; + } } return ret; } diff --git a/udma/udma_main.c b/udma/udma_main.c index 710da33..7390c6a 100644 --- a/udma/udma_main.c +++ b/udma/udma_main.c @@ -233,7 +233,9 @@ static int udma_q_set_pointers(struct udma_q *udma_q) { /* reset the descriptors ring pointers */ - BUG_ON((ADDR_LOW(udma_q->desc_phy_base) & ~UDMA_M2S_Q_TDRBP_LOW_ADDR_MASK)); + if((ADDR_LOW(udma_q->desc_phy_base) & ~UDMA_M2S_Q_TDRBP_LOW_ADDR_MASK)) { + return -1; + } reg_write32(&udma_q->q_regs->rings.drbp_low, ADDR_LOW(udma_q->desc_phy_base)); reg_write32(&udma_q->q_regs->rings.drbp_high, 
ADDR_HIGH(udma_q->desc_phy_base)); @@ -245,7 +247,9 @@ static int udma_q_set_pointers(struct udma_q *udma_q) } else { /* reset the completion descriptors ring pointers */ /* assert completion base address aligned. */ - BUG_ON((ADDR_LOW(udma_q->cdesc_phy_base) & ~UDMA_M2S_Q_TCRBP_LOW_ADDR_MASK)); + if ((ADDR_LOW(udma_q->cdesc_phy_base) & ~UDMA_M2S_Q_TCRBP_LOW_ADDR_MASK)) { + return -1; + } reg_write32(&udma_q->q_regs->rings.crbp_low, ADDR_LOW(udma_q->cdesc_phy_base)); reg_write32(&udma_q->q_regs->rings.crbp_high, ADDR_HIGH(udma_q->cdesc_phy_base)); } @@ -259,8 +263,6 @@ static void udma_q_enable(struct udma_q *udma_q, int enable) { u32 reg; - BUG_ON(udma_q == NULL); - reg = udma_q->cfg; if (enable) { reg |= (UDMA_M2S_Q_CFG_EN_PREF | UDMA_M2S_Q_CFG_EN_SCHEDULING); @@ -278,7 +280,6 @@ static void udma_q_enable(struct udma_q *udma_q, int enable) static int udma_handle_init_aux(struct udma *udma, struct udma_params *udma_params) { int i; - /* note, V1 hardware uses DMA rev4, no need to support other version */ udma->rev_id = UDMA_REV_ID_4; udma->num_of_queues_max = DMA_MAX_Q_V4; @@ -336,9 +337,6 @@ int udma_init(struct udma *udma, struct udma_params *udma_params) int ret; u32 val; - BUG_ON(udma == NULL); - BUG_ON(udma_params == NULL); - ret = udma_handle_init_aux(udma, udma_params); if (ret) return ret; @@ -371,8 +369,9 @@ static int udma_q_init_validate(struct udma *udma, u32 qid, struct udma_q_params { struct udma_q *udma_q; - BUG_ON(udma == NULL); - BUG_ON(q_params == NULL); + if (udma == NULL || q_params == NULL) { + return -EINVAL; + } if (qid >= udma->num_of_queues) { pr_err("invalid queue id (%d)\n", qid); @@ -405,10 +404,9 @@ static int udma_q_init_validate(struct udma *udma, u32 qid, struct udma_q_params return 0; } -int udma_q_pause(struct udma_q *udma_q) +void udma_q_pause(struct udma_q *udma_q) { udma_q_enable(udma_q, 0); - return 0; } /* @@ -418,7 +416,9 @@ static int udma_q_reset(struct udma_q *udma_q) { u32 __iomem *q_sw_ctrl_reg; - 
BUG_ON(udma_q->cdesc_size != 0); + if (udma_q->cdesc_size != 0) { + return -1; + } udma_q_pause(udma_q); @@ -435,10 +435,10 @@ static int udma_q_reset(struct udma_q *udma_q) /** Initializes the udma queue data structure. */ -static void udma_q_init_internal(struct udma *udma, u32 qid, struct udma_q_params *q_params) +static int udma_q_init_internal(struct udma *udma, u32 qid, struct udma_q_params *q_params) { struct udma_q *udma_q; - + int ret = 0; udma_q = (q_params->type == UDMA_TX) ? &udma->udma_q_m2s[qid] : &udma->udma_q_s2m[qid]; udma_q->type = q_params->type; udma_q->adapter_rev_id = q_params->adapter_rev_id; @@ -481,12 +481,20 @@ static void udma_q_init_internal(struct udma *udma, u32 qid, struct udma_q_param /* clear all queue ptrs */ - udma_q_reset(udma_q); + ret = udma_q_reset(udma_q); + if (ret) { + return ret; + } /* reset the queue pointers */ - udma_q_set_pointers(udma_q); + ret = udma_q_set_pointers(udma_q); + if (ret) { + return ret; + } udma_q_enable(udma_q, 1); + + return ret; } /** Validates and Initializes the udma queue data structure and hardware. 
@@ -498,7 +506,9 @@ int udma_q_init(struct udma *udma, u32 qid, struct udma_q_params *q_params) ret = udma_q_init_validate(udma, qid, q_params); if (ret) return ret; - udma_q_init_internal(udma, qid, q_params); + ret = udma_q_init_internal(udma, qid, q_params); + if (ret) + return ret; return 0; } @@ -548,8 +558,6 @@ int udma_state_set(struct udma *udma, enum udma_state state) { u32 reg; - BUG_ON(udma == NULL); - reg = 0; switch (state) { case UDMA_DISABLE: @@ -589,8 +597,6 @@ static bool udma_s2m_stream_status_get(struct udma *udma) bool queue_stream_status_valid = false; bool stream_status = true; - BUG_ON(udma == NULL); - reg_read32(&udma->udma_regs_s2m->s2m.stream_cfg, &stream_cfg); stream_cfg &= UDMA_S2M_STREAM_FLUSH; @@ -691,14 +697,16 @@ enum udma_state udma_state_get(struct udma *udma, enum udma_type type) } /* Increment tail pointer of a DMA queue, that starts data transfer by the queue */ -void udma_desc_action_add(struct udma_q *udma_q, u32 num) +int udma_desc_action_add(struct udma_q *udma_q, u32 num) { u32 __iomem *addr; - BUG_ON(udma_q == NULL); - BUG_ON((num == 0) || (num > udma_q->size)); + if ((num == 0) || (num > udma_q->size)) { + return -1; + } addr = &udma_q->q_regs->rings.drtp_inc; mb(); // to make sure data written to the descriptors will be visible to the DMA reg_write32(addr, num); + return 0; } diff --git a/v2/address_map.h b/v2/address_map.h index f079e7d..b0309a2 100644 --- a/v2/address_map.h +++ b/v2/address_map.h @@ -48,7 +48,8 @@ // Number of DMA queues in each engine #define V2_DMA_QUEUE_PER_ENG 16 -#define V2_NUM_DMA_ENG_PER_DEVICE (V2_NC_PER_DEVICE * V2_DMA_ENG_PER_NC) + 2 +#define V2_NUM_H2D_DMA_PER_DEVICE 2 +#define V2_NUM_DMA_ENG_PER_DEVICE (V2_NC_PER_DEVICE * V2_DMA_ENG_PER_NC) + V2_NUM_H2D_DMA_PER_DEVICE #define V2_MAX_DMA_RINGS 16 // Number of TPB engines per NC diff --git a/v2/neuron_dhal_v2.c b/v2/neuron_dhal_v2.c index 465c811..d196952 100644 --- a/v2/neuron_dhal_v2.c +++ b/v2/neuron_dhal_v2.c @@ -900,10 +900,11 @@ static 
void nsysfsmetric_get_hbm_error_count_v2(struct neuron_device *nd, bool repairable, uint32_t *err_count) { + uint32_t repairable_count; if (repairable) { *err_count = 0; } else { - *err_count = fw_io_get_total_uecc_err_count(nd->npdev.bar0); + fw_io_get_total_ecc_err_counts(nd->npdev.bar0, err_count, &repairable_count); } } @@ -1727,7 +1728,8 @@ int ndhal_register_funcs_v2(void) { if (narch_is_qemu()) { ndhal->ndhal_reset.nr_initiate_reset = nr_initiate_reset_v2_qemu; ndhal->ndhal_reset.nr_wait_for_reset_completion = nr_wait_for_reset_completion_v2_qemu; - ndhal->ndhal_address_map.dma_eng_per_nd = V2_NC_PER_DEVICE * V2_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.seng_dma_eng_per_nd = V2_NC_PER_DEVICE * V2_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.h2d_dma_eng_per_nd = V2_NUM_H2D_DMA_PER_DEVICE; ndhal->ndhal_reg_access.reg_read32_array = reg_read32_array_v2_qemu_emu; ndhal->ndhal_pci.apb_bar = 2; ndhal->ndhal_ndma.ndma_get_wait_for_completion_time = ndma_get_wait_for_completion_time_v2_qemu; @@ -1735,7 +1737,8 @@ int ndhal_register_funcs_v2(void) { ndhal->ndhal_reset.retry_count *= 1000; // wait longer on the emulator ndhal->ndhal_reset.nr_initiate_reset = nr_initiate_reset_v2_emu; ndhal->ndhal_reset.nr_wait_for_reset_completion = nr_wait_for_reset_completion_v2_emu; - ndhal->ndhal_address_map.dma_eng_per_nd = nc_per_dev_param * V2_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.seng_dma_eng_per_nd = nc_per_dev_param * V2_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.h2d_dma_eng_per_nd = nc_per_dev_param; ndhal->ndhal_address_map.nc_per_device = nc_per_dev_param; ndhal->ndhal_address_map.dev_nc_map = dev_nc_map; ndhal->ndhal_reg_access.reg_read32_array = reg_read32_array_v2_qemu_emu; @@ -1744,7 +1747,8 @@ int ndhal_register_funcs_v2(void) { } else { ndhal->ndhal_reset.nr_initiate_reset = nr_initiate_reset_v2; ndhal->ndhal_reset.nr_wait_for_reset_completion = nr_wait_for_reset_completion_v2; - ndhal->ndhal_address_map.dma_eng_per_nd = V2_NC_PER_DEVICE * V2_DMA_ENG_PER_NC; + 
ndhal->ndhal_address_map.seng_dma_eng_per_nd = V2_NC_PER_DEVICE * V2_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.h2d_dma_eng_per_nd = V2_NUM_H2D_DMA_PER_DEVICE; ndhal->ndhal_reg_access.reg_read32_array = reg_read32_array_v2; ndhal->ndhal_pci.apb_bar = 0; } diff --git a/v3/neuron_dhal_v3.c b/v3/neuron_dhal_v3.c index c6682b4..52e2d11 100644 --- a/v3/neuron_dhal_v3.c +++ b/v3/neuron_dhal_v3.c @@ -897,7 +897,7 @@ const int trn2_32xl_neigbor_ids[16][4] = { */ static int fw_io_topology_v3(struct fw_io_ctx *ctx, int pdev_index, int device_id, u32 *connected_device_ids, int *count) { - // V3 does not have Pacific support to detect east/west/south/north neighbors, + // V3 does not have firmware support to detect east/west/south/north neighbors, // so its topology is hardcoded based on instance type. *count = 0; @@ -1099,6 +1099,7 @@ static void nsysfsmetric_get_hbm_error_count_v3(struct neuron_device *nd, { int ret; uint32_t total_uncorrected_ecc_err_count; + uint32_t total_repairable_ecc_err_count; uint32_t ecc_repair_state; *err_count = 0; @@ -1108,7 +1109,7 @@ static void nsysfsmetric_get_hbm_error_count_v3(struct neuron_device *nd, pr_err("sysfs failed to read HBM ECC repair state from FWIO\n"); return; } - total_uncorrected_ecc_err_count = fw_io_get_total_uecc_err_count(nd->npdev.bar0); + fw_io_get_total_ecc_err_counts(nd->npdev.bar0, &total_uncorrected_ecc_err_count, &total_repairable_ecc_err_count); /* * HBM Repair State Bitfield notes: @@ -1118,20 +1119,27 @@ static void nsysfsmetric_get_hbm_error_count_v3(struct neuron_device *nd, * 0x2 means repair failure */ if (total_uncorrected_ecc_err_count == 0 && ecc_repair_state != 0) { - // For legacy Pacific firmware, there might be the case that (err count > 0 && repair state == 0), so allow this case + // For legacy firmware, there might be the case that (err count > 0 && repair state == 0), so allow this case // When err count = 0, repair state must be 0x0 pr_warn_once("[ND %d] Total Uncorrected ecc err count is %d, 
but repair state is %d which is invalid. Please contact Neuron for support.\n", nd->device_index, total_uncorrected_ecc_err_count, ecc_repair_state); return; } - if (repairable) { - *err_count = (ecc_repair_state == 0x1) ? total_uncorrected_ecc_err_count : 0; - } else { - *err_count = (ecc_repair_state == 0x2) ? total_uncorrected_ecc_err_count : 0; - if (ecc_repair_state == 0x0) { - // legacy FW hack - TODO remove at some point - *err_count = total_uncorrected_ecc_err_count; - } + + // We did not complete the repair for some reason, in this case we expect that the error count is non-zero since the repairs have + // not gone through yet. If it is zero notify the user since this is unexpected. + if (ecc_repair_state == 0x1 && total_repairable_ecc_err_count == 0) { + pr_warn_once("[ND %d] HBM repairs were not completed, but no repairable ecc errors were reported, which is invalid. Please contact Neuron for support.\n", nd->device_index); + return; + } + + // We failed to repair ECC memory but have not encountered a UECC yet. Proactively notify the user of this since the ECC + // will be more susceptible to errors in the future. + if (ecc_repair_state == 0x2 && total_uncorrected_ecc_err_count == 0) { + pr_warn_once("[ND %d] HBM repair failed. No uncorrectable ecc errors detected, however memory will be more susceptible to corruption. Please contact Neuron for support.\n", nd->device_index); + return; + } + + *err_count = (repairable) ?
total_repairable_ecc_err_count : total_uncorrected_ecc_err_count; } /** @@ -1346,7 +1354,7 @@ static u32 neuron_pci_routing_id_to_user_id(u32 routing_id) } /** - * neuron_pci_get_device_id() - get device id from pacific and set nd->device_index + * neuron_pci_get_device_id() - get device id and set nd->device_index * * @param dev: PCI device * @param nd: neuron device @@ -2171,18 +2179,20 @@ int ndhal_register_funcs_v3(void) { ndhal->ndhal_reset.retry_count *= 1000; // wait longer on qemu ndhal->ndhal_reset.nr_initiate_reset = nr_initiate_reset_v3_qemu; ndhal->ndhal_reset.nr_wait_for_reset_completion = nr_wait_for_reset_completion_v3_qemu; - ndhal->ndhal_address_map.dma_eng_per_nd = V3_NC_PER_DEVICE * V3_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.seng_dma_eng_per_nd = V3_NC_PER_DEVICE * V3_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.h2d_dma_eng_per_nd = V3_NUM_H2D_DMA_PER_DEVICE; ndhal->ndhal_reg_access.reg_read32_array = reg_read32_array_v3_qemu_emu; ndhal->ndhal_ndma.ndma_get_wait_for_completion_time = ndma_get_wait_for_completion_time_v3_qemu; ndhal->ndhal_address_map.dice_per_device = 1; - // Disable metrics on inkling + // Disable metrics on qemu nmetric_log_posts = 0; } else if (narch_is_emu()) { ndhal->ndhal_reset.retry_count *= 1000; // wait longer on the emulator ndhal->ndhal_reset.nr_initiate_reset = nr_initiate_reset_v3_emu; ndhal->ndhal_reset.nr_wait_for_reset_completion = nr_wait_for_reset_completion_v3_emu; - ndhal->ndhal_address_map.dma_eng_per_nd = nc_per_dev_param * V3_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.seng_dma_eng_per_nd = nc_per_dev_param * V3_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.h2d_dma_eng_per_nd = nc_per_dev_param; ndhal->ndhal_address_map.nc_per_device = nc_per_dev_param; ndhal->ndhal_address_map.dev_nc_map = dev_nc_map; ndhal->ndhal_reg_access.reg_read32_array = reg_read32_array_v3_qemu_emu; @@ -2194,7 +2204,8 @@ int ndhal_register_funcs_v3(void) { } else { ndhal->ndhal_reset.nr_initiate_reset = nr_initiate_reset_v3; 
ndhal->ndhal_reset.nr_wait_for_reset_completion = nr_wait_for_reset_completion_v3; - ndhal->ndhal_address_map.dma_eng_per_nd = V3_NC_PER_DEVICE * V3_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.seng_dma_eng_per_nd = V3_NC_PER_DEVICE * V3_DMA_ENG_PER_NC; + ndhal->ndhal_address_map.h2d_dma_eng_per_nd = V3_NUM_H2D_DMA_PER_DEVICE; ndhal->ndhal_reg_access.reg_read32_array = reg_read32_array_v3; } diff --git a/v3/neuron_pelect.c b/v3/neuron_pelect.c index a9d1b0d..c00a579 100644 --- a/v3/neuron_pelect.c +++ b/v3/neuron_pelect.c @@ -1203,13 +1203,6 @@ int npe_election_exec_on_rst(struct neuron_device *nd, bool reset_successful) } } - // spoof PDS topology/election data - // - if (ndhal->ndhal_arch.platform_type == NEURON_PLATFORM_TYPE_PDS) { - npe_pds_spoof(); - goto done; - } - // if we aren't kicking off election on first driver reset (testing) or // if we aren't in init state then we've already made an election decision. // @@ -1223,6 +1216,13 @@ int npe_election_exec_on_rst(struct neuron_device *nd, bool reset_successful) goto done; } + // spoof PDS topology/election data + // + if (ndhal->ndhal_arch.platform_type == NEURON_PLATFORM_TYPE_PDS) { + npe_pds_spoof(); + goto done; + } + npe_initiate_election(ndhal_pelect_data.nbr_data_read_timeout); done: @@ -1801,6 +1801,35 @@ ssize_t npe_class_ultraserver_mode_show_data(char *buf) return dhal_sysfs_emit(buf, "%s\n", output); } +/* + * Temporary hard-coded mapping table for server_id assignment. 
+ * If we find device0's serial number in the table, this is + * part of a PDS server + * + */ +struct { + uint64_t d0_serial_number; // serial number of a particular device 0 on a particular server + uint64_t server_num; // server unique id of the associated server + uint32_t node_id; // (rack id<<1 | server id) +} npe_pds_tmp_mapping_tbl[] = { + {0x644b8499cd7bf298ull, 0x0000004005590701ull, 0}, + {0x001e8649a094af56ull, 0x0000004005590701ull, 1}, + {0x4b63b0678ae2a930ull, 0x0000004005590701ull, 2}, + {0x7242db0306415ed7ull, 0x0000004005590701ull, 3}, + {0x7e3a518befdf7a57ull, 0x0000004005590689ull, 0}, + {0x3c604484897a4f1aull, 0x0000004005590689ull, 1}, + {0xacfba8515bb626a6ull, 0x0000004005590689ull, 2}, + {0x48c2b73699e97cadull, 0x0000004005590689ull, 3}, + {0xa952ff53b45fc298ull, 0x0000004005590680ull, 0}, + {0x5961a8d75d827fc0ull, 0x0000004005590680ull, 1}, + {0x714cf1792facf83bull, 0x0000004005590680ull, 2}, + {0x9b3187e1756c8a7full, 0x0000004005590680ull, 3}, + {0x85059f2db248f3dfull, 0x0000004005590682ull, 0}, + {0xf4d2ef81ad1b1264ull, 0x0000004005590682ull, 1}, + {0x3d3ea5a61b768cbdull, 0x0000004005590682ull, 2}, + {0x85752a544054033aull, 0x0000004005590682ull, 3} +}; + /* npe_pds_spoof(void) * * temp spoof of PDS platform data @@ -1809,12 +1838,42 @@ ssize_t npe_class_ultraserver_mode_show_data(char *buf) static void npe_pds_spoof(void) { static bool initialized = false; + int ret; + int i; + struct neuron_device *nd; + uint64_t serial_number; + pr_info("spoofing pds data"); if (initialized) { return; } + // first check the mapping table to see if there's a match for preview server + // + nd = ndhal_pelect_data.pnd[0]; + if (nd == NULL) { + pr_err("internal error. 
Neuron device pointer should not be null at start of election"); + return; + } + + ret = fw_io_serial_number_read(nd->npdev.bar0, &serial_number); + if (ret) { + pr_err("nd%02d: local serial number read failed", nd->device_index); + return; + } + + for (i = 0; i < sizeof(npe_pds_tmp_mapping_tbl) / sizeof(*npe_pds_tmp_mapping_tbl); i++) { + if (serial_number == npe_pds_tmp_mapping_tbl[i].d0_serial_number) { + ndhal_pelect_data.node_cnt = 4; + ndhal_pelect_data.node_id = npe_pds_tmp_mapping_tbl[i].node_id; + ndhal_pelect_data.pod_serial_num = npe_pds_tmp_mapping_tbl[i].server_num; + goto done; + } + } + + // otherwise, we use temporary parameter overrides + // ndhal_pelect_data.node_cnt = userver_pds_node_cnt; if (ndhal_pelect_data.node_cnt == 0) { @@ -1832,6 +1891,8 @@ static void npe_pds_spoof(void) } ndhal_pelect_data.pod_serial_num = userver_pds_server_id; + +done: ndhal_pelect_data.pod_state_internal = NEURON_NPE_POD_ST_ELECTION_SUCCESS; initialized = true; diff --git a/v4/neuron_dhal_v4.c b/v4/neuron_dhal_v4.c index 798719b..9ed4d3c 100644 --- a/v4/neuron_dhal_v4.c +++ b/v4/neuron_dhal_v4.c @@ -151,6 +151,7 @@ static int ndhal_register_funcs_trn3(void) { */ #define NEURON_TRN3PDS_INSTANCE_NAME "trn3s.48xlarge" #define NEURON_TRN3PDS0_INSTANCE_NAME "trn3-dev0.48xlarge" +#define NEURON_TRN3P_INSTANCE_NAME "trn3p.48xlarge" static enum neuron_platform_type ndhal_platform_type_v4(void) { @@ -162,6 +163,8 @@ static enum neuron_platform_type ndhal_platform_type_v4(void) platform_type = NEURON_PLATFORM_TYPE_PDS; } else if ((strncmp(buf, NEURON_TRN3PDS0_INSTANCE_NAME, sizeof(NEURON_TRN3PDS_INSTANCE_NAME)-1) == 0)) { platform_type = NEURON_PLATFORM_TYPE_PDS; + } else if ((strncmp(buf, NEURON_TRN3P_INSTANCE_NAME, sizeof(NEURON_TRN3P_INSTANCE_NAME)-1) == 0)) { + platform_type = NEURON_PLATFORM_TYPE_ULTRASERVER; } else { platform_type = NEURON_PLATFORM_TYPE_STD; } @@ -336,7 +339,7 @@ static u32 neuron_pci_routing_id_to_user_id(u32 routing_id) } /** - * 
neuron_pci_get_device_id() - get device id from pacific and set nd->device_index + * neuron_pci_get_device_id() - get device id and set nd->device_index * * @param dev: PCI device * @param nd: neuron device @@ -409,6 +412,51 @@ static int neuron_pci_get_device_id_v4(struct neuron_device *nd, struct pci_dev return 0; } +#define NC_MAPPING_MAX_CORE_COUNT_V4 128 +static const struct neuron_ioctl_nc_map_entry nc_mapping_v0_seng_swap_pds[] = { + { .device_id = 0, .device_nc_idx = 4 }, { .device_id = 0, .device_nc_idx = 5 }, { .device_id = 0, .device_nc_idx = 6 }, { .device_id = 0, .device_nc_idx = 7 }, { .device_id = 0, .device_nc_idx = 2 }, { .device_id = 0, .device_nc_idx = 3 }, { .device_id = 0, .device_nc_idx = 0 }, { .device_id = 0, .device_nc_idx = 1 }, // ND0 + { .device_id = 1, .device_nc_idx = 2 }, { .device_id = 1, .device_nc_idx = 3 }, { .device_id = 1, .device_nc_idx = 0 }, { .device_id = 1, .device_nc_idx = 1 }, { .device_id = 1, .device_nc_idx = 4 }, { .device_id = 1, .device_nc_idx = 5 }, { .device_id = 1, .device_nc_idx = 6 }, { .device_id = 1, .device_nc_idx = 7 }, // ND1 + { .device_id = 2, .device_nc_idx = 4 }, { .device_id = 2, .device_nc_idx = 5 }, { .device_id = 2, .device_nc_idx = 6 }, { .device_id = 2, .device_nc_idx = 7 }, { .device_id = 2, .device_nc_idx = 2 }, { .device_id = 2, .device_nc_idx = 3 }, { .device_id = 2, .device_nc_idx = 0 }, { .device_id = 2, .device_nc_idx = 1 }, // ND2 + { .device_id = 3, .device_nc_idx = 2 }, { .device_id = 3, .device_nc_idx = 3 }, { .device_id = 3, .device_nc_idx = 0 }, { .device_id = 3, .device_nc_idx = 1 }, { .device_id = 3, .device_nc_idx = 4 }, { .device_id = 3, .device_nc_idx = 5 }, { .device_id = 3, .device_nc_idx = 6 }, { .device_id = 3, .device_nc_idx = 7 }, // ND3 + { .device_id = 4, .device_nc_idx = 4 }, { .device_id = 4, .device_nc_idx = 5 }, { .device_id = 4, .device_nc_idx = 6 }, { .device_id = 4, .device_nc_idx = 7 }, { .device_id = 4, .device_nc_idx = 2 }, { .device_id = 4, .device_nc_idx = 3 
}, { .device_id = 4, .device_nc_idx = 0 }, { .device_id = 4, .device_nc_idx = 1 }, // ND4 + { .device_id = 5, .device_nc_idx = 2 }, { .device_id = 5, .device_nc_idx = 3 }, { .device_id = 5, .device_nc_idx = 0 }, { .device_id = 5, .device_nc_idx = 1 }, { .device_id = 5, .device_nc_idx = 4 }, { .device_id = 5, .device_nc_idx = 5 }, { .device_id = 5, .device_nc_idx = 6 }, { .device_id = 5, .device_nc_idx = 7 }, // ND5 + { .device_id = 6, .device_nc_idx = 4 }, { .device_id = 6, .device_nc_idx = 5 }, { .device_id = 6, .device_nc_idx = 6 }, { .device_id = 6, .device_nc_idx = 7 }, { .device_id = 6, .device_nc_idx = 2 }, { .device_id = 6, .device_nc_idx = 3 }, { .device_id = 6, .device_nc_idx = 0 }, { .device_id = 6, .device_nc_idx = 1 }, // ND6 + { .device_id = 7, .device_nc_idx = 2 }, { .device_id = 7, .device_nc_idx = 3 }, { .device_id = 7, .device_nc_idx = 0 }, { .device_id = 7, .device_nc_idx = 1 }, { .device_id = 7, .device_nc_idx = 4 }, { .device_id = 7, .device_nc_idx = 5 }, { .device_id = 7, .device_nc_idx = 6 }, { .device_id = 7, .device_nc_idx = 7 }, // ND7 + { .device_id = 8, .device_nc_idx = 4 }, { .device_id = 8, .device_nc_idx = 5 }, { .device_id = 8, .device_nc_idx = 6 }, { .device_id = 8, .device_nc_idx = 7 }, { .device_id = 8, .device_nc_idx = 2 }, { .device_id = 8, .device_nc_idx = 3 }, { .device_id = 8, .device_nc_idx = 0 }, { .device_id = 8, .device_nc_idx = 1 }, // ND8 + { .device_id = 9, .device_nc_idx = 2 }, { .device_id = 9, .device_nc_idx = 3 }, { .device_id = 9, .device_nc_idx = 0 }, { .device_id = 9, .device_nc_idx = 1 }, { .device_id = 9, .device_nc_idx = 4 }, { .device_id = 9, .device_nc_idx = 5 }, { .device_id = 9, .device_nc_idx = 6 }, { .device_id = 9, .device_nc_idx = 7 }, // ND9 + { .device_id = 10, .device_nc_idx = 4 }, { .device_id = 10, .device_nc_idx = 5 }, { .device_id = 10, .device_nc_idx = 6 }, { .device_id = 10, .device_nc_idx = 7 }, { .device_id = 10, .device_nc_idx = 2 }, { .device_id = 10, .device_nc_idx = 3 }, { .device_id = 
10, .device_nc_idx = 0 }, { .device_id = 10, .device_nc_idx = 1 }, // ND10 + { .device_id = 11, .device_nc_idx = 2 }, { .device_id = 11, .device_nc_idx = 3 }, { .device_id = 11, .device_nc_idx = 0 }, { .device_id = 11, .device_nc_idx = 1 }, { .device_id = 11, .device_nc_idx = 4 }, { .device_id = 11, .device_nc_idx = 5 }, { .device_id = 11, .device_nc_idx = 6 }, { .device_id = 11, .device_nc_idx = 7 }, // ND11 + { .device_id = 12, .device_nc_idx = 4 }, { .device_id = 12, .device_nc_idx = 5 }, { .device_id = 12, .device_nc_idx = 6 }, { .device_id = 12, .device_nc_idx = 7 }, { .device_id = 12, .device_nc_idx = 2 }, { .device_id = 12, .device_nc_idx = 3 }, { .device_id = 12, .device_nc_idx = 0 }, { .device_id = 12, .device_nc_idx = 1 }, // ND12 + { .device_id = 13, .device_nc_idx = 2 }, { .device_id = 13, .device_nc_idx = 3 }, { .device_id = 13, .device_nc_idx = 0 }, { .device_id = 13, .device_nc_idx = 1 }, { .device_id = 13, .device_nc_idx = 4 }, { .device_id = 13, .device_nc_idx = 5 }, { .device_id = 13, .device_nc_idx = 6 }, { .device_id = 13, .device_nc_idx = 7 }, // ND13 + { .device_id = 14, .device_nc_idx = 4 }, { .device_id = 14, .device_nc_idx = 5 }, { .device_id = 14, .device_nc_idx = 6 }, { .device_id = 14, .device_nc_idx = 7 }, { .device_id = 14, .device_nc_idx = 2 }, { .device_id = 14, .device_nc_idx = 3 }, { .device_id = 14, .device_nc_idx = 0 }, { .device_id = 14, .device_nc_idx = 1 }, // ND14 + { .device_id = 15, .device_nc_idx = 2 }, { .device_id = 15, .device_nc_idx = 3 }, { .device_id = 15, .device_nc_idx = 0 }, { .device_id = 15, .device_nc_idx = 1 }, { .device_id = 15, .device_nc_idx = 4 }, { .device_id = 15, .device_nc_idx = 5 }, { .device_id = 15, .device_nc_idx = 6 }, { .device_id = 15, .device_nc_idx = 7 }, // ND15 +}; + +#define NC_MAPPING_V0_SENG_SWAP_SIZE (sizeof(nc_mapping_v0_seng_swap_pds) / sizeof(nc_mapping_v0_seng_swap_pds[0])) +static_assert((NC_MAPPING_V0_SENG_SWAP_SIZE == NC_MAPPING_MAX_CORE_COUNT_V4) && (NC_MAPPING_V0_SENG_SWAP_SIZE 
<= NEURON_NC_MAP_MAX_ENTRIES)); + +static int ncdev_logical_to_physical_nc_map_v4(struct neuron_ioctl_nc_map *map, uint32_t max_num_entries, enum neuron_ioctl_nc_mapping_type version) +{ + uint32_t entry_idx; + uint32_t entries_to_copy = (max_num_entries < NC_MAPPING_MAX_CORE_COUNT_V4) ? max_num_entries : NC_MAPPING_MAX_CORE_COUNT_V4; + const struct neuron_ioctl_nc_map_entry *mapping; + + if (version != NEURON_IOCTL_NC_MAPPING_TYPE_V0) { + pr_err("Unsupported Neuron Core Mapping version %u for v4 arch", version); + return -EINVAL; + } + mapping = nc_mapping_v0_seng_swap_pds; + + for (entry_idx = 0; entry_idx < entries_to_copy; entry_idx++) { + uint32_t core_idx = entry_idx; + WARN_ONCE(core_idx >= NC_MAPPING_MAX_CORE_COUNT_V4, "core_idx %d > max core count %d", core_idx, NC_MAPPING_MAX_CORE_COUNT_V4); + map->mappings[entry_idx] = mapping[core_idx]; + } + map->num_entries = entries_to_copy; + + return 0; +} + /** * ndhal_register_funcs_v4() - initialize the dhal for v4 chips * @@ -432,7 +480,7 @@ int ndhal_register_funcs_v4(void) { ndhal->ndhal_cdev.ncdev_mem_regions = ncdev_mem_regions_v4; if (narch_is_emu()) { - // Temporarily disable resets on mariana emulation until pacific is ready + // Temporarily disable resets on emulation until support is ready extern int no_reset; no_reset = 1; } @@ -448,6 +496,7 @@ int ndhal_register_funcs_v4(void) { } } else if (ndhal->ndhal_arch.platform_type == NEURON_PLATFORM_TYPE_PDS) { //TODO PDS + ndhal->ndhal_cdev.ncdev_logical_to_physical_nc_map = ncdev_logical_to_physical_nc_map_v4; } switch (ndhal->pci_device_id) {