Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion hw/ip/spatz/src/spatz.sv
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #(
logic [NrReadPorts-1:0] vrf_re;
vrf_data_t [NrReadPorts-1:0] vrf_rdata;
logic [NrReadPorts-1:0] vrf_rvalid;
logic [NrWritePorts-1:0] vrf_wvalid_vlsu;

spatz_vrf #(
.NrReadPorts (NrReadPorts ),
Expand All @@ -229,6 +230,8 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #(
.we_i (vrf_we ),
.wbe_i (vrf_wbe_buf ),
.wvalid_o (vrf_wvalid ),
.wvalid_vlsu_o (vrf_wvalid_vlsu),

`ifdef BUF_FPU
.fpu_buf_usage_i (vfu_buf_usage ),
`endif
Expand Down Expand Up @@ -424,7 +427,8 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #(
.vrf_wdata_o (vrf_wdata[VLSU_VD_WD] ),
.vrf_we_o (sb_we[VLSU_VD_WD] ),
.vrf_wbe_o (vrf_wbe[VLSU_VD_WD] ),
.vrf_wvalid_i (vrf_wvalid[VLSU_VD_WD] ),
// .vrf_wvalid_i (vrf_wvalid[VLSU_VD_WD] ),
.vrf_wvalid_i (vrf_wvalid_vlsu[VLSU_VD_WD] ),//vrf_wvalid[1]
.vrf_raddr_o (vrf_raddr[VLSU_VD_RD:VLSU_VS2_RD] ),
.vrf_re_o (sb_re[VLSU_VD_RD:VLSU_VS2_RD] ),
.vrf_rdata_i (vrf_rdata[VLSU_VD_RD:VLSU_VS2_RD] ),
Expand Down
33 changes: 30 additions & 3 deletions hw/ip/spatz/src/spatz_vrf.sv
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ module spatz_vrf
input logic [NrWritePorts-1:0] we_i,
input vrf_be_t [NrWritePorts-1:0] wbe_i,
output logic [NrWritePorts-1:0] wvalid_o,
output logic [NrWritePorts-1:0] wvalid_vlsu_o,
output logic [NrWritePorts-1:0] wbe_o,
`ifdef BUF_FPU
// Signal to track if result can be buffered or not
input logic [$clog2(FpuBufDepth)-1:0] fpu_buf_usage_i,
Expand Down Expand Up @@ -90,13 +92,32 @@ module spatz_vrf
end
end: gen_write_request


vrf_be_t [NrVRFBanks-1:0] wbe_d,wbe_q;

// Per-bank register holding the VLSU byte-enable state (wbe_q).
// Cleared asynchronously while rst_ni is low.
always_ff @(posedge clk_i or negedge rst_ni) begin
if(~rst_ni) begin
wbe_q <= '0;
end else begin
for (int bank = 0; bank < NrVRFBanks; bank++) begin
// No VLSU write request targets this bank: discard the stored byte enables.
if(!write_request[bank][VLSU_VD_WD])begin
wbe_q[bank] <= '0;
end else begin
// A VLSU write request targets this bank: latch the next-state value
// wbe_d, which is driven by the proc_write always_comb block.
wbe_q[bank] <= wbe_d[bank];
end
end
end
end


always_comb begin : proc_write
waddr = '0;
wdata = '0;
we = '0;
wbe = '0;
wvalid_o = '0;

wvalid_vlsu_o = '0;
wbe_d = wbe_q;
// For each bank, we have a priority based access scheme. First priority always has the VFU,
// second priority has the LSU, and third priority has the slide unit.
for (int unsigned bank = 0; bank < NrVRFBanks; bank++) begin
Expand All @@ -123,7 +144,10 @@ module spatz_vrf
wdata[bank] = wdata_i[VFU_VD_WD];
we[bank] = 1'b1;
wbe[bank] = wbe_i[VFU_VD_WD];
wvalid_o[VFU_VD_WD] = 1'b1;
// wvalid_o[VFU_VD_WD] = 1'b1;
wbe_d[bank] = wbe_q[bank]|wbe_i[VLSU_VD_WD];
wvalid_o[VLSU_VD_WD] = &(wbe_q[bank]|wbe_i[VLSU_VD_WD]);//1
wvalid_vlsu_o[VLSU_VD_WD] = 1'b1;//1
end else if (write_request[bank][VSLDU_VD_WD]) begin
waddr[bank] = f_vreg(waddr_i[VSLDU_VD_WD]);
wdata[bank] = wdata_i[VSLDU_VD_WD];
Expand All @@ -144,7 +168,10 @@ module spatz_vrf
wdata[bank] = wdata_i[VLSU_VD_WD];
we[bank] = 1'b1;
wbe[bank] = wbe_i[VLSU_VD_WD];
wvalid_o[VLSU_VD_WD] = 1'b1;
// wvalid_o[VLSU_VD_WD] = 1'b1;
wbe_d[bank] = wbe_q[bank]|wbe_i[VLSU_VD_WD];
wvalid_o[VLSU_VD_WD] = &(wbe_q[bank]|wbe_i[VLSU_VD_WD]);//1
wvalid_vlsu_o[VLSU_VD_WD] = 1'b1;//1
end else if (write_request[bank][VSLDU_VD_WD]) begin
waddr[bank] = f_vreg(waddr_i[VSLDU_VD_WD]);
wdata[bank] = wdata_i[VSLDU_VD_WD];
Expand Down
3 changes: 3 additions & 0 deletions sw/riscvTests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,6 @@ add_snitch_test(vfcvt isa/rv64uv/vfcvt.c)
add_snitch_test(vfncvt isa/rv64uv/vfncvt.c)

add_snitch_test(vfmv isa/rv64uv/vfmv.c)

add_snitch_test(vls isa/rv64uv/vls.c)
add_snitch_test(vss isa/rv64uv/vss.c)
190 changes: 190 additions & 0 deletions sw/riscvTests/isa/rv64uv/vls.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Copyright 2021 ETH Zurich and University of Bologna.
// Solderpad Hardware License, Version 0.51, see LICENSE for details.
// SPDX-License-Identifier: SHL-0.51
//
// Author: Matteo Perotti <mperotti@iis.ee.ethz.ch>

#include "vector_macros.h"

// Positive-stride tests
/*
 * Strided load of 8-bit elements with a positive stride.
 * vlse8.v is issued with a 3-byte stride, so the compare expects the bytes
 * located at offsets 0, 3, 6 and 9 of the source array.
 */
void TEST_CASE1(void) {
  VSET(4, e8, m1);
  volatile uint8_t src[] = {
      0x9f, 0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0,
      0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3,
  };
  const uint64_t byte_stride = 3;
  asm volatile("vlse8.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U8(1, v1, 0x9f, 0x20, 0x05, 0xaa);
}

/*
 * Strided load of 16-bit elements with a positive stride.
 * A 4-byte stride selects every second halfword (indices 0, 2, 4, 6).
 */
void TEST_CASE2(void) {
  VSET(4, e16, m1);
  volatile uint16_t src[] = {
      0x9fe4, 0x1920, 0x8f2e, 0x05e0,
      0xf9aa, 0x71f0, 0xc394, 0xbbd3,
  };
  const uint64_t byte_stride = 4;
  asm volatile("vlse16.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U16(2, v1, 0x9fe4, 0x8f2e, 0xf9aa, 0xc394);
}

/*
 * Strided load of 32-bit elements with a positive stride.
 * An 8-byte stride selects every second word (indices 0, 2, 4, 6).
 */
void TEST_CASE3(void) {
  VSET(4, e32, m1);
  volatile uint32_t src[] = {
      0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3,
      0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1,
  };
  const uint64_t byte_stride = 8;
  asm volatile("vlse32.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U32(3, v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348);
}

/*
 * Strided load of 64-bit elements where the stride equals the element size.
 * With an 8-byte stride the four doublewords are read back to back, so the
 * expected vector matches the source array element for element.
 */
void TEST_CASE4(void) {
  VSET(4, e64, m1);
  volatile uint64_t src[] = {
      0x9fe419208f2e05e0,
      0xf9aa71f0c394bbd3,
      0xa11a9384a7163840,
      0x99991348a9f38cd1,
  };
  const uint64_t byte_stride = 8;
  asm volatile("vlse64.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U64(4, v1,
           0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3,
           0xa11a9384a7163840, 0x99991348a9f38cd1);
}

// Zero-stride tests
// Zero stride passed in a general-purpose register: every element reads the
// same byte. In this form the implementation must perform all the memory
// accesses.
void TEST_CASE5(void) {
  VSET(16, e8, m1);
  volatile uint8_t src[] = {0x9f};
  const uint64_t byte_stride = 0;
  asm volatile("vlse8.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U8(5, v1,
          0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f,
          0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f);
}

// Zero stride encoded with x0 as the stride operand: the implementation is
// allowed to perform fewer memory accesses, but the result is the same byte
// replicated across all 16 elements.
void TEST_CASE6(void) {
  VSET(16, e8, m1);
  volatile uint8_t src[] = {0x9f};
  asm volatile("vlse8.v v1, (%[base]), x0" : : [base] "r"(src));
  VCMP_U8(6, v1,
          0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f,
          0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f, 0x9f);
}

// Zero-stride (x0) load with a different LMUL setting (m2): a single 64-bit
// value is expected in all eight elements of the v2 register group.
void TEST_CASE7(void) {
  VSET(8, e64, m2);
  volatile uint64_t src[] = {0x9fa831c7a11a9384};
  asm volatile("vlse64.v v2, (%[base]), x0" : : [base] "r"(src));
  VCMP_U64(7, v2,
           0x9fa831c7a11a9384, 0x9fa831c7a11a9384,
           0x9fa831c7a11a9384, 0x9fa831c7a11a9384,
           0x9fa831c7a11a9384, 0x9fa831c7a11a9384,
           0x9fa831c7a11a9384, 0x9fa831c7a11a9384);
}

// Others
// Negative stride: the base address points at the last halfword and the
// stride of -4 bytes walks backwards, picking indices 7, 5, 3 and 1.
void TEST_CASE8(void) {
  VSET(4, e16, m1);
  volatile uint16_t src[] = {
      0x9fe4, 0x1920, 0x8f2e, 0x05e0,
      0xf9aa, 0x71f0, 0xc394, 0xbbd3,
  };
  // Stored as the two's-complement bit pattern of -4 in a 64-bit register.
  const uint64_t byte_stride = (uint64_t)-4;
  asm volatile("vlse16.v v1, (%[base]), %[step]"
               :
               : [base] "r"(&src[7]), [step] "r"(byte_stride));
  VCMP_U16(8, v1, 0xbbd3, 0x71f0, 0x05e0, 0x1920);
}

// Stride larger than the default Ara AXI width (128 bit, 4 lanes): with a
// 40-byte stride the two loaded doublewords are the first and the last
// entries of the source array.
void TEST_CASE9(void) {
  VSET(2, e64, m1);
  volatile uint64_t src[] = {
      0x99991348a9f38cd1,
      0x9fa831c7a11a9384,
      0x9fa831c7a11a9384,
      0x9fa831c7a11a9384,
      0x9fa831c7a11a9384,
      0x01015ac1309bb678,
  };
  const uint64_t byte_stride = 40;
  asm volatile("vlse64.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U64(9, v1, 0x99991348a9f38cd1, 0x01015ac1309bb678);
}

// Longer strided load meant to fill Ara's internal load buffer: eight 64-bit
// elements with a 16-byte stride, i.e. every second doubleword of the source.
void TEST_CASE10(void) {
  VSET(8, e64, m1);
  volatile uint64_t src[] = {
      0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3,
      0xa11a9384a7163840, 0x99991348a9f38cd1,
      0x9fa831c7a11a9384, 0x3819759853987548,
      0x1893179501093489, 0x81937598aa819388,
      0x1874754791888188, 0x3eeeeeeee33111ae,
      0x9013930148815808, 0xab8b914891484891,
      0x9031850931584902, 0x3189759837598759,
      0x8319599991911111, 0x8913984898951989,
  };
  const uint64_t byte_stride = 16;
  asm volatile("vlse64.v v1, (%[base]), %[step]"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U64(10, v1,
           0x9fe419208f2e05e0, 0xa11a9384a7163840,
           0x9fa831c7a11a9384, 0x1893179501093489,
           0x1874754791888188, 0x9013930148815808,
           0x9031850931584902, 0x8319599991911111);
}

// Masked stride loads
/*
 * Masked strided load of 8-bit elements (v0.t).
 * The mask byte 0xAA is placed in v0 and v1 is cleared beforehand; the
 * compare expects zeros in elements 0 and 2 and the loaded bytes (offsets 3
 * and 9 of the source) in elements 1 and 3.
 */
void TEST_CASE11(void) {
  VSET(4, e8, m1);
  volatile uint8_t src[] = {
      0x9f, 0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0,
      0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3,
  };
  const uint64_t byte_stride = 3;
  VLOAD_8(v0, 0xAA);
  VCLEAR(v1);
  asm volatile("vlse8.v v1, (%[base]), %[step], v0.t"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U8(11, v1, 0x00, 0x20, 0x00, 0xaa);
}

/*
 * Masked strided load of 16-bit elements (v0.t).
 * Mask byte 0xAA in v0, destination cleared first; elements 1 and 3 are
 * expected to hold source halfwords 2 and 6, elements 0 and 2 stay zero.
 */
void TEST_CASE12(void) {
  VSET(4, e16, m1);
  volatile uint16_t src[] = {
      0x9fe4, 0x1920, 0x8f2e, 0x05e0,
      0xf9aa, 0x71f0, 0xc394, 0xbbd3,
  };
  const uint64_t byte_stride = 4;
  VLOAD_8(v0, 0xAA);
  VCLEAR(v1);
  asm volatile("vlse16.v v1, (%[base]), %[step], v0.t"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U16(12, v1, 0, 0x8f2e, 0, 0xc394);
}

/*
 * Masked strided load of 32-bit elements (v0.t).
 * Mask byte 0xAA in v0, destination cleared first; elements 1 and 3 are
 * expected to hold source words 2 and 6, elements 0 and 2 stay zero.
 */
void TEST_CASE13(void) {
  VSET(4, e32, m1);
  volatile uint32_t src[] = {
      0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3,
      0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1,
  };
  const uint64_t byte_stride = 8;
  VLOAD_8(v0, 0xAA);
  VCLEAR(v1);
  asm volatile("vlse32.v v1, (%[base]), %[step], v0.t"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U32(13, v1, 0, 0xf9aa71f0, 0, 0x99991348);
}

/*
 * Masked strided load of eight 64-bit elements (v0.t) with a 16-byte stride.
 * Mask byte 0xAA in v0, destination cleared first; the odd-numbered elements
 * are expected to hold source doublewords 2, 6, 10 and 14, the even-numbered
 * elements stay zero.
 */
void TEST_CASE14(void) {
  VSET(8, e64, m1);
  volatile uint64_t src[] = {
      0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3,
      0xa11a9384a7163840, 0x99991348a9f38cd1,
      0x9fa831c7a11a9384, 0x3819759853987548,
      0x1893179501093489, 0x81937598aa819388,
      0x1874754791888188, 0x3eeeeeeee33111ae,
      0x9013930148815808, 0xab8b914891484891,
      0x9031850931584902, 0x3189759837598759,
      0x8319599991911111, 0x8913984898951989,
  };
  const uint64_t byte_stride = 16;
  VLOAD_8(v0, 0xAA);
  VCLEAR(v1);
  asm volatile("vlse64.v v1, (%[base]), %[step], v0.t"
               :
               : [base] "r"(src), [step] "r"(byte_stride));
  VCMP_U64(14, v1,
           0, 0xa11a9384a7163840,
           0, 0x1893179501093489,
           0, 0x9013930148815808,
           0, 0x8319599991911111);
}

// Test entry point: runs the enabled strided-load test cases in sequence,
// bracketed by the INIT_CHECK()/EXIT_CHECK() macros from vector_macros.h.
int main(void) {
INIT_CHECK();
enable_vec();

// Positive-stride loads, one case per element width (8/16/32/64 bit).
TEST_CASE1();
TEST_CASE2();
TEST_CASE3();
TEST_CASE4();

// Zero-stride loads: stride in a register, x0 stride, and x0 stride with m2.
TEST_CASE5();
TEST_CASE6();
TEST_CASE7();

// Negative stride, 40-byte stride, and the eight-element 16-byte-stride load.
TEST_CASE8();
TEST_CASE9();
TEST_CASE10();

// NOTE(review): the masked (v0.t) cases below are defined but not executed;
// the reason is not recorded in this file - confirm before re-enabling.
// TEST_CASE11();
// TEST_CASE12();
// TEST_CASE13();
// TEST_CASE14();

EXIT_CHECK();
}
Loading
Loading