From 6a0ca2d5029034561a75025287cffe4560238d69 Mon Sep 17 00:00:00 2001
From: Ahmed Charles <me@ahmedcharles.com>
Date: Thu, 9 Jan 2025 11:03:26 +0000
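Subject: [PATCH] Add XLEN parameter.

Thread an XLEN parameter (default 32) through tinyQV, tinyqv_cpu,
tinyqv_core, tinyqv_decoder and tinyqv_registers, and size the decoder
immediates and the register storage from it instead of a hard-coded 32.

The board top levels and the existing testbenches pass their parameters
explicitly. The decoder testbench takes XLEN as a parameter and the
decoder tests are run for both XLEN=32 and XLEN=64.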
---
 .gitignore           |  2 ++
 cpu/core.v           | 10 +++++-----
 cpu/cpu.v            |  6 +++---
 cpu/decode.v         | 38 +++++++++++++++++++-------------------
 cpu/register.v       | 18 +++++++++---------
 cpu/tinyqv.v         |  4 ++--
 iceFUN/iceFUN.v      |  2 +-
 pico_ice/pico_ice.v  |  2 +-
 test/Makefile        | 14 ++++++++++++--
 test/tb_core.v       |  6 +++---
 test/tb_counter.v    |  4 ++--
 test/tb_cpu.v        |  2 +-
 test/tb_decode.v     |  8 ++++----
 test/tb_qspi_flash.v |  4 ++--
 test/tb_register.v   |  5 +++--
 test/test_decode.mk  |  4 ++--
 test/test_decode.py  | 11 +++++++++--
 17 files changed, 80 insertions(+), 60 deletions(-)

diff --git a/.gitignore b/.gitignore
index e755647..3ac59b0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@ verify/sby/
 test/__pycache__/
 test/*.vcd
 test/*results.xml
+test/*results-32.xml
+test/*results-64.xml
 stats.md
 test/sim_build/
 
diff --git a/cpu/core.v b/cpu/core.v
index 205c10f..0c5eb14 100644
--- a/cpu/core.v
+++ b/cpu/core.v
@@ -3,7 +3,7 @@
    This core module takes decoded instructions and produces output data
  */
 
-module tinyqv_core #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
+module tinyqv_core #(parameter XLEN=32, parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     input clk,
     input rstn,
 
@@ -85,9 +85,9 @@ module tinyqv_core #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     reg [3:0] data_rd;
     reg wr_en;
 
-    reg [31:0] tmp_data;
+    reg [XLEN-1:0] tmp_data;
 
-    tinyqv_registers #(.REG_ADDR_BITS(REG_ADDR_BITS), .NUM_REGS(NUM_REGS)) 
+    tinyqv_registers #(.XLEN(XLEN), .NUM_REGS(NUM_REGS), .REG_ADDR_BITS(REG_ADDR_BITS))
         i_registers(clk, rstn, wr_en, counter, rs1, rs2, rd, data_rs1, data_rs2, data_rd, return_addr);
 
 
@@ -258,7 +258,7 @@ module tinyqv_core #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
 
     always @(posedge clk) begin
         if (tmp_data_shift)
-            tmp_data <= {tmp_data_in, tmp_data[31:4]};
+            tmp_data <= {tmp_data_in, tmp_data[XLEN-1:4]};
     end
 
     assign addr_out = is_mret ? {4'b0000, mepc} : tmp_data[31:4];
@@ -299,7 +299,7 @@ module tinyqv_core #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
         instr_retired <= instr_complete && !is_stall;
     end
     /* verilator lint_off PINMISSING */  // No carry
-    tinyqv_counter i_instrret (
+    tinyqv_counter #(.OUTPUT_WIDTH(4)) i_instrret (
         .clk(clk),
         .rstn(rstn),
         .add(instr_retired),
diff --git a/cpu/cpu.v b/cpu/cpu.v
index 663a731..ce306f0 100644
--- a/cpu/cpu.v
+++ b/cpu/cpu.v
@@ -3,7 +3,7 @@
    This CPU module interfaces with memory, the instruction decoder and the core.
  */
 
-module tinyqv_cpu #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
+module tinyqv_cpu #(parameter XLEN=32, parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     input clk,
     input rstn,
 
@@ -69,7 +69,7 @@ module tinyqv_cpu #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     wire [2:0] additional_mem_ops_de;
     wire mem_op_increment_reg_de;
 
-    tinyqv_decoder i_decoder(
+    tinyqv_decoder #(.XLEN(XLEN), .REG_ADDR_BITS(REG_ADDR_BITS)) i_decoder(
         .instr(instr),
         .imm(imm_de),
 
@@ -302,7 +302,7 @@ module tinyqv_cpu #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
             was_early_branch <= early_branch && !branch;
     end
 
-    tinyqv_core #(.REG_ADDR_BITS(REG_ADDR_BITS), .NUM_REGS(NUM_REGS))  i_core(
+    tinyqv_core #(.XLEN(XLEN), .NUM_REGS(NUM_REGS), .REG_ADDR_BITS(REG_ADDR_BITS)) i_core(
         .clk(clk),
         .rstn(rstn),
         
diff --git a/cpu/decode.v b/cpu/decode.v
index 912b10d..5b182b8 100644
--- a/cpu/decode.v
+++ b/cpu/decode.v
@@ -3,10 +3,10 @@
     Note parts of this are from the excellent FemtoRV by Bruno Levy et al.
 */
 
-module tinyqv_decoder #(parameter REG_ADDR_BITS=4) (
+module tinyqv_decoder #(parameter XLEN=32, parameter REG_ADDR_BITS=4) (
     input [31:0] instr,
 
-    output reg [31:0] imm,
+    output reg [XLEN-1:0] imm,
 
     output reg is_load,
     output reg is_alu_imm,
@@ -34,25 +34,25 @@ module tinyqv_decoder #(parameter REG_ADDR_BITS=4) (
     output reg       mem_op_increment_reg
 );
 
-    wire [31:0] Uimm = {    instr[31],   instr[30:12], {12{1'b0}}};
-    wire [31:0] Iimm = {{21{instr[31]}}, instr[30:20]};
-    wire [31:0] Simm = {{21{instr[31]}}, instr[30:25],instr[11:7]};
-    wire [31:0] Bimm = {{20{instr[31]}}, instr[7],instr[30:25],instr[11:8],1'b0};
-    wire [31:0] Jimm = {{12{instr[31]}}, instr[19:12],instr[20],instr[30:21],1'b0};
+    wire [XLEN-1:0] Uimm = {{XLEN-31{instr[31]}}, instr[30:12], {12{1'b0}}};
+    wire [XLEN-1:0] Iimm = {{XLEN-11{instr[31]}}, instr[30:20]};
+    wire [XLEN-1:0] Simm = {{XLEN-11{instr[31]}}, instr[30:25],instr[11:7]};
+    wire [XLEN-1:0] Bimm = {{XLEN-12{instr[31]}}, instr[7],instr[30:25],instr[11:8],1'b0};
+    wire [XLEN-1:0] Jimm = {{XLEN-20{instr[31]}}, instr[19:12],instr[20],instr[30:21],1'b0};
 
     // Compressed immediates
-    wire [31:0] CLWSPimm     = {24'b0, instr[3:2], instr[12], instr[6:4], 2'b00};
-    wire [31:0] CSWSPimm     = {24'b0, instr[8:7], instr[12:9], 2'b00};
-    wire [31:0] CLSWimm      = {25'b0, instr[5], instr[12:10], instr[6], 2'b00};  // LW and SW
-    wire [31:0] CLSHimm      = {30'b0, instr[5], 1'b0};  // LH(U) and SH
-    wire [31:0] CLSBimm      = {30'b0, instr[5], instr[6]};  // LBU and SB
-    wire [31:0] CJimm        = {{21{instr[12]}}, instr[8], instr[10:9], instr[6], instr[7], instr[2], instr[11], instr[5:3], 1'b0};
-    wire [31:0] CBimm        = {{24{instr[12]}}, instr[6:5], instr[2], instr[11:10], instr[4:3], 1'b0};
-    wire [31:0] CALUimm      = {{27{instr[12]}}, instr[6:2]};          // ADDI, LI, shifts, ANDI
-    wire [31:0] CLUIimm      = {{15{instr[12]}}, instr[6:2], 12'b0};
-    wire [31:0] CADDI16SPimm = {{23{instr[12]}}, instr[4:3], instr[5], instr[2], instr[6], 4'b0};
-    wire [31:0] CADDI4SPimm  = {22'b0, instr[10:7], instr[12:11], instr[5], instr[6], 2'b0};
-    wire [31:0] CSCXTimm     = {{23{instr[12]}}, instr[9:7], instr[10], instr[11], 4'b0};
+    wire [XLEN-1:0] CLWSPimm     = {{XLEN-8{1'b0}}, instr[3:2], instr[12], instr[6:4], 2'b00};
+    wire [XLEN-1:0] CSWSPimm     = {{XLEN-8{1'b0}}, instr[8:7], instr[12:9], 2'b00};
+    wire [XLEN-1:0] CLSWimm      = {{XLEN-7{1'b0}}, instr[5], instr[12:10], instr[6], 2'b00};  // LW and SW
+    wire [XLEN-1:0] CLSHimm      = {{XLEN-2{1'b0}}, instr[5], 1'b0};  // LH(U) and SH
+    wire [XLEN-1:0] CLSBimm      = {{XLEN-2{1'b0}}, instr[5], instr[6]};  // LBU and SB
+    wire [XLEN-1:0] CJimm        = {{XLEN-11{instr[12]}}, instr[8], instr[10:9], instr[6], instr[7], instr[2], instr[11], instr[5:3], 1'b0};
+    wire [XLEN-1:0] CBimm        = {{XLEN-8{instr[12]}}, instr[6:5], instr[2], instr[11:10], instr[4:3], 1'b0};
+    wire [XLEN-1:0] CALUimm      = {{XLEN-5{instr[12]}}, instr[6:2]};          // ADDI, LI, shifts, ANDI
+    wire [XLEN-1:0] CLUIimm      = {{XLEN-17{instr[12]}}, instr[6:2], 12'b0};
+    wire [XLEN-1:0] CADDI16SPimm = {{XLEN-9{instr[12]}}, instr[4:3], instr[5], instr[2], instr[6], 4'b0};
+    wire [XLEN-1:0] CADDI4SPimm  = {{XLEN-10{1'b0}}, instr[10:7], instr[12:11], instr[5], instr[6], 2'b0};
+    wire [XLEN-1:0] CSCXTimm     = {{XLEN-9{instr[12]}}, instr[9:7], instr[10], instr[11], 4'b0};
 
     always @(*) begin
         additional_mem_ops = 3'b000;
diff --git a/cpu/register.v b/cpu/register.v
index 2eb1a90..0da16aa 100644
--- a/cpu/register.v
+++ b/cpu/register.v
@@ -7,7 +7,7 @@
     The read bit address is one ahead of write bit address, and both increment every clock.
  */
 
-module tinyqv_registers #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
+module tinyqv_registers #(parameter XLEN=32, parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     input clk,
     input rstn,
 
@@ -26,7 +26,7 @@ module tinyqv_registers #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     output [23:1] return_addr
 );
 
-    reg [31:0] registers [1:NUM_REGS-1];
+    reg [XLEN-1:0] registers [1:NUM_REGS-1];
     wire [3:0] reg_access [0:2**REG_ADDR_BITS-1];
 
     genvar i;
@@ -46,22 +46,22 @@ module tinyqv_registers #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
                         registers[i][3:0] <= registers[i][7:4];
                 end
 
-                wire [31:4] reg_buf;
+                wire [XLEN-1:4] reg_buf;
                 `ifdef SIM
                 /* verilator lint_off ASSIGNDLY */
-                buf #1 i_regbuf[31:4] (reg_buf, {registers[i][3:0], registers[i][31:8]});
+                buf #1 i_regbuf[XLEN-1:4] (reg_buf, {registers[i][3:0], registers[i][XLEN-1:8]});
                 /* verilator lint_on ASSIGNDLY */
                 `elsif ICE40
-                assign reg_buf = {registers[i][3:0], registers[i][31:8]};
+                assign reg_buf = {registers[i][3:0], registers[i][XLEN-1:8]};
                 `elsif SCL_sky130_fd_sc_hd
                 /* verilator lint_off PINMISSING */
-                sky130_fd_sc_hd__dlygate4sd3_1 i_regbuf[31:4] ( .X(reg_buf), .A({registers[i][3:0], registers[i][31:8]}) );
+                sky130_fd_sc_hd__dlygate4sd3_1 i_regbuf[XLEN-1:4] ( .X(reg_buf), .A({registers[i][3:0], registers[i][XLEN-1:8]}) );
                 /* verilator lint_on PINMISSING */
                 `else
                 // On SG13G2 no buffer is required, use direct assignment
-                assign reg_buf = {registers[i][3:0], registers[i][31:8]};
+                assign reg_buf = {registers[i][3:0], registers[i][XLEN-1:8]};
                 `endif
-                always @(posedge clk) registers[i][31:4] <= reg_buf;
+                always @(posedge clk) registers[i][XLEN-1:4] <= reg_buf;
 
                 assign reg_access[i] = registers[i][7:4];
             end
@@ -71,7 +71,7 @@ module tinyqv_registers #(parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     assign data_rs1 = reg_access[rs1];
     assign data_rs2 = reg_access[rs2];
 
-    assign return_addr = registers[1][31:9];
+    assign return_addr = registers[1][31:9];  // return_addr is [23:1] (23 bits) whatever XLEN is, so keep a fixed-width slice
 
     wire _unused = &{rstn, 1'b0};
 
diff --git a/cpu/tinyqv.v b/cpu/tinyqv.v
index 4e247c6..9067014 100644
--- a/cpu/tinyqv.v
+++ b/cpu/tinyqv.v
@@ -6,7 +6,7 @@
 `default_nettype none
 
 // TinyQV CPU and QSPI memory controller wrapper
-module tinyQV (
+module tinyQV #(parameter XLEN=32, parameter NUM_REGS=16, parameter REG_ADDR_BITS=4) (
     input clk,
     input rstn,
 
@@ -91,7 +91,7 @@ module tinyQV (
   reg rst_reg_n;
   always @(posedge clk) rst_reg_n <= rstn;
 
-  tinyqv_cpu cpu(
+  tinyqv_cpu #(.XLEN(XLEN), .NUM_REGS(NUM_REGS), .REG_ADDR_BITS(REG_ADDR_BITS)) cpu(
         .clk(clk),
         .rstn(rst_reg_n),
 
diff --git a/iceFUN/iceFUN.v b/iceFUN/iceFUN.v
index 9a4e902..f43f745 100644
--- a/iceFUN/iceFUN.v
+++ b/iceFUN/iceFUN.v
@@ -87,7 +87,7 @@ module tinyQV_top (
     wire        data_ready;
     reg [31:0] data_from_read;
 
-    tinyQV i_tinyqv(
+    tinyQV #(.XLEN(32), .NUM_REGS(16), .REG_ADDR_BITS(4)) i_tinyqv(
         .clk(clk),
         .rstn(rst_reg_n),
 
diff --git a/pico_ice/pico_ice.v b/pico_ice/pico_ice.v
index 385b99b..892414e 100644
--- a/pico_ice/pico_ice.v
+++ b/pico_ice/pico_ice.v
@@ -86,7 +86,7 @@ module tinyQV_top (
     wire       debug_stop_txn;
     wire [3:0] debug_rd;
 
-    tinyQV i_tinyqv(
+    tinyQV #(.XLEN(32), .NUM_REGS(16), .REG_ADDR_BITS(4)) i_tinyqv(
         .clk(clk),
         .rstn(rst_reg_n),
 
diff --git a/test/Makefile b/test/Makefile
index 1381cb9..aca2941 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -5,7 +5,17 @@
 	make -f test_$*.mk
 	mv results.xml $@
 
-all: clean alu-results.xml core-results.xml counter-results.xml cpu-results.xml decode-results.xml mem_ctrl-results.xml qspi_ctrl-results.xml register-results.xml
+%-results-32.xml:
+	make XLEN=32 -f test_$*.mk clean
+	make XLEN=32 -f test_$*.mk
+	mv results.xml $@
+
+%-results-64.xml:
+	make XLEN=64 -f test_$*.mk clean
+	make XLEN=64 -f test_$*.mk
+	mv results.xml $@
+
+all: clean alu-results.xml core-results.xml counter-results.xml cpu-results.xml decode-results-32.xml decode-results-64.xml mem_ctrl-results.xml qspi_ctrl-results.xml register-results.xml
 
 clean:
-	rm *results.xml || true
\ No newline at end of file
+	rm -f *results.xml *results-32.xml *results-64.xml
diff --git a/test/tb_core.v b/test/tb_core.v
index 1f1a9e1..f4bf0f5 100644
--- a/test/tb_core.v
+++ b/test/tb_core.v
@@ -58,7 +58,7 @@ end
     wire [2:0] additional_mem_ops;
     wire mem_op_increment_reg;
 
-    tinyqv_decoder decoder(instr, 
+    tinyqv_decoder #(.XLEN(32), .REG_ADDR_BITS(4)) decoder(instr,
         imm,
 
         is_load,
@@ -108,7 +108,7 @@ end
     wire debug_reg_wen;
     wire [3:0] debug_rd;
 
-    tinyqv_core core(clk,
+    tinyqv_core #(.XLEN(32), .NUM_REGS(16), .REG_ADDR_BITS(4)) core(clk,
         rstn,
         
         imm[counter+:4],
@@ -155,4 +155,4 @@ end
         debug_rd
         );
 
-endmodule
\ No newline at end of file
+endmodule
diff --git a/test/tb_counter.v b/test/tb_counter.v
index b52768d..f5d5294 100644
--- a/test/tb_counter.v
+++ b/test/tb_counter.v
@@ -31,11 +31,11 @@ end
         end
 
     wire [3:0] data;
-    tinyqv_counter i_mcount(clk, rstn, add, last_counter[4:2], data, cy_out);
+    tinyqv_counter #(.OUTPUT_WIDTH(4)) i_mcount(clk, rstn, add, last_counter[4:2], data, cy_out);
 
     always @(posedge clk) begin
         val[last_counter+:4] <= data;
         if (last_counter[4:2] == 3'b111) cy <= cy_out;
     end
 
-endmodule
\ No newline at end of file
+endmodule
diff --git a/test/tb_cpu.v b/test/tb_cpu.v
index f6b16f0..6af6b34 100644
--- a/test/tb_cpu.v
+++ b/test/tb_cpu.v
@@ -49,7 +49,7 @@ end
     wire        debug_counter_0;
     wire [3:0] debug_rd;
 
-    tinyqv_cpu cpu(
+    tinyqv_cpu #(.XLEN(32), .NUM_REGS(16), .REG_ADDR_BITS(4)) cpu(
         clk,
         rstn,
 
diff --git a/test/tb_decode.v b/test/tb_decode.v
index cec4a06..ac87227 100644
--- a/test/tb_decode.v
+++ b/test/tb_decode.v
@@ -3,13 +3,13 @@
    Aim is to support RV32E 
  */
 
-module tb_decode (
+module tb_decode #(parameter XLEN=32) (
     input clk,
     input rstn,
 
     input [31:0] instr,
 
-    output [31:0] imm,
+    output [XLEN-1:0] imm,
 
     output is_load,
     output is_alu_imm,
@@ -44,7 +44,7 @@ initial begin
 end
 `endif
 
-    tinyqv_decoder decoder(instr, 
+    tinyqv_decoder #(.XLEN(XLEN), .REG_ADDR_BITS(4)) decoder(instr,
         imm,
 
         is_load,
@@ -74,4 +74,4 @@ end
 
     assign instr_len[0] = 1'b0;
 
-endmodule
\ No newline at end of file
+endmodule
diff --git a/test/tb_qspi_flash.v b/test/tb_qspi_flash.v
index 799ed2c..2ecd4c7 100644
--- a/test/tb_qspi_flash.v
+++ b/test/tb_qspi_flash.v
@@ -33,7 +33,7 @@ initial begin
 end
 `endif
 
-    qspi_flash_controller i_flash(
+    qspi_flash_controller #(.DATA_WIDTH_BYTES(2), .ADDR_BITS(24)) i_flash(
         clk,
         rstn,
 
@@ -53,4 +53,4 @@ end
         busy
     );
 
-endmodule
\ No newline at end of file
+endmodule
diff --git a/test/tb_register.v b/test/tb_register.v
index ff9b1f4..ef47784 100644
--- a/test/tb_register.v
+++ b/test/tb_register.v
@@ -38,11 +38,12 @@ end
     wire [3:0] data_rs1;
     wire [3:0] data_rs2;
     wire [23:1] return_addr;
-    tinyqv_registers registers(clk, rstn, wr_en, last_counter[4:2], rs1, rs2, rd, data_rs1, data_rs2, rd_in[last_counter+:4], return_addr);
+    tinyqv_registers #(.XLEN(32), .NUM_REGS(16), .REG_ADDR_BITS(4))
+        registers(clk, rstn, wr_en, last_counter[4:2], rs1, rs2, rd, data_rs1, data_rs2, rd_in[last_counter+:4], return_addr);
 
     always @(posedge clk) begin
         rs1_out[last_counter+:4] <= data_rs1;
         rs2_out[last_counter+:4] <= data_rs2;
     end
 
-endmodule
\ No newline at end of file
+endmodule
diff --git a/test/test_decode.mk b/test/test_decode.mk
index e4c5e37..ca2e4c0 100644
--- a/test/test_decode.mk
+++ b/test/test_decode.mk
@@ -7,7 +7,7 @@ TOPLEVEL_LANG ?= verilog
 
 CPUD = $(PWD)/../cpu
 VERILOG_SOURCES += $(CPUD)/decode.v $(PWD)/tb_decode.v
-COMPILE_ARGS    += -DSIM
+COMPILE_ARGS    += -DSIM -Ptb_decode.XLEN=$(or $(XLEN),32)
 
 # TOPLEVEL is the name of the toplevel module in your Verilog or VHDL file
 TOPLEVEL = tb_decode
@@ -16,4 +16,4 @@ TOPLEVEL = tb_decode
 MODULE = test_decode
 
 # include cocotb's make rules to take care of the simulator setup
-include $(shell cocotb-config --makefiles)/Makefile.sim
\ No newline at end of file
+include $(shell cocotb-config --makefiles)/Makefile.sim
diff --git a/test/test_decode.py b/test/test_decode.py
index cee53eb..e3a6505 100644
--- a/test/test_decode.py
+++ b/test/test_decode.py
@@ -675,6 +675,8 @@ async def test_auipc(dut):
         assert dut.is_system.value == 0
         assert dut.instr_len.value == 4
 
+        if dut.XLEN.value == 64 and offset >> 19 == 1:
+            offset |= 0xffffffff00000
         assert dut.imm.value == offset << 12
         assert dut.alu_op.value == 0  # ADD
         
@@ -904,6 +906,8 @@ async def test_lui(dut):
         assert dut.is_system.value == 0
         assert dut.instr_len.value == 4
 
+        if dut.XLEN.value == 64 and imm >> 19 == 1:
+            imm |= 0xffffffff00000
         assert dut.imm.value == imm << 12
         
         assert dut.rd.value == reg
@@ -933,7 +937,10 @@ def encode_clui(reg, imm):
         assert dut.is_system.value == 0
         assert dut.instr_len.value == 2
 
-        assert dut.imm.value == (imm << 12) & 0xffffffff
+        # Mask the expected immediate to the width under test: 64 bits when
+        # XLEN is 64, otherwise 32 bits.
+        width_mask = 0xffffffffffffffff if dut.XLEN.value == 64 else 0xffffffff
+        assert dut.imm.value == (imm << 12) & width_mask
         
         assert dut.rd.value == reg
 
@@ -1178,4 +1185,4 @@ async def test_system(dut):
         assert dut.is_system.value == 1
         assert dut.instr_len.value == (4 if (instr & 3) == 3 else 2)
 
-        assert dut.imm.value == (0 if (instr == 0x73) else 1)
\ No newline at end of file
+        assert dut.imm.value == (0 if (instr == 0x73) else 1)