From cde47f1ea722cc31e5bf01df291d06c6bafc7678 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Fri, 5 May 2023 14:25:48 +0800
Subject: [PATCH 01/32] TLB: remove implicit parameters

---
 diplomatic/src/rocket/TLB.scala | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/diplomatic/src/rocket/TLB.scala b/diplomatic/src/rocket/TLB.scala
index 6dbc63bf9..2c36ad273 100644
--- a/diplomatic/src/rocket/TLB.scala
+++ b/diplomatic/src/rocket/TLB.scala
@@ -35,7 +35,7 @@ case object VMIdBits extends Field[Int](0)
   * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered.
   * }}}
   */
-class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
+class SFenceReq extends Bundle {
   val rs1 = Bool()
   val rs2 = Bool()
   val addr = UInt(vaddrBits.W)
@@ -44,7 +44,7 @@ class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
   val hg = Bool()
 }
 
-class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) {
+class TLBReq(lgMaxSize: Int) extends Bundle {
   /** request address from CPU. */
   val vaddr = UInt(vaddrBitsExtended.W)
   /** don't lookup TLB, bypass vaddr as paddr */
@@ -65,7 +65,7 @@ class TLBExceptions extends Bundle {
   val inst = Bool()
 }
 
-class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
+class TLBResp extends Bundle {
   // lookup responses
   val miss = Bool()
   /** physical address */
@@ -88,7 +88,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
   val prefetchable = Bool()
 }
 
-class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) {
+class TLBEntryData extends Bundle {
   val ppn = UInt(ppnBits.W)
   /** pte.u user */
   val u = Bool()
@@ -138,7 +138,7 @@ class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) {
 }
 
 /** basic cell for TLB data */
-class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean)(implicit p: Parameters) extends CoreBundle()(p) {
+class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean) extends Bundle {
   require(nSectors == 1 || !superpage)
   require(!superpageOnly || superpage)
 
@@ -304,7 +304,7 @@ case class TLBConfig(
   * @param cfg [[TLBConfig]]
   * @param edge collect SoC metadata.
   */
-class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
+class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig, edge: TLEdgeOut) extends Module {
   val io = IO(new Bundle {
     /** request from Core */
     val req = Flipped(Decoupled(new TLBReq(lgMaxSize)))

From ab51a094ccdef2a4c3ab0156d0f71ba8f99647de Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Fri, 5 May 2023 14:56:37 +0800
Subject: [PATCH 02/32] TLB: parameterize

---
 diplomatic/src/rocket/TLB.scala | 47 ++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/diplomatic/src/rocket/TLB.scala b/diplomatic/src/rocket/TLB.scala
index 2c36ad273..ab0b37546 100644
--- a/diplomatic/src/rocket/TLB.scala
+++ b/diplomatic/src/rocket/TLB.scala
@@ -11,10 +11,11 @@ import freechips.rocketchip.subsystem.CacheBlockBytes
 import freechips.rocketchip.diplomacy.RegionType
 import freechips.rocketchip.tile.{CoreModule, CoreBundle}
 import freechips.rocketchip.tilelink._
-import freechips.rocketchip.util._
-import freechips.rocketchip.util.property
+// import freechips.rocketchip.util._
+// import freechips.rocketchip.util.property
 import freechips.rocketchip.devices.debug.DebugModuleKey
 import chisel3.internal.sourceinfo.SourceInfo
+import org.chipsalliance.rocket.constants.MemoryOpConstants
 
 case object PgLevels extends Field[Int](2)
 case object ASIdBits extends Field[Int](0)
@@ -35,7 +36,7 @@ case object VMIdBits extends Field[Int](0)
   * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered.
   * }}}
   */
-class SFenceReq extends Bundle {
+class SFenceReq(vaddrBits: Int, asIdBits: Int) extends Bundle {
   val rs1 = Bool()
   val rs2 = Bool()
   val addr = UInt(vaddrBits.W)
@@ -44,7 +45,7 @@ class SFenceReq extends Bundle {
   val hg = Bool()
 }
 
-class TLBReq(lgMaxSize: Int) extends Bundle {
+class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int) extends Bundle with MemoryOpConstants {
   /** request address from CPU. */
   val vaddr = UInt(vaddrBitsExtended.W)
   /** don't lookup TLB, bypass vaddr as paddr */
@@ -65,7 +66,7 @@ class TLBExceptions extends Bundle {
   val inst = Bool()
 }
 
-class TLBResp extends Bundle {
+class TLBResp(paddrBits: Int, vaddrBitsExtended: Int) extends Bundle {
   // lookup responses
   val miss = Bool()
   /** physical address */
@@ -88,7 +89,7 @@ class TLBResp extends Bundle {
   val prefetchable = Bool()
 }
 
-class TLBEntryData extends Bundle {
+class TLBEntryData(ppnBits: Int) extends Bundle {
   val ppn = UInt(ppnBits.W)
   /** pte.u user */
   val u = Bool()
@@ -138,7 +139,15 @@ class TLBEntryData extends Bundle {
 }
 
 /** basic cell for TLB data */
-class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean) extends Bundle {
+class TLBEntry(
+  val nSectors: Int,
+  val superpage: Boolean,
+  val superpageOnly: Boolean,
+  pgLevels: Int,
+  pgLevelBits: Int,
+  vpnBits: Int,
+  hypervisorExtraAddrBits: Int,
+  usingVM: Boolean) extends Bundle {
   require(nSectors == 1 || !superpage)
   require(!superpageOnly || superpage)
 
@@ -304,7 +313,27 @@ case class TLBConfig(
   * @param cfg [[TLBConfig]]
   * @param edge collect SoC metadata.
   */
-class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig, edge: TLEdgeOut) extends Module {
+class TLB(
+  instruction: Boolean,
+  lgMaxSize: Int,
+  cfg: TLBConfig,
+  edge: TLEdgeOut,
+  pmpGranularity: Int,
+  pgLevels: Int,
+  minPgLevels: Int,
+  pgLevelBits: Int,
+  pgIdxBits: Int,
+  vpnBits: Int,
+  vaddrBits: Int,
+  vaddrBitsExtended: Int,
+  hypervisorExtraAddrBits: Int,
+  xLen: Int,
+  usingHypervisor: Boolean,
+  usingVM: Boolean,
+  usingAtomics: Boolean,
+  usingAtomicsInCache: Boolean,
+  usingAtomicsOnlyForIO: Boolean,
+  usingDataScratchpad: Boolean) extends Module with MemoryOpConstants {
   val io = IO(new Bundle {
     /** request from Core */
     val req = Flipped(Decoupled(new TLBReq(lgMaxSize)))
@@ -413,7 +442,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig, edge: TLEdgeOut)
     legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
   // todo: using DataScratchpad doesn't support cacheable.
   val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B
-  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
+  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous // TODO: Remove `p`
   // In M mode, if access DM address(debug module program buffer)
   val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B)
   val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r

From 641b389bfa0d6665d083408273073050f48ff1d4 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Sun, 7 May 2023 14:39:39 +0800
Subject: [PATCH 03/32] TLB: move source file

---
 {diplomatic/src/rocket => rocket/src}/TLB.scala | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {diplomatic/src/rocket => rocket/src}/TLB.scala (100%)

diff --git a/diplomatic/src/rocket/TLB.scala b/rocket/src/TLB.scala
similarity index 100%
rename from diplomatic/src/rocket/TLB.scala
rename to rocket/src/TLB.scala

From 727fc84b9670480f0c99e2ae620cc763f1882e10 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Sun, 7 May 2023 14:49:31 +0800
Subject: [PATCH 04/32] TLB: refactor constructor call parameters

---
 rocket/src/TLB.scala | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index ab0b37546..8f229c1bd 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -146,6 +146,7 @@ class TLBEntry(
   pgLevels: Int,
   pgLevelBits: Int,
   vpnBits: Int,
+  ppnBits: Int,
   hypervisorExtraAddrBits: Int,
   usingVM: Boolean) extends Bundle {
   require(nSectors == 1 || !superpage)
@@ -157,15 +158,15 @@ class TLBEntry(
   /** tag in vitualization mode */
   val tag_v = Bool()
   /** entry data */
-  val data = Vec(nSectors, UInt(new TLBEntryData().getWidth.W))
+  val data = Vec(nSectors, UInt(new TLBEntryData(ppnBits).getWidth.W))
   /** valid bit */
   val valid = Vec(nSectors, Bool())
   /** returns all entry data in this entry */
-  def entry_data = data.map(_.asTypeOf(new TLBEntryData))
+  def entry_data = data.map(_.asTypeOf(new TLBEntryData(ppnBits)))
   /** returns the index of sector */
   private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2-1, 0)
   /** returns the entry data matched with this vpn*/
-  def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData))
+  def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData(ppnBits)))
   /** returns whether a sector hits */
   def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual)
   /** returns whether tag matches vpn */
@@ -324,9 +325,12 @@ class TLB(
   pgLevelBits: Int,
   pgIdxBits: Int,
   vpnBits: Int,
+  ppnBits: Int,
   vaddrBits: Int,
   vaddrBitsExtended: Int,
+  paddrBits: Int,
   hypervisorExtraAddrBits: Int,
+  asIdBits: Int,
   xLen: Int,
   usingHypervisor: Boolean,
   usingVM: Boolean,
@@ -336,13 +340,13 @@ class TLB(
   usingDataScratchpad: Boolean) extends Module with MemoryOpConstants {
   val io = IO(new Bundle {
     /** request from Core */
-    val req = Flipped(Decoupled(new TLBReq(lgMaxSize)))
+    val req = Flipped(Decoupled(new TLBReq(lgMaxSize, vaddrBitsExtended)))
     /** response to Core */
-    val resp = Output(new TLBResp())
+    val resp = Output(new TLBResp(paddrBits, vaddrBitsExtended))
     /** SFence Input */
-    val sfence = Flipped(Valid(new SFenceReq))
+    val sfence = Flipped(Valid((new SFenceReq(vaddrBits, asIdBits))))
     /** IO to PTW */
-    val ptw = new TLBPTWIO
+    val ptw = new TLBPTWIO()
     /** suppress a TLB refill, one cycle after a miss */
     val kill = Input(Bool())
   })
@@ -352,14 +356,14 @@ class TLB(
   /** index for sectored_Entry */
   val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
   /** TLB Entry */
-  val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false))))
+  val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM))))
   /** Superpage Entry */
-  val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true)))
+  val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM)))
   /** Special Entry
     *
     * If PMP granularity is less than page size, thus need additional "special" entry manage PMP.
     */
-  val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false)))
+  val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM)))
   def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries
   def all_entries = ordinary_entries ++ special_entry
   def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry
@@ -465,7 +469,7 @@ class TLB(
   when (do_refill) {
     val pte = io.ptw.resp.bits.pte
     val refill_v = r_vstage1_en || r_stage2_en
-    val newEntry = Wire(new TLBEntryData)
+    val newEntry = Wire(new TLBEntryData(ppnBits))
     newEntry.ppn := pte.ppn
     newEntry.c := cacheable
     newEntry.u := pte.u

From 7754371e9af8d98ffbea83e384dcc6f71f0f795a Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Sun, 7 May 2023 15:00:14 +0800
Subject: [PATCH 05/32] TLB: refactor to Chisel native coverage

---
 rocket/src/TLB.scala | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 8f229c1bd..8d10dd49d 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -11,8 +11,6 @@ import freechips.rocketchip.subsystem.CacheBlockBytes
 import freechips.rocketchip.diplomacy.RegionType
 import freechips.rocketchip.tile.{CoreModule, CoreBundle}
 import freechips.rocketchip.tilelink._
-// import freechips.rocketchip.util._
-// import freechips.rocketchip.util.property
 import freechips.rocketchip.devices.debug.DebugModuleKey
 import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.rocket.constants.MemoryOpConstants
@@ -332,6 +330,7 @@ class TLB(
   hypervisorExtraAddrBits: Int,
   asIdBits: Int,
   xLen: Int,
+  cacheBlockBytes: Int,
   usingHypervisor: Boolean,
   usingVM: Boolean,
   usingAtomics: Boolean,
@@ -446,7 +445,7 @@ class TLB(
     legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
   // todo: using DataScratchpad doesn't support cacheable.
   val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B
-  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous // TODO: Remove `p`
+  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, cacheBlockBytes, BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
   // In M mode, if access DM address(debug module program buffer)
   val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B)
   val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r
@@ -764,7 +763,7 @@ class TLB(
   }
 
   def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) =
-    property.cover(cond, s"${if (instruction) "I" else "D"}TLB_$label", "MemorySystem;;" + desc)
+    cover(cond, s"${if (instruction) "I" else "D"}TLB_$label MemorySystem;; $desc")
   /** Decides which entry to be replaced
     *
     * If there is a invalid entry, replace it with priorityencoder;

From f014cd602f0de6cd8dd2c222435d33c2bf1ef7a3 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 13:59:22 +0800
Subject: [PATCH 06/32] TLB: move `TLBPermissions`

---
 {diplomatic/src/rocket => rocket/src}/TLBPermissions.scala | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {diplomatic/src/rocket => rocket/src}/TLBPermissions.scala (100%)

diff --git a/diplomatic/src/rocket/TLBPermissions.scala b/rocket/src/TLBPermissions.scala
similarity index 100%
rename from diplomatic/src/rocket/TLBPermissions.scala
rename to rocket/src/TLBPermissions.scala

From 50225e522d4b96f7fd4ca2aff9bf410e58fa4532 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 14:00:41 +0800
Subject: [PATCH 07/32] TLB: move `Consts`

---
 {diplomatic/src/rocket => rocket/src}/Consts.scala | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {diplomatic/src/rocket => rocket/src}/Consts.scala (100%)

diff --git a/diplomatic/src/rocket/Consts.scala b/rocket/src/Consts.scala
similarity index 100%
rename from diplomatic/src/rocket/Consts.scala
rename to rocket/src/Consts.scala

From b6d23f4089c97d6ffc43ea887d585f7064e18a64 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 14:13:21 +0800
Subject: [PATCH 08/32] TLB: refactor traits in `Consts` to objects

---
 rocket/src/Consts.scala | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/rocket/src/Consts.scala b/rocket/src/Consts.scala
index 8a0d36ba4..ff413037f 100644
--- a/rocket/src/Consts.scala
+++ b/rocket/src/Consts.scala
@@ -4,9 +4,8 @@ package org.chipsalliance.rocket.constants
 
 import chisel3._
 import chisel3.util._
-import freechips.rocketchip.util._
 
-trait ScalarOpConstants {
+object ScalarOpConstants {
   val SZ_BR = 3
   def BR_X    = BitPat("b???")
   def BR_EQ   = 0.U(3.W)
@@ -48,7 +47,7 @@ trait ScalarOpConstants {
   def DW_XPR = DW_64
 }
 
-trait MemoryOpConstants {
+object MemoryOpConstants {
   val NUM_XA_OPS = 9
   val M_SZ      = 5
   def M_X       = BitPat("b?????");

From 46cbcacd359f186ab4b4d53bb50c729673079e24 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 14:14:16 +0800
Subject: [PATCH 09/32] TLB: add minimal `CSR` for usages in `TLB`

---
 rocket/src/CSR.scala | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 rocket/src/CSR.scala

diff --git a/rocket/src/CSR.scala b/rocket/src/CSR.scala
new file mode 100644
index 000000000..1c334f8b6
--- /dev/null
+++ b/rocket/src/CSR.scala
@@ -0,0 +1,22 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util.{BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid, log2Ceil, log2Up}
+import org.chipsalliance.cde.config.Parameters
+import org.chipsalliance.rockettile._
+
+import scala.collection.mutable.LinkedHashMap
+// import Instructions._
+// import CustomInstructions._
+
+object PRV
+{
+  val SZ = 2
+  val U = 0
+  val S = 1
+  val H = 2
+  val M = 3
+}
\ No newline at end of file

From 9f27cd3377f9428a461b939bf646c11ecc87aef3 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 14:14:46 +0800
Subject: [PATCH 10/32] TLB: refactor dependencies

---
 rocket/src/TLB.scala | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 8d10dd49d..4616c5e99 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -7,11 +7,6 @@ import chisel3._
 import chisel3.util._
 
 import org.chipsalliance.cde.config.{Field, Parameters}
-import freechips.rocketchip.subsystem.CacheBlockBytes
-import freechips.rocketchip.diplomacy.RegionType
-import freechips.rocketchip.tile.{CoreModule, CoreBundle}
-import freechips.rocketchip.tilelink._
-import freechips.rocketchip.devices.debug.DebugModuleKey
 import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.rocket.constants.MemoryOpConstants
 
@@ -43,7 +38,7 @@ class SFenceReq(vaddrBits: Int, asIdBits: Int) extends Bundle {
   val hg = Bool()
 }
 
-class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int) extends Bundle with MemoryOpConstants {
+class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int) extends Bundle {
   /** request address from CPU. */
   val vaddr = UInt(vaddrBitsExtended.W)
   /** don't lookup TLB, bypass vaddr as paddr */
@@ -51,7 +46,7 @@ class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int) extends Bundle with MemoryO
   /** granularity */
   val size = UInt(log2Ceil(lgMaxSize + 1).W)
   /** memory command. */
-  val cmd  = Bits(M_SZ.W)
+  val cmd  = Bits(MemoryOpConstants.M_SZ.W)
   val prv = UInt(PRV.SZ.W)
   /** virtualization mode */
   val v = Bool()
@@ -336,7 +331,7 @@ class TLB(
   usingAtomics: Boolean,
   usingAtomicsInCache: Boolean,
   usingAtomicsOnlyForIO: Boolean,
-  usingDataScratchpad: Boolean) extends Module with MemoryOpConstants {
+  usingDataScratchpad: Boolean) extends Module {
   val io = IO(new Bundle {
     /** request from Core */
     val req = Flipped(Decoupled(new TLBReq(lgMaxSize, vaddrBitsExtended)))
@@ -345,7 +340,7 @@ class TLB(
     /** SFence Input */
     val sfence = Flipped(Valid((new SFenceReq(vaddrBits, asIdBits))))
     /** IO to PTW */
-    val ptw = new TLBPTWIO()
+    val ptw = new TLBPTWIO() // TODO: Dependent on PTW
     /** suppress a TLB refill, one cycle after a miss */
     val kill = Input(Bool())
   })
@@ -432,7 +427,7 @@ class TLB(
                 Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits))
   val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
   val mpu_priv = Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */), PRV.S.U, Cat(io.ptw.status.debug, priv))
-  val pmp = Module(new PMPChecker(lgMaxSize))
+  val pmp = Module(new PMPChecker(lgMaxSize)) // TODO: Dependent on PMP
   pmp.io.addr := mpu_physaddr
   pmp.io.size := io.req.bits.size
   pmp.io.pmp := (io.ptw.pmp: Seq[PMP])
@@ -588,13 +583,13 @@ class TLB(
     if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B
     else vm_enabled && stage1_en && badVA(false)
 
-  val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(M_XLR, M_XSC)
-  val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd)
-  val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd)
-  val cmd_put_partial = io.req.bits.cmd === M_PWR
-  val cmd_read = isRead(io.req.bits.cmd)
-  val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX
-  val cmd_write = isWrite(io.req.bits.cmd)
+  val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(MemoryOpConstants.M_XLR, MemoryOpConstants.M_XSC)
+  val cmd_amo_logical = usingAtomics.B && MemoryOpConstants.isAMOLogical(io.req.bits.cmd)
+  val cmd_amo_arithmetic = usingAtomics.B && MemoryOpConstants.isAMOArithmetic(io.req.bits.cmd)
+  val cmd_put_partial = io.req.bits.cmd === MemoryOpConstants.M_PWR
+  val cmd_read = MemoryOpConstants.isRead(io.req.bits.cmd)
+  val cmd_readx = usingHypervisor.B && io.req.bits.cmd === MemoryOpConstants.M_HLVX
+  val cmd_write = MemoryOpConstants.isWrite(io.req.bits.cmd)
   val cmd_write_perms = cmd_write ||
     io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions
 

From f25bb2acb55dbeb395ec9fbd9846b8b328773d21 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 14:39:40 +0800
Subject: [PATCH 11/32] TLB: remove unnecessary dependencies

---
 rocket/src/CSR.scala | 2 --
 rocket/src/TLB.scala | 5 -----
 2 files changed, 7 deletions(-)

diff --git a/rocket/src/CSR.scala b/rocket/src/CSR.scala
index 1c334f8b6..2dda6d47b 100644
--- a/rocket/src/CSR.scala
+++ b/rocket/src/CSR.scala
@@ -5,8 +5,6 @@ package org.chipsalliance.rocket
 
 import chisel3._
 import chisel3.util.{BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid, log2Ceil, log2Up}
-import org.chipsalliance.cde.config.Parameters
-import org.chipsalliance.rockettile._
 
 import scala.collection.mutable.LinkedHashMap
 // import Instructions._
diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 4616c5e99..bac2bb877 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -6,14 +6,9 @@ package org.chipsalliance.rocket
 import chisel3._
 import chisel3.util._
 
-import org.chipsalliance.cde.config.{Field, Parameters}
 import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.rocket.constants.MemoryOpConstants
 
-case object PgLevels extends Field[Int](2)
-case object ASIdBits extends Field[Int](0)
-case object VMIdBits extends Field[Int](0)
-
 /** =SFENCE=
   * rs1 rs2
   * {{{

From 20ae4bcee1c5fbf953fc8613d3a6f606f057ecf7 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 15:10:53 +0800
Subject: [PATCH 12/32] TLB: remove redundant dependencies

---
 rocket/src/TLBPermissions.scala | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/rocket/src/TLBPermissions.scala b/rocket/src/TLBPermissions.scala
index 26c7c055e..9aac16b1c 100644
--- a/rocket/src/TLBPermissions.scala
+++ b/rocket/src/TLBPermissions.scala
@@ -5,9 +5,6 @@ package org.chipsalliance.rocket
 import chisel3._
 import chisel3.util.isPow2
 
-import freechips.rocketchip.diplomacy._
-import freechips.rocketchip.tilelink._
-
 case class TLBPermissions(
   homogeneous: Bool, // if false, the below are undefined
   r: Bool, // readable

From 5a869102b0db88515bc59f811bd2020a1f38aa02 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 8 May 2023 16:14:26 +0800
Subject: [PATCH 13/32] TLB: refactor deprecated code style

---
 rocket/src/CSR.scala    |  2 +-
 rocket/src/Consts.scala |  6 +++---
 rocket/src/TLB.scala    | 37 +++++++++++++++++++------------------
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/rocket/src/CSR.scala b/rocket/src/CSR.scala
index 2dda6d47b..a79022b0f 100644
--- a/rocket/src/CSR.scala
+++ b/rocket/src/CSR.scala
@@ -17,4 +17,4 @@ object PRV
   val S = 1
   val H = 2
   val M = 3
-}
\ No newline at end of file
+}
diff --git a/rocket/src/Consts.scala b/rocket/src/Consts.scala
index ff413037f..f408d9e2b 100644
--- a/rocket/src/Consts.scala
+++ b/rocket/src/Consts.scala
@@ -77,11 +77,11 @@ object MemoryOpConstants {
   def M_WOK     = "b10111".U // check write permissions but don't perform a write
   def M_HLVX    = "b10000".U // HLVX instruction
 
-  def isAMOLogical(cmd: UInt) = cmd.isOneOf(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND)
-  def isAMOArithmetic(cmd: UInt) = cmd.isOneOf(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU)
+  def isAMOLogical(cmd: UInt) = Seq(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND).map(cmd === _).reduce(_ || _)
+  def isAMOArithmetic(cmd: UInt) = Seq(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU).map(cmd === _).reduce(_ || _)
   def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd)
   def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
-  def isRead(cmd: UInt) = cmd.isOneOf(M_XRD, M_HLVX, M_XLR, M_XSC) || isAMO(cmd)
+  def isRead(cmd: UInt) = Seq(M_XRD, M_HLVX, M_XLR, M_XSC).map(cmd === _).reduce(_ || _) || isAMO(cmd)
   def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd)
   def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
 }
diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index bac2bb877..9ed7a4eab 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -152,13 +152,13 @@ class TLBEntry(
   /** returns all entry data in this entry */
   def entry_data = data.map(_.asTypeOf(new TLBEntryData(ppnBits)))
   /** returns the index of sector */
-  private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2-1, 0)
+  private def sectorIdx(vpn: UInt) = vpn(log2Ceil(nSectors) - 1, 0)
   /** returns the entry data matched with this vpn*/
   def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData(ppnBits)))
   /** returns whether a sector hits */
-  def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual)
+  def sectorHit(vpn: UInt, virtual: Bool) = valid.asUInt.orR && sectorTagMatch(vpn, virtual)
   /** returns whether tag matches vpn */
-  def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> nSectors.log2) === 0.U) && (tag_v === virtual)
+  def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> log2Ceil(nSectors)) === 0.U) && (tag_v === virtual)
   /** returns hit signal */
   def hit(vpn: UInt, virtual: Bool): Bool = {
     if (superpage && usingVM) {
@@ -197,7 +197,7 @@ class TLBEntry(
   def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = {
     this.tag_vpn := vpn
     this.tag_v := virtual
-    this.level := level.extract(log2Ceil(pgLevels - superpageOnly.toInt)-1, 0)
+    this.level := level(log2Ceil(pgLevels - superpageOnly.B.litValue) - 1, 0)
 
     val idx = sectorIdx(vpn)
     valid(idx) := true.B
@@ -352,7 +352,7 @@ class TLB(
     *
     * If PMP granularity is less than page size, thus need additional "special" entry manage PMP.
     */
-  val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM)))
+  val special_entry = Option.when(!pageGranularityPMPs)(Reg(new TLBEntry(1, true, false, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM)))
   def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries
   def all_entries = ordinary_entries ++ special_entry
   def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry
@@ -416,7 +416,7 @@ class TLB(
   /** refill signal */
   val do_refill = usingVM.B && io.ptw.resp.valid
   /** sfence invalidate refill */
-  val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid
+  val invalidate_refill = Seq(s_request /* don't care */, s_wait_invalidate).map(state === _).reduce(_ || _) || io.sfence.valid
   // PMP
   val mpu_ppn = Mux(do_refill, refill_ppn,
                 Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits))
@@ -492,7 +492,7 @@ class TLB(
       }
     // refill sectored_hit
     }.otherwise {
-      val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
+      val r_memIdx = r_refill_tag(log2Ceil(cfg.nSectors) + log2Ceil(cfg.nSets) - 1, log2Ceil(cfg.nSectors))
       val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr)
       for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when (waddr === i.U) {
         when (!r_sectored_hit.valid) { e.invalidate() }
@@ -560,16 +560,17 @@ class TLB(
   // vaddr misaligned: vaddr[1:0]=b00
   val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR
   def badVA(guestPA: Boolean): Bool = {
-    val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels
+    val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels // TODO: Cannot resolve
     val extraBits = if (guestPA) hypervisorExtraAddrBits else 0
     val signed = !guestPA
     val nPgLevelChoices = pgLevels - minPgLevels + 1
     val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits
-    (for (i <- 0 until nPgLevelChoices) yield {
-      val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.toInt))).U
-      val maskedVAddr = io.req.bits.vaddr & mask
-      additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask)
-    }).orR
+    VecInit(Seq.range(0, nPgLevelChoices).map {
+      i =>
+        val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.B.litValue.toInt))).U
+        val maskedVAddr = io.req.bits.vaddr & mask
+        additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask)
+    }).asUInt.orR
   }
   val bad_gpa =
     if (!usingHypervisor) false.B
@@ -578,7 +579,7 @@ class TLB(
     if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B
     else vm_enabled && stage1_en && badVA(false)
 
-  val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(MemoryOpConstants.M_XLR, MemoryOpConstants.M_XSC)
+  val cmd_lrsc = usingAtomics.B && VecInit(Seq(MemoryOpConstants.M_XLR, MemoryOpConstants.M_XSC).map(io.req.bits.cmd === _)).asUInt.orR
   val cmd_amo_logical = usingAtomics.B && MemoryOpConstants.isAMOLogical(io.req.bits.cmd)
   val cmd_amo_arithmetic = usingAtomics.B && MemoryOpConstants.isAMOArithmetic(io.req.bits.cmd)
   val cmd_put_partial = io.req.bits.cmd === MemoryOpConstants.M_PWR
@@ -586,7 +587,7 @@ class TLB(
   val cmd_readx = usingHypervisor.B && io.req.bits.cmd === MemoryOpConstants.M_HLVX
   val cmd_write = MemoryOpConstants.isWrite(io.req.bits.cmd)
   val cmd_write_perms = cmd_write ||
-    io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions
+    VecInit(Seq(MemoryOpConstants.M_FLUSH_ALL, MemoryOpConstants.M_WOK).map(io.req.bits.cmd === _)).asUInt.orR // not a write, but needs write permissions
 
   val lrscAllowed = Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array)
   val ae_array =
@@ -628,7 +629,7 @@ class TLB(
   when (io.req.valid && vm_enabled) {
     // replace
     when (sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) }
-    when (superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) }
+    when (VecInit(superpage_hits).asUInt.orR) { superpage_plru.access(OHToUInt(superpage_hits)) }
   }
 
   // Superpages create the possibility that two entries in the TLB may match.
@@ -695,7 +696,7 @@ class TLB(
       r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx))
       r_sectored_hit.valid := sector_hits.orR
       r_sectored_hit.bits := OHToUInt(sector_hits)
-      r_superpage_hit.valid := superpage_hits.orR
+      r_superpage_hit.valid := VecInit(superpage_hits).asUInt.orR
       r_superpage_hit.bits := OHToUInt(superpage_hits)
     }
     // Handle SFENCE.VMA when send request to PTW.
@@ -762,7 +763,7 @@ class TLB(
     * @return mask for TLBEntry replacement
     */
   def replacementEntry(set: Seq[TLBEntry], alt: UInt) = {
-    val valids = set.map(_.valid.orR).asUInt
+    val valids = VecInit(set.map(_.valid.asUInt.orR)).asUInt
     Mux(valids.andR, alt, PriorityEncoder(~valids))
   }
 }

From 79bb9fd9b49870364cd5eda110457f0bddfff921 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Wed, 10 May 2023 16:08:22 +0800
Subject: [PATCH 14/32] TLB: move `Instructions`

---
 {diplomatic/src/rocket => rocket/src}/Instructions.scala | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename {diplomatic/src/rocket => rocket/src}/Instructions.scala (100%)

diff --git a/diplomatic/src/rocket/Instructions.scala b/rocket/src/Instructions.scala
similarity index 100%
rename from diplomatic/src/rocket/Instructions.scala
rename to rocket/src/Instructions.scala

From e951e2a09b58176382f04a44f90ca7a28eb6c54b Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Wed, 10 May 2023 16:09:03 +0800
Subject: [PATCH 15/32] TLB: add `util`

---
 rocket/src/TLB.scala              |   5 +-
 rocket/src/TLBPermissions.scala   |   2 +-
 rocket/src/util/Misc.scala        |  24 ++
 rocket/src/util/Replacement.scala | 541 ++++++++++++++++++++++++++++++
 rocket/src/util/package.scala     |  24 ++
 5 files changed, 593 insertions(+), 3 deletions(-)
 create mode 100644 rocket/src/util/Misc.scala
 create mode 100644 rocket/src/util/Replacement.scala
 create mode 100644 rocket/src/util/package.scala

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 9ed7a4eab..7a1a5d112 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -8,6 +8,7 @@ import chisel3.util._
 
 import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.rocket.constants.MemoryOpConstants
+import org.chipsalliance.rocket.util._
 
 /** =SFENCE=
   * rs1 rs2
@@ -306,7 +307,7 @@ class TLB(
   instruction: Boolean,
   lgMaxSize: Int,
   cfg: TLBConfig,
-  edge: TLEdgeOut,
+  edge: TLEdgeOut, // TODO: Decoupled from Tilelink
   pmpGranularity: Int,
   pgLevels: Int,
   minPgLevels: Int,
@@ -431,7 +432,7 @@ class TLB(
   // check exist a slave can consume this address.
   val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_)
   // check utility to help check SoC property.
-  def fastCheck(member: TLManagerParameters => Boolean) =
+  def fastCheck(member: TLManagerParameters => Boolean) = // TODO: Decoupled from Tilelink
     legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
   // todo: using DataScratchpad doesn't support cacheable.
   val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B
diff --git a/rocket/src/TLBPermissions.scala b/rocket/src/TLBPermissions.scala
index 9aac16b1c..933793b81 100644
--- a/rocket/src/TLBPermissions.scala
+++ b/rocket/src/TLBPermissions.scala
@@ -27,7 +27,7 @@ object TLBPageLookup
     val useful = r || w || x || c || a || l
   }
 
-  private def groupRegions(managers: Seq[TLManagerParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = {
+  private def groupRegions(managers: Seq[TLManagerParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // TODO: Decoupled from Tilelink
     val permissions = managers.map { m =>
       (m.address, TLBFixedPermissions(
         e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains m.regionType,
diff --git a/rocket/src/util/Misc.scala b/rocket/src/util/Misc.scala
new file mode 100644
index 000000000..e0a077b88
--- /dev/null
+++ b/rocket/src/util/Misc.scala
@@ -0,0 +1,24 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3._
+import chisel3.util._
+
+object PopCountAtLeast { // apply(x, n) yields a Bool that is true when at least `n` bits of `x` are set
+  private def two(x: UInt): (Bool, Bool) = x.getWidth match { // returns (>= 1 bit set, >= 2 bits set)
+    case 1 => (x.asBool, false.B)
+    case n =>
+      val half = x.getWidth / 2
+      val (leftOne, leftTwo) = two(x(half - 1, 0))
+      val (rightOne, rightTwo) = two(x(x.getWidth - 1, half))
+      (leftOne || rightOne, leftTwo || rightTwo || (leftOne && rightOne)) // two in one half, or one in each half
+  }
+  def apply(x: UInt, n: Int): Bool = n match {
+    case 0 => true.B
+    case 1 => x.orR
+    case 2 => two(x)._2
+    case _ => PopCount(x) >= n.U // general case; a literal `case 3` here would throw scala.MatchError for n > 3
+  }
+}
diff --git a/rocket/src/util/Replacement.scala b/rocket/src/util/Replacement.scala
new file mode 100644
index 000000000..40eab181c
--- /dev/null
+++ b/rocket/src/util/Replacement.scala
@@ -0,0 +1,541 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3._
+import chisel3.util._
+
+abstract class ReplacementPolicy {
+  def nBits: Int
+  def perSet: Boolean
+  def way: UInt
+  def miss: Unit
+  def hit: Unit
+  def access(touch_way: UInt): Unit
+  def access(touch_ways: Seq[Valid[UInt]]): Unit
+  def state_read: UInt
+  def get_next_state(state: UInt, touch_way: UInt): UInt
+  def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = {
+    touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev))
+  }
+  def get_replace_way(state: UInt): UInt
+}
+
+object ReplacementPolicy {
+  def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match {
+    case "random" => new RandomReplacement(n_ways)
+    case "lru"    => new TrueLRU(n_ways)
+    case "plru"   => new PseudoLRU(n_ways)
+    case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
+  }
+}
+
+class RandomReplacement(n_ways: Int) extends ReplacementPolicy {
+  private val replace = Wire(Bool())
+  replace := false.B
+  def nBits = 16
+  def perSet = false
+  private val lfsr = LFSR(nBits, replace)
+  def state_read = WireDefault(lfsr)
+
+  def way = Random(n_ways, lfsr)
+  def miss = replace := true.B
+  def hit = {}
+  def access(touch_way: UInt) = {}
+  def access(touch_ways: Seq[Valid[UInt]]) = {}
+  def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare
+  def get_replace_way(state: UInt) = way
+}
+
+abstract class SeqReplacementPolicy {
+  def access(set: UInt): Unit
+  def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit
+  def way: UInt
+}
+
+abstract class SetAssocReplacementPolicy {
+  def access(set: UInt, touch_way: UInt): Unit
+  def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit
+  def way(set: UInt): UInt
+}
+
+class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
+  val logic = new RandomReplacement(n_ways)
+  def access(set: UInt) = { }
+  def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
+    when (valid && !hit) { logic.miss }
+  }
+  def way = logic.way
+}
+
+class TrueLRU(n_ways: Int) extends ReplacementPolicy {
+  // True LRU replacement policy, using a triangular matrix to track which ways are more recently used than others.
+  // The matrix is packed into a single UInt (or Bits).  Example 4-way (6-bits):
+  // [5] - 3 more recent than 2
+  // [4] - 3 more recent than 1
+  // [3] - 2 more recent than 1
+  // [2] - 3 more recent than 0
+  // [1] - 2 more recent than 0
+  // [0] - 1 more recent than 0
+  def nBits = (n_ways * (n_ways-1)) / 2
+  def perSet = true
+  private val state_reg = RegInit(0.U(nBits.W))
+  def state_read = WireDefault(state_reg)
+
+  private def extractMRUVec(state: UInt): Seq[UInt] = {
+    // Extract per-way information about which higher-indexed ways are more recently used
+    val moreRecentVec = Wire(Vec(n_ways-1, UInt(n_ways.W)))
+    var lsb = 0
+    for (i <- 0 until n_ways-1) {
+      moreRecentVec(i) := Cat(state(lsb+n_ways-i-2,lsb), 0.U((i+1).W))
+      lsb = lsb + (n_ways - i - 1)
+    }
+    moreRecentVec
+  }
+
+  def get_next_state(state: UInt, touch_way: UInt): UInt = {
+    val nextState     = Wire(Vec(n_ways-1, UInt(n_ways.W)))
+    val moreRecentVec = extractMRUVec(state)  // reconstruct lower triangular matrix
+    val wayDec        = UIntToOH(touch_way, n_ways)
+
+    // Compute next value of triangular matrix
+    // set the touched way as more recent than every other way
+    nextState.zipWithIndex.map { case (e, i) =>
+      e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec)
+    }
+
+    nextState.zipWithIndex.tail.foldLeft((nextState.head.apply(n_ways-1,1),0)) { case ((pe,pi),(ce,ci)) => (Cat(ce.apply(n_ways-1,ci+1), pe), ci) }._1
+  }
+
+  def access(touch_way: UInt): Unit = {
+    state_reg := get_next_state(state_reg, touch_way)
+  }
+  def access(touch_ways: Seq[Valid[UInt]]): Unit = {
+    when (touch_ways.map(_.valid).orR) {
+      state_reg := get_next_state(state_reg, touch_ways)
+    }
+    for (i <- 1 until touch_ways.size) {
+      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous")
+    }
+  }
+
+  def get_replace_way(state: UInt): UInt = {
+    val moreRecentVec = extractMRUVec(state)  // reconstruct lower triangular matrix
+    // For each way, determine if all other ways are more recent
+    val mruWayDec     = (0 until n_ways).map { i =>
+      val upperMoreRecent = (if (i == n_ways-1) true.B else moreRecentVec(i).apply(n_ways-1,i+1).andR)
+      val lowerMoreRecent = (if (i == 0)        true.B else moreRecentVec.map(e => !e(i)).reduce(_ && _))
+      upperMoreRecent && lowerMoreRecent
+    }
+    OHToUInt(mruWayDec)
+  }
+
+  def way = get_replace_way(state_reg)
+  def miss = access(way)
+  def hit = {}
+  @deprecated("replace 'replace' with 'way' from abstract class ReplacementPolicy","Rocket Chip 2020.05")
+  def replace: UInt = way
+}
+
+class PseudoLRU(n_ways: Int) extends ReplacementPolicy {
+  // Pseudo-LRU tree algorithm: https://en.wikipedia.org/wiki/Pseudo-LRU#Tree-PLRU
+  //
+  //
+  // - bits storage example for 4-way PLRU binary tree:
+  //                  bit[2]: ways 3+2 older than ways 1+0
+  //                  /                                  \
+  //     bit[1]: way 3 older than way 2    bit[0]: way 1 older than way 0
+  //
+  //
+  // - bits storage example for 3-way PLRU binary tree:
+  //                  bit[1]: way 2 older than ways 1+0
+  //                                                  \
+  //                                       bit[0]: way 1 older than way 0
+  //
+  //
+  // - bits storage example for 8-way PLRU binary tree:
+  //                      bit[6]: ways 7-4 older than ways 3-0
+  //                      /                                  \
+  //            bit[5]: ways 7+6 > 5+4                bit[2]: ways 3+2 > 1+0
+  //            /                    \                /                    \
+  //     bit[4]: way 7>6    bit[3]: way 5>4    bit[1]: way 3>2    bit[0]: way 1>0
+
+  def nBits = n_ways - 1
+  def perSet = true
+  private val state_reg = if (nBits == 0) Reg(UInt(0.W)) else RegInit(0.U(nBits.W))
+  def state_read = WireDefault(state_reg)
+
+  def access(touch_way: UInt): Unit = {
+    state_reg := get_next_state(state_reg, touch_way)
+  }
+  def access(touch_ways: Seq[Valid[UInt]]): Unit = {
+    when (touch_ways.map(_.valid).orR) {
+      state_reg := get_next_state(state_reg, touch_ways)
+    }
+    for (i <- 1 until touch_ways.size) {
+      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous")
+    }
+  }
+
+
+  /** @param state state_reg bits for this sub-tree
+    * @param touch_way touched way encoded value bits for this sub-tree
+    * @param tree_nways number of ways in this sub-tree
+    */
+  def get_next_state(state: UInt, touch_way: UInt, tree_nways: Int): UInt = {
+    require(state.getWidth == (tree_nways-1),                   s"wrong state bits width ${state.getWidth} for $tree_nways ways")
+    require(touch_way.getWidth == (log2Ceil(tree_nways) max 1), s"wrong encoded way width ${touch_way.getWidth} for $tree_nways ways")
+
+    if (tree_nways > 2) {
+      // we are at a branching node in the tree, so recurse
+      val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1)  // number of ways in the right sub-tree
+      val left_nways:  Int = tree_nways - right_nways         // number of ways in the left sub-tree
+      val set_left_older      = !touch_way(log2Ceil(tree_nways)-1)
+      val left_subtree_state  = state.extract(tree_nways-3, right_nways-1)
+      val right_subtree_state = state(right_nways-2, 0)
+
+      if (left_nways > 1) {
+        // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees
+        Cat(set_left_older,
+            Mux(set_left_older,
+                left_subtree_state,  // if setting left sub-tree as older, do NOT recurse into left sub-tree
+                get_next_state(left_subtree_state, touch_way.extract(log2Ceil(left_nways)-1,0), left_nways)),  // recurse left if newer
+            Mux(set_left_older,
+                get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways),  // recurse right if newer
+                right_subtree_state))  // if setting right sub-tree as older, do NOT recurse into right sub-tree
+      } else {
+        // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree
+        Cat(set_left_older,
+            Mux(set_left_older,
+                get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways),  // recurse right if newer
+                right_subtree_state))  // if setting right sub-tree as older, do NOT recurse into right sub-tree
+      }
+    } else if (tree_nways == 2) {
+      // we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value
+      !touch_way(0)
+    } else {  // tree_nways <= 1
+      // we are at an empty node in an empty tree for 1 way, so return single zero bit for Chisel (no zero-width wires)
+      0.U(1.W)
+    }
+  }
+
+  def get_next_state(state: UInt, touch_way: UInt): UInt = {
+    val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) touch_way.padTo  (log2Ceil(n_ways))
+                                                                else touch_way.extract(log2Ceil(n_ways)-1,0)
+    get_next_state(state, touch_way_sized, n_ways)
+  }
+
+
+  /** @param state state_reg bits for this sub-tree
+    * @param tree_nways number of ways in this sub-tree
+    */
+  def get_replace_way(state: UInt, tree_nways: Int): UInt = {
+    require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways")
+
+    // this algorithm recursively descends the binary tree, filling in the way-to-replace encoded value from msb to lsb
+    if (tree_nways > 2) {
+      // we are at a branching node in the tree, so recurse
+      val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1)  // number of ways in the right sub-tree
+      val left_nways:  Int = tree_nways - right_nways         // number of ways in the left sub-tree
+      val left_subtree_older  = state(tree_nways-2)
+      val left_subtree_state  = state.extract(tree_nways-3, right_nways-1)
+      val right_subtree_state = state(right_nways-2, 0)
+
+      if (left_nways > 1) {
+        // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees
+        Cat(left_subtree_older,      // return the top state bit (current tree node) as msb of the way-to-replace encoded value
+            Mux(left_subtree_older,  // if left sub-tree is older, recurse left, else recurse right
+                get_replace_way(left_subtree_state,  left_nways),    // recurse left
+                get_replace_way(right_subtree_state, right_nways)))  // recurse right
+      } else {
+        // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree
+        Cat(left_subtree_older,      // return the top state bit (current tree node) as msb of the way-to-replace encoded value
+            Mux(left_subtree_older,  // if left sub-tree is older, return and do not recurse right
+                0.U(1.W),
+                get_replace_way(right_subtree_state, right_nways)))  // recurse right
+      }
+    } else if (tree_nways == 2) {
+      // we are at a leaf node at the end of the tree, so just return the single state bit as lsb of the way-to-replace encoded value
+      state(0)
+    } else {  // tree_nways <= 1
+      // we are at an empty node in an unbalanced tree for non-power-of-2 ways, so return single zero bit as lsb of the way-to-replace encoded value
+      0.U(1.W)
+    }
+  }
+
+  def get_replace_way(state: UInt): UInt = get_replace_way(state, n_ways)
+
+  def way = get_replace_way(state_reg)
+  def miss = access(way)
+  def hit = {}
+}
+
+class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
+  val logic = new PseudoLRU(n_ways)
+  val state = SyncReadMem(n_sets, UInt(logic.nBits.W))
+  val current_state = Wire(UInt(logic.nBits.W))
+  val next_state    = Wire(UInt(logic.nBits.W))
+  val plru_way = logic.get_replace_way(current_state)
+
+  def access(set: UInt) = {
+    current_state := state.read(set)
+  }
+
+  def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
+    val update_way = Mux(hit, way, plru_way)
+    next_state := logic.get_next_state(current_state, update_way)
+    when (valid) { state.write(set, next_state) }
+  }
+
+  def way = plru_way
+}
+
+
+class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy {
+  val logic = policy.toLowerCase match {
+    case "plru"  => new PseudoLRU(n_ways)
+    case "lru"   => new TrueLRU(n_ways)
+    case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
+  }
+  val state_vec =
+    if (logic.nBits == 0) Reg(Vec(n_sets, UInt(logic.nBits.W))) // Work around elaboration error on following line
+    else RegInit(VecInit(Seq.fill(n_sets)(0.U(logic.nBits.W))))
+
+  def access(set: UInt, touch_way: UInt) = {
+    state_vec(set) := logic.get_next_state(state_vec(set), touch_way)
+  }
+
+  def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = {
+    require(sets.size == touch_ways.size, "internal consistency check: should be same number of simultaneous updates for sets and touch_ways")
+    for (set <- 0 until n_sets) {
+      val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) =>
+        Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)}
+      when (set_touch_ways.map(_.valid).orR) {
+        state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways)
+      }
+    }
+  }
+
+  def way(set: UInt) = logic.get_replace_way(state_vec(set))
+
+}
+
+// Synthesizable unit tests
+import freechips.rocketchip.unittest._
+
+class PLRUTest(n_ways: Int, timeout: Int = 500) extends UnitTest(timeout) {
+  val plru = new PseudoLRU(n_ways)
+
+  // step
+  io.finished := RegNext(true.B, false.B)
+
+  val get_replace_ways = (0 until (1 << (n_ways-1))).map(state =>
+    plru.get_replace_way(state = state.U((n_ways-1).W)))
+  val get_next_states  = (0 until (1 << (n_ways-1))).map(state => (0 until n_ways).map(way =>
+    plru.get_next_state (state = state.U((n_ways-1).W), touch_way = way.U(log2Ceil(n_ways).W))))
+
+  n_ways match {
+    case 2 => {
+      assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0))
+      assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1))
+      assert(get_next_states(0)(0) === 1.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=1 actual=%d", get_next_states(0)(0))
+      assert(get_next_states(0)(1) === 0.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=0 actual=%d", get_next_states(0)(1))
+      assert(get_next_states(1)(0) === 1.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=1 actual=%d", get_next_states(1)(0))
+      assert(get_next_states(1)(1) === 0.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=0 actual=%d", get_next_states(1)(1))
+    }
+    case 3 => {
+      assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0))
+      assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1))
+      assert(get_replace_ways(2) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=2 actual=%d", get_replace_ways(2))
+      assert(get_replace_ways(3) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=2 actual=%d", get_replace_ways(3))
+      assert(get_next_states(0)(0) === 3.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=3 actual=%d", get_next_states(0)(0))
+      assert(get_next_states(0)(1) === 2.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=2 actual=%d", get_next_states(0)(1))
+      assert(get_next_states(0)(2) === 0.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=0 actual=%d", get_next_states(0)(2))
+      assert(get_next_states(1)(0) === 3.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=3 actual=%d", get_next_states(1)(0))
+      assert(get_next_states(1)(1) === 2.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=2 actual=%d", get_next_states(1)(1))
+      assert(get_next_states(1)(2) === 1.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=1 actual=%d", get_next_states(1)(2))
+      assert(get_next_states(2)(0) === 3.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=3 actual=%d", get_next_states(2)(0))
+      assert(get_next_states(2)(1) === 2.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=2 actual=%d", get_next_states(2)(1))
+      assert(get_next_states(2)(2) === 0.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=0 actual=%d", get_next_states(2)(2))
+      assert(get_next_states(3)(0) === 3.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=3 actual=%d", get_next_states(3)(0))
+      assert(get_next_states(3)(1) === 2.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=2 actual=%d", get_next_states(3)(1))
+      assert(get_next_states(3)(2) === 1.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=1 actual=%d", get_next_states(3)(2))
+    }
+    case 4 => {
+      assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0))
+      assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1))
+      assert(get_replace_ways(2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=0 actual=%d", get_replace_ways(2))
+      assert(get_replace_ways(3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=1 actual=%d", get_replace_ways(3))
+      assert(get_replace_ways(4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=4: expected=2 actual=%d", get_replace_ways(4))
+      assert(get_replace_ways(5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=5: expected=2 actual=%d", get_replace_ways(5))
+      assert(get_replace_ways(6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=6: expected=3 actual=%d", get_replace_ways(6))
+      assert(get_replace_ways(7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=7: expected=3 actual=%d", get_replace_ways(7))
+      assert(get_next_states(0)(0) === 5.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=5 actual=%d", get_next_states(0)(0))
+      assert(get_next_states(0)(1) === 4.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=4 actual=%d", get_next_states(0)(1))
+      assert(get_next_states(0)(2) === 2.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=2 actual=%d", get_next_states(0)(2))
+      assert(get_next_states(0)(3) === 0.U(plru.nBits.W), s"get_next_state state=0 way=3: expected=0 actual=%d", get_next_states(0)(3))
+      assert(get_next_states(1)(0) === 5.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=5 actual=%d", get_next_states(1)(0))
+      assert(get_next_states(1)(1) === 4.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=4 actual=%d", get_next_states(1)(1))
+      assert(get_next_states(1)(2) === 3.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=3 actual=%d", get_next_states(1)(2))
+      assert(get_next_states(1)(3) === 1.U(plru.nBits.W), s"get_next_state state=1 way=3: expected=1 actual=%d", get_next_states(1)(3))
+      assert(get_next_states(2)(0) === 7.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=7 actual=%d", get_next_states(2)(0))
+      assert(get_next_states(2)(1) === 6.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=6 actual=%d", get_next_states(2)(1))
+      assert(get_next_states(2)(2) === 2.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=2 actual=%d", get_next_states(2)(2))
+      assert(get_next_states(2)(3) === 0.U(plru.nBits.W), s"get_next_state state=2 way=3: expected=0 actual=%d", get_next_states(2)(3))
+      assert(get_next_states(3)(0) === 7.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=7 actual=%d", get_next_states(3)(0))
+      assert(get_next_states(3)(1) === 6.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=6 actual=%d", get_next_states(3)(1))
+      assert(get_next_states(3)(2) === 3.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=3 actual=%d", get_next_states(3)(2))
+      assert(get_next_states(3)(3) === 1.U(plru.nBits.W), s"get_next_state state=3 way=3: expected=1 actual=%d", get_next_states(3)(3))
+      assert(get_next_states(4)(0) === 5.U(plru.nBits.W), s"get_next_state state=4 way=0: expected=5 actual=%d", get_next_states(4)(0))
+      assert(get_next_states(4)(1) === 4.U(plru.nBits.W), s"get_next_state state=4 way=1: expected=4 actual=%d", get_next_states(4)(1))
+      assert(get_next_states(4)(2) === 2.U(plru.nBits.W), s"get_next_state state=4 way=2: expected=2 actual=%d", get_next_states(4)(2))
+      assert(get_next_states(4)(3) === 0.U(plru.nBits.W), s"get_next_state state=4 way=3: expected=0 actual=%d", get_next_states(4)(3))
+      assert(get_next_states(5)(0) === 5.U(plru.nBits.W), s"get_next_state state=5 way=0: expected=5 actual=%d", get_next_states(5)(0))
+      assert(get_next_states(5)(1) === 4.U(plru.nBits.W), s"get_next_state state=5 way=1: expected=4 actual=%d", get_next_states(5)(1))
+      assert(get_next_states(5)(2) === 3.U(plru.nBits.W), s"get_next_state state=5 way=2: expected=3 actual=%d", get_next_states(5)(2))
+      assert(get_next_states(5)(3) === 1.U(plru.nBits.W), s"get_next_state state=5 way=3: expected=1 actual=%d", get_next_states(5)(3))
+      assert(get_next_states(6)(0) === 7.U(plru.nBits.W), s"get_next_state state=6 way=0: expected=7 actual=%d", get_next_states(6)(0))
+      assert(get_next_states(6)(1) === 6.U(plru.nBits.W), s"get_next_state state=6 way=1: expected=6 actual=%d", get_next_states(6)(1))
+      assert(get_next_states(6)(2) === 2.U(plru.nBits.W), s"get_next_state state=6 way=2: expected=2 actual=%d", get_next_states(6)(2))
+      assert(get_next_states(6)(3) === 0.U(plru.nBits.W), s"get_next_state state=6 way=3: expected=0 actual=%d", get_next_states(6)(3))
+      assert(get_next_states(7)(0) === 7.U(plru.nBits.W), s"get_next_state state=7 way=0: expected=7 actual=%d", get_next_states(7)(0))
+      assert(get_next_states(7)(1) === 6.U(plru.nBits.W), s"get_next_state state=7 way=5: expected=6 actual=%d", get_next_states(7)(1))
+      assert(get_next_states(7)(2) === 3.U(plru.nBits.W), s"get_next_state state=7 way=2: expected=3 actual=%d", get_next_states(7)(2))
+      assert(get_next_states(7)(3) === 1.U(plru.nBits.W), s"get_next_state state=7 way=3: expected=1 actual=%d", get_next_states(7)(3))
+    }
+    case 5 => {
+      assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0))
+      assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1))
+      assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2))
+      assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3))
+      assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4))
+      assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5))
+      assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6))
+      assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7))
+      assert(get_replace_ways( 8) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=4 actual=%d", get_replace_ways( 8))
+      assert(get_replace_ways( 9) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=4 actual=%d", get_replace_ways( 9))
+      assert(get_replace_ways(10) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=4 actual=%d", get_replace_ways(10))
+      assert(get_replace_ways(11) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=4 actual=%d", get_replace_ways(11))
+      assert(get_replace_ways(12) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=4 actual=%d", get_replace_ways(12))
+      assert(get_replace_ways(13) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=4 actual=%d", get_replace_ways(13))
+      assert(get_replace_ways(14) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=4 actual=%d", get_replace_ways(14))
+      assert(get_replace_ways(15) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=4 actual=%d", get_replace_ways(15))
+      assert(get_next_states( 0)(0) === 13.U(plru.nBits.W), s"get_next_state state=00 way=0: expected=13 actual=%d", get_next_states( 0)(0))
+      assert(get_next_states( 0)(1) === 12.U(plru.nBits.W), s"get_next_state state=00 way=1: expected=12 actual=%d", get_next_states( 0)(1))
+      assert(get_next_states( 0)(2) === 10.U(plru.nBits.W), s"get_next_state state=00 way=2: expected=10 actual=%d", get_next_states( 0)(2))
+      assert(get_next_states( 0)(3) ===  8.U(plru.nBits.W), s"get_next_state state=00 way=3: expected=08 actual=%d", get_next_states( 0)(3))
+      assert(get_next_states( 0)(4) ===  0.U(plru.nBits.W), s"get_next_state state=00 way=4: expected=00 actual=%d", get_next_states( 0)(4))
+      assert(get_next_states( 1)(0) === 13.U(plru.nBits.W), s"get_next_state state=01 way=0: expected=13 actual=%d", get_next_states( 1)(0))
+      assert(get_next_states( 1)(1) === 12.U(plru.nBits.W), s"get_next_state state=01 way=1: expected=12 actual=%d", get_next_states( 1)(1))
+      assert(get_next_states( 1)(2) === 11.U(plru.nBits.W), s"get_next_state state=01 way=2: expected=11 actual=%d", get_next_states( 1)(2))
+      assert(get_next_states( 1)(3) ===  9.U(plru.nBits.W), s"get_next_state state=01 way=3: expected=09 actual=%d", get_next_states( 1)(3))
+      assert(get_next_states( 1)(4) ===  1.U(plru.nBits.W), s"get_next_state state=01 way=4: expected=01 actual=%d", get_next_states( 1)(4))
+      assert(get_next_states( 2)(0) === 15.U(plru.nBits.W), s"get_next_state state=02 way=0: expected=15 actual=%d", get_next_states( 2)(0))
+      assert(get_next_states( 2)(1) === 14.U(plru.nBits.W), s"get_next_state state=02 way=1: expected=14 actual=%d", get_next_states( 2)(1))
+      assert(get_next_states( 2)(2) === 10.U(plru.nBits.W), s"get_next_state state=02 way=2: expected=10 actual=%d", get_next_states( 2)(2))
+      assert(get_next_states( 2)(3) ===  8.U(plru.nBits.W), s"get_next_state state=02 way=3: expected=08 actual=%d", get_next_states( 2)(3))
+      assert(get_next_states( 2)(4) ===  2.U(plru.nBits.W), s"get_next_state state=02 way=4: expected=02 actual=%d", get_next_states( 2)(4))
+      assert(get_next_states( 3)(0) === 15.U(plru.nBits.W), s"get_next_state state=03 way=0: expected=15 actual=%d", get_next_states( 3)(0))
+      assert(get_next_states( 3)(1) === 14.U(plru.nBits.W), s"get_next_state state=03 way=1: expected=14 actual=%d", get_next_states( 3)(1))
+      assert(get_next_states( 3)(2) === 11.U(plru.nBits.W), s"get_next_state state=03 way=2: expected=11 actual=%d", get_next_states( 3)(2))
+      assert(get_next_states( 3)(3) ===  9.U(plru.nBits.W), s"get_next_state state=03 way=3: expected=09 actual=%d", get_next_states( 3)(3))
+      assert(get_next_states( 3)(4) ===  3.U(plru.nBits.W), s"get_next_state state=03 way=4: expected=03 actual=%d", get_next_states( 3)(4))
+      assert(get_next_states( 4)(0) === 13.U(plru.nBits.W), s"get_next_state state=04 way=0: expected=13 actual=%d", get_next_states( 4)(0))
+      assert(get_next_states( 4)(1) === 12.U(plru.nBits.W), s"get_next_state state=04 way=1: expected=12 actual=%d", get_next_states( 4)(1))
+      assert(get_next_states( 4)(2) === 10.U(plru.nBits.W), s"get_next_state state=04 way=2: expected=10 actual=%d", get_next_states( 4)(2))
+      assert(get_next_states( 4)(3) ===  8.U(plru.nBits.W), s"get_next_state state=04 way=3: expected=08 actual=%d", get_next_states( 4)(3))
+      assert(get_next_states( 4)(4) ===  4.U(plru.nBits.W), s"get_next_state state=04 way=4: expected=04 actual=%d", get_next_states( 4)(4))
+      assert(get_next_states( 5)(0) === 13.U(plru.nBits.W), s"get_next_state state=05 way=0: expected=13 actual=%d", get_next_states( 5)(0))
+      assert(get_next_states( 5)(1) === 12.U(plru.nBits.W), s"get_next_state state=05 way=1: expected=12 actual=%d", get_next_states( 5)(1))
+      assert(get_next_states( 5)(2) === 11.U(plru.nBits.W), s"get_next_state state=05 way=2: expected=11 actual=%d", get_next_states( 5)(2))
+      assert(get_next_states( 5)(3) ===  9.U(plru.nBits.W), s"get_next_state state=05 way=3: expected=09 actual=%d", get_next_states( 5)(3))
+      assert(get_next_states( 5)(4) ===  5.U(plru.nBits.W), s"get_next_state state=05 way=4: expected=05 actual=%d", get_next_states( 5)(4))
+      assert(get_next_states( 6)(0) === 15.U(plru.nBits.W), s"get_next_state state=06 way=0: expected=15 actual=%d", get_next_states( 6)(0))
+      assert(get_next_states( 6)(1) === 14.U(plru.nBits.W), s"get_next_state state=06 way=1: expected=14 actual=%d", get_next_states( 6)(1))
+      assert(get_next_states( 6)(2) === 10.U(plru.nBits.W), s"get_next_state state=06 way=2: expected=10 actual=%d", get_next_states( 6)(2))
+      assert(get_next_states( 6)(3) ===  8.U(plru.nBits.W), s"get_next_state state=06 way=3: expected=08 actual=%d", get_next_states( 6)(3))
+      assert(get_next_states( 6)(4) ===  6.U(plru.nBits.W), s"get_next_state state=06 way=4: expected=06 actual=%d", get_next_states( 6)(4))
+      assert(get_next_states( 7)(0) === 15.U(plru.nBits.W), s"get_next_state state=07 way=0: expected=15 actual=%d", get_next_states( 7)(0))
+      assert(get_next_states( 7)(1) === 14.U(plru.nBits.W), s"get_next_state state=07 way=1: expected=14 actual=%d", get_next_states( 7)(1))
+      assert(get_next_states( 7)(2) === 11.U(plru.nBits.W), s"get_next_state state=07 way=2: expected=11 actual=%d", get_next_states( 7)(2))
+      assert(get_next_states( 7)(3) ===  9.U(plru.nBits.W), s"get_next_state state=07 way=3: expected=09 actual=%d", get_next_states( 7)(3))
+      assert(get_next_states( 7)(4) ===  7.U(plru.nBits.W), s"get_next_state state=07 way=4: expected=07 actual=%d", get_next_states( 7)(4))
+      assert(get_next_states( 8)(0) === 13.U(plru.nBits.W), s"get_next_state state=08 way=0: expected=13 actual=%d", get_next_states( 8)(0))
+      assert(get_next_states( 8)(1) === 12.U(plru.nBits.W), s"get_next_state state=08 way=1: expected=12 actual=%d", get_next_states( 8)(1))
+      assert(get_next_states( 8)(2) === 10.U(plru.nBits.W), s"get_next_state state=08 way=2: expected=10 actual=%d", get_next_states( 8)(2))
+      assert(get_next_states( 8)(3) ===  8.U(plru.nBits.W), s"get_next_state state=08 way=3: expected=08 actual=%d", get_next_states( 8)(3))
+      assert(get_next_states( 8)(4) ===  0.U(plru.nBits.W), s"get_next_state state=08 way=4: expected=00 actual=%d", get_next_states( 8)(4))
+      assert(get_next_states( 9)(0) === 13.U(plru.nBits.W), s"get_next_state state=09 way=0: expected=13 actual=%d", get_next_states( 9)(0))
+      assert(get_next_states( 9)(1) === 12.U(plru.nBits.W), s"get_next_state state=09 way=1: expected=12 actual=%d", get_next_states( 9)(1))
+      assert(get_next_states( 9)(2) === 11.U(plru.nBits.W), s"get_next_state state=09 way=2: expected=11 actual=%d", get_next_states( 9)(2))
+      assert(get_next_states( 9)(3) ===  9.U(plru.nBits.W), s"get_next_state state=09 way=3: expected=09 actual=%d", get_next_states( 9)(3))
+      assert(get_next_states( 9)(4) ===  1.U(plru.nBits.W), s"get_next_state state=09 way=4: expected=01 actual=%d", get_next_states( 9)(4))
+      assert(get_next_states(10)(0) === 15.U(plru.nBits.W), s"get_next_state state=10 way=0: expected=15 actual=%d", get_next_states(10)(0))
+      assert(get_next_states(10)(1) === 14.U(plru.nBits.W), s"get_next_state state=10 way=1: expected=14 actual=%d", get_next_states(10)(1))
+      assert(get_next_states(10)(2) === 10.U(plru.nBits.W), s"get_next_state state=10 way=2: expected=10 actual=%d", get_next_states(10)(2))
+      assert(get_next_states(10)(3) ===  8.U(plru.nBits.W), s"get_next_state state=10 way=3: expected=08 actual=%d", get_next_states(10)(3))
+      assert(get_next_states(10)(4) ===  2.U(plru.nBits.W), s"get_next_state state=10 way=4: expected=02 actual=%d", get_next_states(10)(4))
+      assert(get_next_states(11)(0) === 15.U(plru.nBits.W), s"get_next_state state=11 way=0: expected=15 actual=%d", get_next_states(11)(0))
+      assert(get_next_states(11)(1) === 14.U(plru.nBits.W), s"get_next_state state=11 way=1: expected=14 actual=%d", get_next_states(11)(1))
+      assert(get_next_states(11)(2) === 11.U(plru.nBits.W), s"get_next_state state=11 way=2: expected=11 actual=%d", get_next_states(11)(2))
+      assert(get_next_states(11)(3) ===  9.U(plru.nBits.W), s"get_next_state state=11 way=3: expected=09 actual=%d", get_next_states(11)(3))
+      assert(get_next_states(11)(4) ===  3.U(plru.nBits.W), s"get_next_state state=11 way=4: expected=03 actual=%d", get_next_states(11)(4))
+      assert(get_next_states(12)(0) === 13.U(plru.nBits.W), s"get_next_state state=12 way=0: expected=13 actual=%d", get_next_states(12)(0))
+      assert(get_next_states(12)(1) === 12.U(plru.nBits.W), s"get_next_state state=12 way=1: expected=12 actual=%d", get_next_states(12)(1))
+      assert(get_next_states(12)(2) === 10.U(plru.nBits.W), s"get_next_state state=12 way=2: expected=10 actual=%d", get_next_states(12)(2))
+      assert(get_next_states(12)(3) ===  8.U(plru.nBits.W), s"get_next_state state=12 way=3: expected=08 actual=%d", get_next_states(12)(3))
+      assert(get_next_states(12)(4) ===  4.U(plru.nBits.W), s"get_next_state state=12 way=4: expected=04 actual=%d", get_next_states(12)(4))
+      assert(get_next_states(13)(0) === 13.U(plru.nBits.W), s"get_next_state state=13 way=0: expected=13 actual=%d", get_next_states(13)(0))
+      assert(get_next_states(13)(1) === 12.U(plru.nBits.W), s"get_next_state state=13 way=1: expected=12 actual=%d", get_next_states(13)(1))
+      assert(get_next_states(13)(2) === 11.U(plru.nBits.W), s"get_next_state state=13 way=2: expected=11 actual=%d", get_next_states(13)(2))
+      assert(get_next_states(13)(3) ===  9.U(plru.nBits.W), s"get_next_state state=13 way=3: expected=09 actual=%d", get_next_states(13)(3))
+      assert(get_next_states(13)(4) ===  5.U(plru.nBits.W), s"get_next_state state=13 way=4: expected=05 actual=%d", get_next_states(13)(4))
+      assert(get_next_states(14)(0) === 15.U(plru.nBits.W), s"get_next_state state=14 way=0: expected=15 actual=%d", get_next_states(14)(0))
+      assert(get_next_states(14)(1) === 14.U(plru.nBits.W), s"get_next_state state=14 way=1: expected=14 actual=%d", get_next_states(14)(1))
+      assert(get_next_states(14)(2) === 10.U(plru.nBits.W), s"get_next_state state=14 way=2: expected=10 actual=%d", get_next_states(14)(2))
+      assert(get_next_states(14)(3) ===  8.U(plru.nBits.W), s"get_next_state state=14 way=3: expected=08 actual=%d", get_next_states(14)(3))
+      assert(get_next_states(14)(4) ===  6.U(plru.nBits.W), s"get_next_state state=14 way=4: expected=06 actual=%d", get_next_states(14)(4))
+      assert(get_next_states(15)(0) === 15.U(plru.nBits.W), s"get_next_state state=15 way=0: expected=15 actual=%d", get_next_states(15)(0))
+      assert(get_next_states(15)(1) === 14.U(plru.nBits.W), s"get_next_state state=15 way=1: expected=14 actual=%d", get_next_states(15)(1))
+      assert(get_next_states(15)(2) === 11.U(plru.nBits.W), s"get_next_state state=15 way=2: expected=11 actual=%d", get_next_states(15)(2))
+      assert(get_next_states(15)(3) ===  9.U(plru.nBits.W), s"get_next_state state=15 way=3: expected=09 actual=%d", get_next_states(15)(3))
+      assert(get_next_states(15)(4) ===  7.U(plru.nBits.W), s"get_next_state state=15 way=4: expected=07 actual=%d", get_next_states(15)(4))
+    }
+    case 6 => {
+      assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0))
+      assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1))
+      assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2))
+      assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3))
+      assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4))
+      assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5))
+      assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6))
+      assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7))
+      assert(get_replace_ways( 8) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=0 actual=%d", get_replace_ways( 8))
+      assert(get_replace_ways( 9) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=1 actual=%d", get_replace_ways( 9))
+      assert(get_replace_ways(10) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=0 actual=%d", get_replace_ways(10))
+      assert(get_replace_ways(11) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=1 actual=%d", get_replace_ways(11))
+      assert(get_replace_ways(12) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=2 actual=%d", get_replace_ways(12))
+      assert(get_replace_ways(13) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=2 actual=%d", get_replace_ways(13))
+      assert(get_replace_ways(14) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=3 actual=%d", get_replace_ways(14))
+      assert(get_replace_ways(15) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=3 actual=%d", get_replace_ways(15))
+      assert(get_replace_ways(16) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=16: expected=4 actual=%d", get_replace_ways(16))
+      assert(get_replace_ways(17) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=17: expected=4 actual=%d", get_replace_ways(17))
+      assert(get_replace_ways(18) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=18: expected=4 actual=%d", get_replace_ways(18))
+      assert(get_replace_ways(19) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=19: expected=4 actual=%d", get_replace_ways(19))
+      assert(get_replace_ways(20) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=20: expected=4 actual=%d", get_replace_ways(20))
+      assert(get_replace_ways(21) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=21: expected=4 actual=%d", get_replace_ways(21))
+      assert(get_replace_ways(22) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=22: expected=4 actual=%d", get_replace_ways(22))
+      assert(get_replace_ways(23) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=23: expected=4 actual=%d", get_replace_ways(23))
+      assert(get_replace_ways(24) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=24: expected=5 actual=%d", get_replace_ways(24))
+      assert(get_replace_ways(25) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=25: expected=5 actual=%d", get_replace_ways(25))
+      assert(get_replace_ways(26) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=26: expected=5 actual=%d", get_replace_ways(26))
+      assert(get_replace_ways(27) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=27: expected=5 actual=%d", get_replace_ways(27))
+      assert(get_replace_ways(28) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=28: expected=5 actual=%d", get_replace_ways(28))
+      assert(get_replace_ways(29) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=29: expected=5 actual=%d", get_replace_ways(29))
+      assert(get_replace_ways(30) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=30: expected=5 actual=%d", get_replace_ways(30))
+      assert(get_replace_ways(31) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=31: expected=5 actual=%d", get_replace_ways(31))
+    }
+    case _ => throw new IllegalArgumentException(s"no test pattern found for n_ways=$n_ways")
+  }
+}
\ No newline at end of file
diff --git a/rocket/src/util/package.scala b/rocket/src/util/package.scala
new file mode 100644
index 000000000..89c147071
--- /dev/null
+++ b/rocket/src/util/package.scala
@@ -0,0 +1,24 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util._
+import scala.math.min
+import scala.collection.{immutable, mutable}
+
+package object util {
+  def OptimizationBarrier[T <: Data](in: T): T = {
+    val barrier = Module(new Module {
+      val io = IO(new Bundle {
+        val x = Input(chiselTypeOf(in))  // same Chisel type as `in`; driven with `in` by the caller-side code below
+        val y = Output(chiselTypeOf(in)) // same Chisel type as `in`; this is the value OptimizationBarrier returns
+      })
+      io.y := io.x // pure pass-through: the wrapper module contains no logic of its own
+      override def desiredName = "OptimizationBarrier" // NOTE(review): presumably a named boundary tools will not optimise across — confirm
+    })
+    barrier.io.x := in
+    barrier.io.y
+  }
+}

From d53136bf5e0bcbcfa9e79147ba93b4bb0b482811 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 15 May 2023 20:45:59 +0800
Subject: [PATCH 16/32] TLB: refactor `TLSlaveParameters` related things

---
 rocket/src/AddressDecoder.scala   | 136 +++++++++++++++++++
 rocket/src/TLB.scala              |  34 +++--
 rocket/src/TLBPermissions.scala   |  51 +++----
 rocket/src/util/Memory.scala      | 212 ++++++++++++++++++++++++++++++
 rocket/src/util/Replacement.scala |   1 +
 rocket/src/util/package.scala     |  10 ++
 6 files changed, 401 insertions(+), 43 deletions(-)
 create mode 100644 rocket/src/AddressDecoder.scala
 create mode 100644 rocket/src/util/Memory.scala

diff --git a/rocket/src/AddressDecoder.scala b/rocket/src/AddressDecoder.scala
new file mode 100644
index 000000000..fea3e515a
--- /dev/null
+++ b/rocket/src/AddressDecoder.scala
@@ -0,0 +1,136 @@
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket
+
+import Chisel.log2Ceil
+
+import org.chipsalliance.rocket.util._
+
+object AddressDecoder
+{
+  type Port = Seq[AddressSet] // all address sets routed to one port
+  type Ports = Seq[Port]
+  type Partition = Ports // the ports that fall into one case of the bits selected so far
+  type Partitions = Seq[Partition]
+
+  val addressOrder = Ordering.ordered[AddressSet]
+  val portOrder = Ordering.Iterable(addressOrder)
+  val partitionOrder = Ordering.Iterable(portOrder)
+
+  // Find a small subset of address bits that disambiguates the ports (greedy search; not guaranteed minimal).
+  // ie: inspecting only the bits set in the result, you can decide to which port (outer Seq) an address belongs.
+  // `givenBits` are bits that are decoded anyway: they are always part of the result. Overlapping ports fail `require`.
+  def apply(ports: Ports, givenBits: BigInt = BigInt(0)): BigInt = {
+    val nonEmptyPorts = ports.filter(_.nonEmpty)
+    if (nonEmptyPorts.size <= 1) {
+      givenBits // zero or one port: no extra bits are needed
+    } else {
+      // Verify the user did not give us an impossible problem
+      nonEmptyPorts.combinations(2).foreach { case Seq(x, y) =>
+        x.foreach { a => y.foreach { b =>
+          require (!a.overlaps(b), s"Ports cannot overlap: $a $b")
+        } }
+      }
+
+      val maxBits = log2Ceil(1 + nonEmptyPorts.map(_.map(_.base).max).max) // candidate bits lie below the width of the largest base
+      val (bitsToTry, bitsToTake) = (0 until maxBits).map(BigInt(1) << _).partition(b => (givenBits & b) == 0)
+      val partitions = Seq(nonEmptyPorts.map(_.sorted).sorted(portOrder))
+      val givenPartitions = bitsToTake.foldLeft(partitions) { (p, b) => partitionPartitions(p, b) }
+      val selected = recurse(givenPartitions, bitsToTry.reverse.toSeq)
+      val output = selected.foldLeft(givenBits)(_ | _) // `selected` is empty when givenBits alone already separate all ports
+
+      // Modify the AddressSets to allow the new wider match functions
+      val widePorts = nonEmptyPorts.map { _.map { _.widen(~output) } }
+      // Verify that it remains possible to disambiguate all ports
+      widePorts.combinations(2).foreach { case Seq(x, y) =>
+        x.foreach { a => y.foreach { b =>
+          require (!a.overlaps(b), s"Ports cannot overlap: $a $b")
+        } }
+      }
+
+      output
+    }
+  }
+
+  // A simpler version that works for a Seq[Int]: each key becomes its own port holding AddressSet(key, 0)
+  def apply(keys: Seq[Int]): Int = {
+    val ports = keys.map(b => Seq(AddressSet(b, 0)))
+    apply(ports).toInt
+  }
+
+  // The algorithm has a set of partitions, discriminated by the selected bits.
+  // Each partition has a set of ports, listing all addresses that lead to that port.
+  // Seq[Seq[Seq[AddressSet]]]
+  //         ^^^^^^^^^^^^^^^ set of addresses that are routed out this port
+  //     ^^^ the list of ports
+  // ^^^ cases already distinguished by the selected bits thus far
+  //
+  // Solving this problem is NP-hard, so we use a simple greedy heuristic:
+  //   pick the bit which minimizes the number of ports in each partition
+  //   as a secondary goal, reduce the number of AddressSets within a partition
+
+  def bitScore(partitions: Partitions): Seq[Int] = { // lower is better; `recurse` compares scores element by element
+    val maxPortsPerPartition = partitions.map(_.size).max
+    val maxSetsPerPartition = partitions.map(_.map(_.size).sum).max
+    val sumSquarePortsPerPartition = partitions.map(p => p.size * p.size).sum
+    val sumSquareSetsPerPartition = partitions.map(_.map(p => p.size * p.size).sum).max // NOTE(review): named "sum" but takes the max over partitions — confirm intent
+    Seq(maxPortsPerPartition, maxSetsPerPartition, sumSquarePortsPerPartition, sumSquareSetsPerPartition)
+  }
+
+  def partitionPort(port: Port, bit: BigInt): (Port, Port) = { // presumably (sets matching `bit` = 0, sets matching `bit` = 1); a set may land in both
+    val addr_a = AddressSet(0, ~bit)
+    val addr_b = AddressSet(bit, ~bit)
+    // The addresses were sorted, so the filtered addresses are still sorted
+    val subset_a = port.filter(_.overlaps(addr_a))
+    val subset_b = port.filter(_.overlaps(addr_b))
+    (subset_a, subset_b)
+  }
+
+  def partitionPorts(ports: Ports, bit: BigInt): (Ports, Ports) = {
+    val partitioned_ports = ports.map(p => partitionPort(p, bit))
+    // because partitionPort dropped AddressSets, the ports might no longer be sorted
+    val case_a_ports = partitioned_ports.map(_._1).filter(!_.isEmpty).sorted(portOrder)
+    val case_b_ports = partitioned_ports.map(_._2).filter(!_.isEmpty).sorted(portOrder)
+    (case_a_ports, case_b_ports)
+  }
+
+  def partitionPartitions(partitions: Partitions, bit: BigInt): Partitions = {
+    val partitioned_partitions = partitions.map(p => partitionPorts(p, bit))
+    val case_a_partitions = partitioned_partitions.map(_._1).filter(!_.isEmpty)
+    val case_b_partitions = partitioned_partitions.map(_._2).filter(!_.isEmpty)
+    val new_partitions = (case_a_partitions ++ case_b_partitions).sorted(partitionOrder)
+    // Prevent combinatorial memory explosion; if two partitions are equal, keep only one
+    // Note: AddressSets in a port are sorted, and ports in a partition are sorted.
+    // This makes it easy to structurally compare two partitions for equality
+    val keep = (new_partitions.init zip new_partitions.tail) filter { case (a,b) => partitionOrder.compare(a,b) != 0 } map { _._2 }
+    new_partitions.head +: keep
+  }
+
+  // requirement: ports have sorted addresses and are sorted lexicographically
+  val debug = false // when true, the search below prints each partitioning step and the chosen bit
+  def recurse(partitions: Partitions, bits: Seq[BigInt]): Seq[BigInt] = {
+    if (partitions.forall(_.size <= 1)) Seq() else { // done once no partition holds more than one port (also for no partitions)
+      if (debug) {
+        println("Partitioning:")
+        partitions.foreach { partition =>
+          println("  Partition:")
+          partition.foreach { port =>
+            print("   ")
+            port.foreach { a => print(s" ${a}") }
+            println("")
+          }
+        }
+      }
+      val candidates = bits.map { bit =>
+        val result = partitionPartitions(partitions, bit)
+        val score = bitScore(result)
+        if (debug)
+          println("  For bit %x, %s".format(bit, score.toString))
+        (score, bit, result)
+      }
+      val (bestScore, bestBit, bestPartitions) = candidates.min(Ordering.by[(Seq[Int], BigInt, Partitions), Iterable[Int]](_._1.toIterable))
+      if (debug) println("=> Selected bit 0x%x".format(bestBit))
+      bestBit +: recurse(bestPartitions, bits.filter(_ != bestBit))
+    }
+  }
+}
diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 7a1a5d112..8db673b6b 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -307,7 +307,7 @@ class TLB(
   instruction: Boolean,
   lgMaxSize: Int,
   cfg: TLBConfig,
-  edge: TLEdgeOut, // TODO: Decoupled from Tilelink
+  memParameters: MemoryParameters,
   pmpGranularity: Int,
   pgLevels: Int,
   minPgLevels: Int,
@@ -322,6 +322,8 @@ class TLB(
   asIdBits: Int,
   xLen: Int,
   cacheBlockBytes: Int,
+  memoryCacheable: Boolean,
+  memoryHomogenous: Boolean,
   usingHypervisor: Boolean,
   usingVM: Boolean,
   usingAtomics: Boolean,
@@ -430,22 +432,16 @@ class TLB(
   pmp.io.prv := mpu_priv
   // PMA
   // check exist a slave can consume this address.
-  val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_)
-  // check utility to help check SoC property.
-  def fastCheck(member: TLManagerParameters => Boolean) = // TODO: Decoupled from Tilelink
+  val legal_address = Memory.findSafe(mpu_physaddr).reduce(_||_)
     legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
-  // todo: using DataScratchpad doesn't support cacheable.
-  val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B
-  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, cacheBlockBytes, BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
-  // In M mode, if access DM address(debug module program buffer)
-  val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B)
-  val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r
-  val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w
-  val prot_pp = fastCheck(_.supportsPutPartial)
-  val prot_al = fastCheck(_.supportsLogical)
-  val prot_aa = fastCheck(_.supportsArithmetic)
-  val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x
-  val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType)
+
+  val prot_r = memParameters.readable.B && pmp.io.r
+  val prot_w = memParameters.writeable.B && pmp.io.w
+  val prot_pp = !(memParameters.supportsPutPartial.none).B
+  val prot_al = !(memParameters.supportsLogical.none).B
+  val prot_aa = !(memParameters.supportsArithmetic.none).B
+  val prot_x = memParameters.executable.B && pmp.io.x
+  val prot_eff = (memParameters.hasPutEffects || memParameters.hasGetEffects).B
 
   // hit check
   val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn, priv_v))
@@ -461,7 +457,7 @@ class TLB(
     val refill_v = r_vstage1_en || r_stage2_en
     val newEntry = Wire(new TLBEntryData(ppnBits))
     newEntry.ppn := pte.ppn
-    newEntry.c := cacheable
+    newEntry.c := memoryCacheable.B
     newEntry.u := pte.u
     newEntry.g := pte.g && pte.v
     newEntry.ae_ptw := io.ptw.resp.bits.ae_ptw
@@ -546,7 +542,7 @@ class TLB(
   // put effect
   val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt)
   // cacheable
-  val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt)
+  val c_array = Cat(Fill(nPhysicalEntries, memoryCacheable.B), normal_entries.map(_.c).asUInt)
   // put partial
   val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt)
   // atomic arithmetic
@@ -556,7 +552,7 @@ class TLB(
   val ppp_array_if_cached = ppp_array | c_array
   val paa_array_if_cached = paa_array | (if(usingAtomicsInCache) c_array else 0.U)
   val pal_array_if_cached = pal_array | (if(usingAtomicsInCache) c_array else 0.U)
-  val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt)
+  val prefetchable_array = Cat((memoryCacheable && memoryHomogenous).B << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt)
 
   // vaddr misaligned: vaddr[1:0]=b00
   val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR
diff --git a/rocket/src/TLBPermissions.scala b/rocket/src/TLBPermissions.scala
index 933793b81..481197b49 100644
--- a/rocket/src/TLBPermissions.scala
+++ b/rocket/src/TLBPermissions.scala
@@ -3,7 +3,9 @@
 package org.chipsalliance.rocket
 
 import chisel3._
-import chisel3.util.isPow2
+import chisel3.util._
+
+import org.chipsalliance.rocket.util._
 
 case class TLBPermissions(
   homogeneous: Bool, // if false, the below are undefined
@@ -27,16 +29,16 @@ object TLBPageLookup
     val useful = r || w || x || c || a || l
   }
 
-  private def groupRegions(managers: Seq[TLManagerParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // TODO: Decoupled from Tilelink
-    val permissions = managers.map { m =>
-      (m.address, TLBFixedPermissions(
-        e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains m.regionType,
-        r = m.supportsGet     || m.supportsAcquireB, // if cached, never uses Get
-        w = m.supportsPutFull || m.supportsAcquireT, // if cached, never uses Put
-        x = m.executable,
-        c = m.supportsAcquireB,
-        a = m.supportsArithmetic,
-        l = m.supportsLogical))
+  private def groupRegions(memParameters: Seq[MemoryParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // TODO: Decoupled from Tilelink
+    val permissions = memParameters.map { p =>
+      (p.address, TLBFixedPermissions(
+        e = p.hasPutEffects   || p.hasGetEffects,
+        r = p.supportsGet     || p.supportsAcquireB, // if cached, never uses Get
+        w = p.supportsPutFull || p.supportsAcquireT, // if cached, never uses Put
+        x = p.executable,
+        c = p.supportsAcquireB,
+        a = p.supportsArithmetic,
+        l = p.supportsLogical))
     }
 
     permissions
@@ -47,8 +49,9 @@ object TLBPageLookup
       .toMap
   }
 
+  // TODO
   // Unmapped memory is considered to be inhomogeneous
-  def apply(managers: Seq[TLManagerParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = {
+  def apply(memParameters: Seq[MemoryParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = {
     require (isPow2(xLen) && xLen >= 8)
     require (isPow2(cacheBlockBytes) && cacheBlockBytes >= xLen/8)
     require (isPow2(pageSize) && pageSize >= cacheBlockBytes)
@@ -57,18 +60,18 @@ object TLBPageLookup
     val allSizes = TransferSizes(1, cacheBlockBytes)
     val amoSizes = TransferSizes(4, xLen/8)
 
-    val permissions = managers.foreach { m =>
-      require (!m.supportsGet        || m.supportsGet       .contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsGet} Get, but must support ${allSizes}")
-      require (!m.supportsPutFull    || m.supportsPutFull   .contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}")
-      require (!m.supportsPutPartial || m.supportsPutPartial.contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutPartial} PutPartial, but must support ${allSizes}")
-      require (!m.supportsAcquireB   || m.supportsAcquireB  .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}")
-      require (!m.supportsAcquireT   || m.supportsAcquireT  .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}")
-      require (!m.supportsLogical    || m.supportsLogical   .contains(amoSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}")
-      require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}")
-      require (!(m.supportsAcquireB && m.supportsPutFull && !m.supportsAcquireT), s"Memory region '${m.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)")
+    val permissions = memParameters.foreach { p =>
+      require (!p.supportsGet        || p.supportsGet       .contains(allSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsGet} Get, but must support ${allSizes}")
+      require (!p.supportsPutFull    || p.supportsPutFull   .contains(allSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsPutFull} PutFull, but must support ${allSizes}")
+      require (!p.supportsPutPartial || p.supportsPutPartial.contains(allSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsPutPartial} PutPartial, but must support ${allSizes}")
+      require (!p.supportsAcquireB   || p.supportsAcquireB  .contains(xferSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsAcquireB} AcquireB, but must support ${xferSizes}")
+      require (!p.supportsAcquireT   || p.supportsAcquireT  .contains(xferSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsAcquireT} AcquireT, but must support ${xferSizes}")
+      require (!p.supportsLogical    || p.supportsLogical   .contains(amoSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsLogical} Logical, but must support ${amoSizes}")
+      require (!p.supportsArithmetic || p.supportsArithmetic.contains(amoSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsArithmetic} Arithmetic, but must support ${amoSizes}")
+      require (!(p.supportsAcquireB && p.supportsPutFull && !p.supportsAcquireT), s"Memory region '${p.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)")
     }
 
-    val grouped = groupRegions(managers)
+    val grouped = groupRegions(memParameters)
       .mapValues(_.filter(_.alignment >= pageSize)) // discard any region that's not big enough
 
     def lowCostProperty(prop: TLBFixedPermissions => Boolean): UInt => Bool = {
@@ -105,7 +108,7 @@ object TLBPageLookup
   }
 
   // Are all pageSize intervals of mapped regions homogeneous?
-  def homogeneous(managers: Seq[TLManagerParameters], pageSize: BigInt): Boolean = {
-    groupRegions(managers).values.forall(_.forall(_.alignment >= pageSize))
+  def homogeneous(memParameters: Seq[MemoryParameters], pageSize: BigInt): Boolean = {
+    groupRegions(memParameters).values.forall(_.forall(_.alignment >= pageSize))
   }
 }
diff --git a/rocket/src/util/Memory.scala b/rocket/src/util/Memory.scala
new file mode 100644
index 000000000..bbaa2332c
--- /dev/null
+++ b/rocket/src/util/Memory.scala
@@ -0,0 +1,212 @@
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3._
+import chisel3.util._
+
+object Memory {
+  // The safe version will check the entire address
+  def findSafe(address: UInt, slaveAddressSets: Seq[AddressSet]) = VecInit(slaveAddressSets.map(_.contains(address))).asUInt.orR
+}
+
+// An potentially empty inclusive range of 2-powers [min, max] (in bytes)
+case class TransferSizes(min: Int, max: Int)
+{
+  def this(x: Int) = this(x, x)
+
+  require (min <= max, s"Min transfer $min > max transfer $max")
+  require (min >= 0 && max >= 0, s"TransferSizes must be positive, got: ($min, $max)")
+  require (max == 0 || isPow2(max), s"TransferSizes must be a power of 2, got: $max")
+  require (min == 0 || isPow2(min), s"TransferSizes must be a power of 2, got: $min")
+  require (max == 0 || min != 0, s"TransferSize 0 is forbidden unless (0,0), got: ($min, $max)")
+
+  def none = min == 0
+  def contains(x: Int) = isPow2(x) && min <= x && x <= max
+  def containsLg(x: Int) = contains(1 << x)
+  def containsLg(x: UInt) =
+    if (none) Bool(false)
+    else if (min == max) { UInt(log2Ceil(min)) === x }
+    else { UInt(log2Ceil(min)) <= x && x <= UInt(log2Ceil(max)) }
+
+  def contains(x: TransferSizes) = x.none || (min <= x.min && x.max <= max)
+
+  def intersect(x: TransferSizes) =
+    if (x.max < min || max < x.min) TransferSizes.none
+    else TransferSizes(scala.math.max(min, x.min), scala.math.min(max, x.max))
+
+  // Not a union, because the result may contain sizes contained by neither term
+  // NOT TO BE CONFUSED WITH COVERPOINTS
+  def mincover(x: TransferSizes) = {
+    if (none) {
+      x
+    } else if (x.none) {
+      this
+    } else {
+      TransferSizes(scala.math.min(min, x.min), scala.math.max(max, x.max))
+    }
+  }
+
+  override def toString() = "TransferSizes[%d, %d]".format(min, max)
+}
+
+object TransferSizes {
+  def apply(x: Int) = new TransferSizes(x)
+  val none = new TransferSizes(0)
+
+  def mincover(seq: Seq[TransferSizes]) = seq.foldLeft(none)(_ mincover _)
+  def intersect(seq: Seq[TransferSizes]) = seq.reduce(_ intersect _)
+
+  implicit def asBool(x: TransferSizes) = !x.none
+}
+
+// AddressSets specify the address space managed by the manager
+// Base is the base address, and mask are the bits consumed by the manager
+// e.g: base=0x200, mask=0xff describes a device managing 0x200-0x2ff
+// e.g: base=0x1000, mask=0xf0f decribes a device managing 0x1000-0x100f, 0x1100-0x110f, ...
+case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet]
+{
+  // Forbid misaligned base address (and empty sets)
+  require ((base & mask) == 0, s"Mis-aligned AddressSets are forbidden, got: ${this.toString}")
+  require (base >= 0, s"AddressSet negative base is ambiguous: $base") // TL2 address widths are not fixed => negative is ambiguous
+  // We do allow negative mask (=> ignore all high bits)
+
+  def contains(x: BigInt) = ((x ^ base) & ~mask) == 0
+  def contains(x: UInt) = ((x ^ UInt(base)).zext & SInt(~mask)) === SInt(0)
+
+  // turn x into an address contained in this set
+  def legalize(x: UInt): UInt = base.U | (mask.U & x)
+
+  // overlap iff bitwise: both care (~mask0 & ~mask1) => both equal (base0=base1)
+  def overlaps(x: AddressSet) = (~(mask | x.mask) & (base ^ x.base)) == 0
+  // contains iff bitwise: x.mask => mask && contains(x.base)
+  def contains(x: AddressSet) = ((x.mask | (base ^ x.base)) & ~mask) == 0
+
+  // The number of bytes to which the manager must be aligned
+  def alignment = ((mask + 1) & ~mask)
+  // Is this a contiguous memory range
+  def contiguous = alignment == mask+1
+
+  def finite = mask >= 0
+  def max = { require (finite, "Max cannot be calculated on infinite mask"); base | mask }
+
+  // Widen the match function to ignore all bits in imask
+  def widen(imask: BigInt) = AddressSet(base & ~imask, mask | imask)
+
+  // Return an AddressSet that only contains the addresses both sets contain
+  def intersect(x: AddressSet): Option[AddressSet] = {
+    if (!overlaps(x)) {
+      None
+    } else {
+      val r_mask = mask & x.mask
+      val r_base = base | x.base
+      Some(AddressSet(r_base, r_mask))
+    }
+  }
+
+  def subtract(x: AddressSet): Seq[AddressSet] = {
+    intersect(x) match {
+      case None => Seq(this)
+      case Some(remove) => AddressSet.enumerateBits(mask & ~remove.mask).map { bit =>
+        val nmask = (mask & (bit-1)) | remove.mask
+        val nbase = (remove.base ^ bit) & ~nmask
+        AddressSet(nbase, nmask)
+      }
+    }
+  }
+
+  // AddressSets have one natural Ordering (the containment order, if contiguous)
+  def compare(x: AddressSet) = {
+    val primary   = (this.base - x.base).signum // smallest address first
+    val secondary = (x.mask - this.mask).signum // largest mask first
+    if (primary != 0) primary else secondary
+  }
+
+  // We always want to see things in hex
+  override def toString() = {
+    if (mask >= 0) {
+      "AddressSet(0x%x, 0x%x)".format(base, mask)
+    } else {
+      "AddressSet(0x%x, ~0x%x)".format(base, ~mask)
+    }
+  }
+
+  def toRanges = { // split this (finite) set into its contiguous, alignment-sized pieces
+    require (finite, "Ranges cannot be calculated on infinite mask")
+    val size = alignment // byte size of each contiguous piece
+    val fragments = mask & ~(size-1) // mask bits at or above `size`, i.e. outside the contiguous low part
+    val bits = bitIndexes(fragments)
+    (BigInt(0) until (BigInt(1) << bits.size)).map { i => // one piece per combination of fragment bits
+      val off = bitIndexes(i).foldLeft(base) { case (a, b) => a.setBit(bits(b)) }
+      AddressSet(off, size-1) // second argument is a mask: `size` itself would set one bit instead of covering the piece
+    }
+  }
+}
+
+object AddressSet
+{
+  val everything = AddressSet(0, -1)
+  def misaligned(base: BigInt, size: BigInt, tail: Seq[AddressSet] = Seq()): Seq[AddressSet] = {
+    if (size == 0) tail.reverse else {
+      val maxBaseAlignment = base & (-base) // 0 for infinite (LSB)
+      val maxSizeAlignment = BigInt(1) << log2Floor(size) // MSB of size
+      val step =
+        if (maxBaseAlignment == 0 || maxBaseAlignment > maxSizeAlignment)
+        maxSizeAlignment else maxBaseAlignment
+      misaligned(base+step, size-step, AddressSet(base, step-1) +: tail)
+    }
+  }
+
+  def unify(seq: Seq[AddressSet], bit: BigInt): Seq[AddressSet] = {
+    // Pair terms up by ignoring 'bit'
+    seq.distinct.groupBy(x => x.copy(base = x.base & ~bit)).map { case (key, seq) =>
+      if (seq.size == 1) {
+        seq.head // singleton -> unaffected
+      } else {
+        key.copy(mask = key.mask | bit) // pair - widen mask by bit
+      }
+    }.toList
+  }
+
+  def unify(seq: Seq[AddressSet]): Seq[AddressSet] = {
+    val bits = seq.map(_.base).foldLeft(BigInt(0))(_ | _)
+    AddressSet.enumerateBits(bits).foldLeft(seq) { case (acc, bit) => unify(acc, bit) }.sorted
+  }
+
+  def enumerateMask(mask: BigInt): Seq[BigInt] = {
+    def helper(id: BigInt, tail: Seq[BigInt]): Seq[BigInt] =
+      if (id == mask) (id +: tail).reverse else helper(((~mask | id) + 1) & mask, id +: tail)
+    helper(0, Nil)
+  }
+
+  def enumerateBits(mask: BigInt): Seq[BigInt] = {
+    def helper(x: BigInt): Seq[BigInt] = {
+      if (x == 0) {
+        Nil
+      } else {
+        val bit = x & (-x)
+        bit +: helper(x & ~bit)
+      }
+    }
+    helper(mask)
+  }
+}
+
+case class MemoryParameters(
+  val readable: Boolean,
+  val writeable: Boolean,
+  val executable: Boolean,
+  val supportsLogical: TransferSizes,
+  val supportsArithmetic: TransferSizes,
+  val supportsPutFull: TransferSizes,
+  val supportsPutPartial: TransferSizes,
+  val supportsGet: TransferSizes,
+  val supportsAcquireB: TransferSizes,
+  val supportsAcquireT: TransferSizes,
+
+  val hasPutEffects: Boolean,
+  val hasGetEffects: Boolean,
+
+  val name: String,
+  val address: Seq[AddressSet]
+)
\ No newline at end of file
diff --git a/rocket/src/util/Replacement.scala b/rocket/src/util/Replacement.scala
index 40eab181c..236aca177 100644
--- a/rocket/src/util/Replacement.scala
+++ b/rocket/src/util/Replacement.scala
@@ -5,6 +5,7 @@ package org.chipsalliance.rocket.util
 
 import chisel3._
 import chisel3.util._
+import chisel3.util.random._
 
 abstract class ReplacementPolicy {
   def nBits: Int
diff --git a/rocket/src/util/package.scala b/rocket/src/util/package.scala
index 89c147071..dbecdf551 100644
--- a/rocket/src/util/package.scala
+++ b/rocket/src/util/package.scala
@@ -21,4 +21,14 @@ package object util {
     barrier.io.x := in
     barrier.io.y
   }
+
+  def bitIndexes(x: BigInt, tail: Seq[Int] = Nil): Seq[Int] = {
+    require (x >= 0)
+    if (x == 0) {
+      tail.reverse
+    } else {
+      val lowest = x.lowestSetBit
+      bitIndexes(x.clearBit(lowest), lowest +: tail)
+    }
+  }
 }

From f590f918a7a9e3089044c155e0cfb2bcfa8b87ed Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Wed, 17 May 2023 19:25:58 +0800
Subject: [PATCH 17/32] TLB: refactor memory slave parameters

---
 rocket/src/TLB.scala            | 26 ++++++------
 rocket/src/TLBPermissions.scala | 16 ++++----
 rocket/src/util/Memory.scala    | 70 +++++++++++++++++++++++----------
 rocket/src/util/package.scala   | 13 ++++++
 4 files changed, 85 insertions(+), 40 deletions(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 8db673b6b..41bfa0d36 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -301,13 +301,12 @@ case class TLBConfig(
   * @param instruction true for ITLB, false for DTLB
   * @param lgMaxSize @todo seems granularity
   * @param cfg [[TLBConfig]]
-  * @param edge collect SoC metadata.
   */
 class TLB(
   instruction: Boolean,
   lgMaxSize: Int,
   cfg: TLBConfig,
-  memParameters: MemoryParameters,
+  memSlaves: Seq[MemSlaveParameters],
   pmpGranularity: Int,
   pgLevels: Int,
   minPgLevels: Int,
@@ -432,16 +431,19 @@ class TLB(
   pmp.io.prv := mpu_priv
   // PMA
   // check exist a slave can consume this address.
-  val legal_address = Memory.findSafe(mpu_physaddr).reduce(_||_)
-    legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
-
-  val prot_r = memParameters.readable.B && pmp.io.r
-  val prot_w = memParameters.writeable.B && pmp.io.w
-  val prot_pp = !(memParameters.supportsPutPartial.none).B
-  val prot_al = !(memParameters.supportsLogical.none).B
-  val prot_aa = !(memParameters.supportsArithmetic.none).B
-  val prot_x = memParameters.executable.B && pmp.io.x
-  val prot_eff = (memParameters.hasPutEffects || memParameters.hasGetEffects).B
+  val legal_address = Memory.findSafe(mpu_physaddr, memSlaves).reduce(_ || _)
+  // check utility to help check SoC property
+  def fastCheck(member: MemSlaveParameters => Boolean) = 
+    legal_address && Memory.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B, memSlaves)
+  // In M mode, if access DM address(debug module program buffer)
+  val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) // TODO: Refactor `p`
+  val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r
+  val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w
+  val prot_pp = fastCheck(_.supportsPutPartial)
+  val prot_al = fastCheck(_.supportsLogical)
+  val prot_aa = fastCheck(_.supportsArithmetic)
+  val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x
+  val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType)
 
   // hit check
   val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn, priv_v))
diff --git a/rocket/src/TLBPermissions.scala b/rocket/src/TLBPermissions.scala
index 481197b49..706304f40 100644
--- a/rocket/src/TLBPermissions.scala
+++ b/rocket/src/TLBPermissions.scala
@@ -29,10 +29,10 @@ object TLBPageLookup
     val useful = r || w || x || c || a || l
   }
 
-  private def groupRegions(memParameters: Seq[MemoryParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // TODO: Decoupled from Tilelink
-    val permissions = memParameters.map { p =>
+  private def groupRegions(memSlaves: Seq[MemSlaveParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // TODO: Decoupled from Tilelink
+    val permissions = memSlaves.map { p =>
       (p.address, TLBFixedPermissions(
-        e = p.hasPutEffects   || p.hasGetEffects,
+        e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains p.regionType,
         r = p.supportsGet     || p.supportsAcquireB, // if cached, never uses Get
         w = p.supportsPutFull || p.supportsAcquireT, // if cached, never uses Put
         x = p.executable,
@@ -51,7 +51,7 @@ object TLBPageLookup
 
   // TODO
   // Unmapped memory is considered to be inhomogeneous
-  def apply(memParameters: Seq[MemoryParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = {
+  def apply(memSlaves: Seq[MemSlaveParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = {
     require (isPow2(xLen) && xLen >= 8)
     require (isPow2(cacheBlockBytes) && cacheBlockBytes >= xLen/8)
     require (isPow2(pageSize) && pageSize >= cacheBlockBytes)
@@ -60,7 +60,7 @@ object TLBPageLookup
     val allSizes = TransferSizes(1, cacheBlockBytes)
     val amoSizes = TransferSizes(4, xLen/8)
 
-    val permissions = memParameters.foreach { p =>
+    val permissions = memSlaves.foreach { p =>
       require (!p.supportsGet        || p.supportsGet       .contains(allSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsGet} Get, but must support ${allSizes}")
       require (!p.supportsPutFull    || p.supportsPutFull   .contains(allSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsPutFull} PutFull, but must support ${allSizes}")
       require (!p.supportsPutPartial || p.supportsPutPartial.contains(allSizes),  s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsPutPartial} PutPartial, but must support ${allSizes}")
@@ -71,7 +71,7 @@ object TLBPageLookup
       require (!(p.supportsAcquireB && p.supportsPutFull && !p.supportsAcquireT), s"Memory region '${p.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)")
     }
 
-    val grouped = groupRegions(memParameters)
+    val grouped = groupRegions(memSlaves)
       .mapValues(_.filter(_.alignment >= pageSize)) // discard any region that's not big enough
 
     def lowCostProperty(prop: TLBFixedPermissions => Boolean): UInt => Bool = {
@@ -108,7 +108,7 @@ object TLBPageLookup
   }
 
   // Are all pageSize intervals of mapped regions homogeneous?
-  def homogeneous(memParameters: Seq[MemoryParameters], pageSize: BigInt): Boolean = {
-    groupRegions(memParameters).values.forall(_.forall(_.alignment >= pageSize))
+  def homogeneous(memSlaves: Seq[MemSlaveParameters], pageSize: BigInt): Boolean = {
+    groupRegions(memSlaves).values.forall(_.forall(_.alignment >= pageSize))
   }
 }
diff --git a/rocket/src/util/Memory.scala b/rocket/src/util/Memory.scala
index bbaa2332c..4d64945f1 100644
--- a/rocket/src/util/Memory.scala
+++ b/rocket/src/util/Memory.scala
@@ -5,9 +5,40 @@ package org.chipsalliance.rocket.util
 import chisel3._
 import chisel3.util._
 
+import org.chipsalliance.rocket._
+
 object Memory {
   // The safe version will check the entire address
-  def findSafe(address: UInt, slaveAddressSets: Seq[AddressSet]) = VecInit(slaveAddressSets.map(_.contains(address))).asUInt.orR
+  def findSafe(address: UInt, slaves: Seq[MemSlaveParameters]) = VecInit(slaves.map(_.address.map(_.contains(address)).reduce(_ || _)))
+
+  // Compute the simplest AddressSets that decide a key
+  def fastPropertyGroup[K](p: MemSlaveParameters => K, slaves: Seq[MemSlaveParameters]): Seq[(K, Seq[AddressSet])] = {
+    val groups = groupByIntoSeq(slaves.map(m => (p(m), m.address)))( _._1).map { case (k, vs) =>
+      k -> vs.flatMap(_._2)
+    }
+    val reductionMask = AddressDecoder(groups.map(_._2))
+    groups.map { case (k, seq) => k -> AddressSet.unify(seq.map(_.widen(~reductionMask)).distinct) }
+  }
+  // Select a property
+  def fastProperty[K, D <: Data](address: UInt, p: MemSlaveParameters => K, d: K => D, slaves: Seq[MemSlaveParameters]): D =
+    Mux1H(fastPropertyGroup(p, slaves).map { case (v, a) => (a.map(_.contains(address)).reduce(_||_), d(v)) })
+}
+
+/** Options for describing the attributes of memory regions */
+object RegionType {
+  // Define the 'more relaxed than' ordering
+  val cases = Seq(CACHED, TRACKED, UNCACHED, IDEMPOTENT, VOLATILE, PUT_EFFECTS, GET_EFFECTS)
+  sealed trait T extends Ordered[T] {
+    def compare(that: T): Int = cases.indexOf(that) compare cases.indexOf(this)
+  }
+
+  case object CACHED      extends T // an intermediate agent may have cached a copy of the region for you
+  case object TRACKED     extends T // the region may have been cached by another master, but coherence is being provided
+  case object UNCACHED    extends T // the region has not been cached yet, but should be cached when possible
+  case object IDEMPOTENT  extends T // gets return most recently put content, but content should not be cached
+  case object VOLATILE    extends T // content may change without a put, but puts and gets have no side effects
+  case object PUT_EFFECTS extends T // puts produce side effects and so must not be combined/delayed
+  case object GET_EFFECTS extends T // gets produce side effects and so must not be issued speculatively
 }
 
 // An potentially empty inclusive range of 2-powers [min, max] (in bytes)
@@ -25,9 +56,9 @@ case class TransferSizes(min: Int, max: Int)
   def contains(x: Int) = isPow2(x) && min <= x && x <= max
   def containsLg(x: Int) = contains(1 << x)
   def containsLg(x: UInt) =
-    if (none) Bool(false)
-    else if (min == max) { UInt(log2Ceil(min)) === x }
-    else { UInt(log2Ceil(min)) <= x && x <= UInt(log2Ceil(max)) }
+    if (none) false.B
+    else if (min == max) { log2Ceil(min).U === x }
+    else { log2Ceil(min).U <= x && x <= log2Ceil(max).U }
 
   def contains(x: TransferSizes) = x.none || (min <= x.min && x.max <= max)
 
@@ -72,7 +103,7 @@ case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet]
   // We do allow negative mask (=> ignore all high bits)
 
   def contains(x: BigInt) = ((x ^ base) & ~mask) == 0
-  def contains(x: UInt) = ((x ^ UInt(base)).zext & SInt(~mask)) === SInt(0)
+  def contains(x: UInt) = ((x ^ base.U).zext & (~mask).S) === 0.S
 
   // turn x into an address contained in this set
   def legalize(x: UInt): UInt = base.U | (mask.U & x)
@@ -192,21 +223,20 @@ object AddressSet
   }
 }
 
-case class MemoryParameters(
-  val readable: Boolean,
-  val writeable: Boolean,
-  val executable: Boolean,
-  val supportsLogical: TransferSizes,
-  val supportsArithmetic: TransferSizes,
-  val supportsPutFull: TransferSizes,
-  val supportsPutPartial: TransferSizes,
-  val supportsGet: TransferSizes,
-  val supportsAcquireB: TransferSizes,
-  val supportsAcquireT: TransferSizes,
-
-  val hasPutEffects: Boolean,
-  val hasGetEffects: Boolean,
+case class MemSlaveParameters(
+  val address: Seq[AddressSet],
+  val regionType:         RegionType.T  = RegionType.GET_EFFECTS,
+
+  val executable:         Boolean       = false,
+
+  val supportsAcquireT:   TransferSizes = TransferSizes.none,
+  val supportsAcquireB:   TransferSizes = TransferSizes.none,
+  val supportsArithmetic: TransferSizes = TransferSizes.none,
+  val supportsLogical:    TransferSizes = TransferSizes.none,
+  val supportsGet:        TransferSizes = TransferSizes.none,
+  val supportsPutFull:    TransferSizes = TransferSizes.none,
+  val supportsPutPartial: TransferSizes = TransferSizes.none,
+  val supportsHint:       TransferSizes = TransferSizes.none,
 
   val name: String,
-  val address: Seq[AddressSet]
 )
\ No newline at end of file
diff --git a/rocket/src/util/package.scala b/rocket/src/util/package.scala
index dbecdf551..aec4ad6a4 100644
--- a/rocket/src/util/package.scala
+++ b/rocket/src/util/package.scala
@@ -31,4 +31,17 @@ package object util {
       bitIndexes(x.clearBit(lowest), lowest +: tail)
     }
   }
+
+  /** Similar to Seq.groupBy except this returns a Seq instead of a Map
+    * Useful for deterministic code generation
+    */
+  def groupByIntoSeq[A, K](xs: Seq[A])(f: A => K): immutable.Seq[(K, immutable.Seq[A])] = {
+    val map = mutable.LinkedHashMap.empty[K, mutable.ListBuffer[A]]
+    for (x <- xs) {
+      val key = f(x)
+      val l = map.getOrElseUpdate(key, mutable.ListBuffer.empty[A])
+      l += x
+    }
+    map.view.map({ case (k, vs) => k -> vs.toList }).toList
+  }
 }

From b10b5603021c2f7f9fce7e66f30e976e394e32a7 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Fri, 19 May 2023 16:56:05 +0800
Subject: [PATCH 18/32] TLB: refactor `AddressSet`

---
 rocket/src/util/Memory.scala | 46 +++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/rocket/src/util/Memory.scala b/rocket/src/util/Memory.scala
index 4d64945f1..a5bcf7822 100644
--- a/rocket/src/util/Memory.scala
+++ b/rocket/src/util/Memory.scala
@@ -4,6 +4,7 @@ package org.chipsalliance.rocket.util
 
 import chisel3._
 import chisel3.util._
+import chisel3.util.experimental._
 
 import org.chipsalliance.rocket._
 
@@ -95,23 +96,24 @@ object TransferSizes {
 // Base is the base address, and mask are the bits consumed by the manager
 // e.g: base=0x200, mask=0xff describes a device managing 0x200-0x2ff
 // e.g: base=0x1000, mask=0xf0f decribes a device managing 0x1000-0x100f, 0x1100-0x110f, ...
-case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet]
+case class AddressSet(val bitSet: BitSet) extends Ordered[AddressSet]
 {
-  // Forbid misaligned base address (and empty sets)
-  require ((base & mask) == 0, s"Mis-aligned AddressSets are forbidden, got: ${this.toString}")
-  require (base >= 0, s"AddressSet negative base is ambiguous: $base") // TL2 address widths are not fixed => negative is ambiguous
-  // We do allow negative mask (=> ignore all high bits)
+  // TODO: This assumption might not hold true after BitSet intersection or subtraction. It is highly depended on the concrete implementation of BitSet.
+  require(bitSet.terms.size == 1, "The wrapped BitSet should only have one BitPat")
 
-  def contains(x: BigInt) = ((x ^ base) & ~mask) == 0
-  def contains(x: UInt) = ((x ^ base.U).zext & (~mask).S) === 0.S
+  val base = bitSet.terms.head.value
+  val mask = bitSet.terms.head.mask
+
+  def contains(x: BigInt) = bitSet matches x.U
+  def contains(x: UInt) = bitSet matches x
 
   // turn x into an address contained in this set
   def legalize(x: UInt): UInt = base.U | (mask.U & x)
 
   // overlap iff bitwise: both care (~mask0 & ~mask1) => both equal (base0=base1)
-  def overlaps(x: AddressSet) = (~(mask | x.mask) & (base ^ x.base)) == 0
+  def overlaps(x: AddressSet) = bitSet overlap x.bitSet
   // contains iff bitwise: x.mask => mask && contains(x.base)
-  def contains(x: AddressSet) = ((x.mask | (base ^ x.base)) & ~mask) == 0
+  def contains(x: AddressSet) = bitSet cover x.bitSet
 
   // The number of bytes to which the manager must be aligned
   def alignment = ((mask + 1) & ~mask)
@@ -129,21 +131,12 @@ case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet]
     if (!overlaps(x)) {
       None
     } else {
-      val r_mask = mask & x.mask
-      val r_base = base | x.base
-      Some(AddressSet(r_base, r_mask))
+      Some(AddressSet(bitSet intersect x.bitSet))
     }
   }
 
   def subtract(x: AddressSet): Seq[AddressSet] = {
-    intersect(x) match {
-      case None => Seq(this)
-      case Some(remove) => AddressSet.enumerateBits(mask & ~remove.mask).map { bit =>
-        val nmask = (mask & (bit-1)) | remove.mask
-        val nbase = (remove.base ^ bit) & ~nmask
-        AddressSet(nbase, nmask)
-      }
-    }
+    (bitSet subtract x.bitSet).terms.toSeq.map(p => AddressSet(BitSet(p))) // one AddressSet per BitPat term left after removing x
   }
 
   // AddressSets have one natural Ordering (the containment order, if contiguous)
@@ -176,6 +169,15 @@ case class AddressSet(base: BigInt, mask: BigInt) extends Ordered[AddressSet]
 
 object AddressSet
 {
+  def apply(base: BigInt, mask: BigInt): AddressSet = { // build a single-term AddressSet from a base and a mask
+    // Forbid misaligned base address (and empty sets)
+    require ((base & mask) == 0, "Mis-aligned AddressSets are forbidden, got: AddressSet(0x%x, 0x%x)".format(base, mask))
+    require (base >= 0, s"AddressSet negative base is ambiguous: $base") // TL2 address widths are not fixed => negative is ambiguous
+    // We do allow negative mask (=> ignore all high bits)
+    // NOTE(review): a negative mask reaches `mask.U` below, and BitPat's mask presumably marks compared (not ignored) bits -- confirm both
+    AddressSet(BitSet(new BitPat(base, mask, base.U.getWidth max mask.U.getWidth)))
+  }
+
   val everything = AddressSet(0, -1)
   def misaligned(base: BigInt, size: BigInt, tail: Seq[AddressSet] = Seq()): Seq[AddressSet] = {
     if (size == 0) tail.reverse else {
@@ -190,11 +192,11 @@ object AddressSet
 
   def unify(seq: Seq[AddressSet], bit: BigInt): Seq[AddressSet] = {
     // Pair terms up by ignoring 'bit'
-    seq.distinct.groupBy(x => x.copy(base = x.base & ~bit)).map { case (key, seq) =>
+    seq.distinct.groupBy(x => AddressSet(x.base & ~bit, x.mask)).map { case (key, seq) =>
       if (seq.size == 1) {
         seq.head // singleton -> unaffected
       } else {
-        key.copy(mask = key.mask | bit) // pair - widen mask by bit
+        AddressSet(key.base, key.mask | bit) // pair - widen mask by bit
       }
     }.toList
   }

From c0b907d03d4a3022b436d334158c8eb3e820bbfd Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 22 May 2023 16:14:01 +0800
Subject: [PATCH 19/32] TLB: minor modifications

---
 rocket/src/TLB.scala              |  2 +-
 rocket/src/util/Replacement.scala |  2 ++
 rocket/src/util/package.scala     | 13 -------------
 3 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 41bfa0d36..15ae59036 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -155,7 +155,7 @@ class TLBEntry(
   /** returns the index of sector */
   private def sectorIdx(vpn: UInt) = vpn(log2Ceil(nSectors) - 1, 0)
   /** returns the entry data matched with this vpn*/
-  def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData(ppnBits)))
+  def getData(vpn: UInt) = data(sectorIdx(vpn)).asTypeOf(new TLBEntryData(ppnBits))
   /** returns whether a sector hits */
   def sectorHit(vpn: UInt, virtual: Bool) = valid.asUInt.orR && sectorTagMatch(vpn, virtual)
   /** returns whether tag matches vpn */
diff --git a/rocket/src/util/Replacement.scala b/rocket/src/util/Replacement.scala
index 236aca177..f7ccc116a 100644
--- a/rocket/src/util/Replacement.scala
+++ b/rocket/src/util/Replacement.scala
@@ -1,6 +1,8 @@
 // See LICENSE.SiFive for license details.
 // See LICENSE.Berkeley for license details.
 
+// TODO: Should be upstreamed to Chisel
+
 package org.chipsalliance.rocket.util
 
 import chisel3._
diff --git a/rocket/src/util/package.scala b/rocket/src/util/package.scala
index aec4ad6a4..27ffb5c79 100644
--- a/rocket/src/util/package.scala
+++ b/rocket/src/util/package.scala
@@ -9,19 +9,6 @@ import scala.math.min
 import scala.collection.{immutable, mutable}
 
 package object util {
-  def OptimizationBarrier[T <: Data](in: T): T = {
-    val barrier = Module(new Module {
-      val io = IO(new Bundle {
-        val x = Input(chiselTypeOf(in))
-        val y = Output(chiselTypeOf(in))
-      })
-      io.y := io.x
-      override def desiredName = "OptimizationBarrier"
-    })
-    barrier.io.x := in
-    barrier.io.y
-  }
-
   def bitIndexes(x: BigInt, tail: Seq[Int] = Nil): Seq[Int] = {
     require (x >= 0)
     if (x == 0) {

From 58c6fe466be9c4cfdc9b8cb8f0c60b5304359251 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 22 May 2023 16:19:24 +0800
Subject: [PATCH 20/32] TLB: move `AddressDecoder`

---
 rocket/src/{ => util}/AddressDecoder.scala | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename rocket/src/{ => util}/AddressDecoder.scala (100%)

diff --git a/rocket/src/AddressDecoder.scala b/rocket/src/util/AddressDecoder.scala
similarity index 100%
rename from rocket/src/AddressDecoder.scala
rename to rocket/src/util/AddressDecoder.scala

From 2d670543b8e1b32905163978ca290a9a3b646f6d Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Thu, 1 Jun 2023 20:27:06 +0800
Subject: [PATCH 21/32] TLB: remove some clutters

---
 rocket/src/Consts.scala | 87 -----------------------------------------
 rocket/src/TLB.scala    |  9 +++--
 2 files changed, 5 insertions(+), 91 deletions(-)
 delete mode 100644 rocket/src/Consts.scala

diff --git a/rocket/src/Consts.scala b/rocket/src/Consts.scala
deleted file mode 100644
index f408d9e2b..000000000
--- a/rocket/src/Consts.scala
+++ /dev/null
@@ -1,87 +0,0 @@
-// See LICENSE.Berkeley for license details.
-
-package org.chipsalliance.rocket.constants
-
-import chisel3._
-import chisel3.util._
-
-object ScalarOpConstants {
-  val SZ_BR = 3
-  def BR_X    = BitPat("b???")
-  def BR_EQ   = 0.U(3.W)
-  def BR_NE   = 1.U(3.W)
-  def BR_J    = 2.U(3.W)
-  def BR_N    = 3.U(3.W)
-  def BR_LT   = 4.U(3.W)
-  def BR_GE   = 5.U(3.W)
-  def BR_LTU  = 6.U(3.W)
-  def BR_GEU  = 7.U(3.W)
-
-  def A1_X    = BitPat("b??")
-  def A1_ZERO = 0.U(2.W)
-  def A1_RS1  = 1.U(2.W)
-  def A1_PC   = 2.U(2.W)
-
-  def IMM_X  = BitPat("b???")
-  def IMM_S  = 0.U(3.W)
-  def IMM_SB = 1.U(3.W)
-  def IMM_U  = 2.U(3.W)
-  def IMM_UJ = 3.U(3.W)
-  def IMM_I  = 4.U(3.W)
-  def IMM_Z  = 5.U(3.W)
-
-  def A2_X    = BitPat("b??")
-  def A2_ZERO = 0.U(2.W)
-  def A2_SIZE = 1.U(2.W)
-  def A2_RS2  = 2.U(2.W)
-  def A2_IMM  = 3.U(2.W)
-
-  def X = BitPat("b?")
-  def N = BitPat("b0")
-  def Y = BitPat("b1")
-
-  val SZ_DW = 1
-  def DW_X  = X
-  def DW_32 = false.B
-  def DW_64 = true.B
-  def DW_XPR = DW_64
-}
-
-object MemoryOpConstants {
-  val NUM_XA_OPS = 9
-  val M_SZ      = 5
-  def M_X       = BitPat("b?????");
-  def M_XRD     = "b00000".U; // int load
-  def M_XWR     = "b00001".U; // int store
-  def M_PFR     = "b00010".U; // prefetch with intent to read
-  def M_PFW     = "b00011".U; // prefetch with intent to write
-  def M_XA_SWAP = "b00100".U
-  def M_FLUSH_ALL = "b00101".U  // flush all lines
-  def M_XLR     = "b00110".U
-  def M_XSC     = "b00111".U
-  def M_XA_ADD  = "b01000".U
-  def M_XA_XOR  = "b01001".U
-  def M_XA_OR   = "b01010".U
-  def M_XA_AND  = "b01011".U
-  def M_XA_MIN  = "b01100".U
-  def M_XA_MAX  = "b01101".U
-  def M_XA_MINU = "b01110".U
-  def M_XA_MAXU = "b01111".U
-  def M_FLUSH   = "b10000".U // write back dirty data and cede R/W permissions
-  def M_PWR     = "b10001".U // partial (masked) store
-  def M_PRODUCE = "b10010".U // write back dirty data and cede W permissions
-  def M_CLEAN   = "b10011".U // write back dirty data and retain R/W permissions
-  def M_SFENCE  = "b10100".U // SFENCE.VMA
-  def M_HFENCEV = "b10101".U // HFENCE.VVMA
-  def M_HFENCEG = "b10110".U // HFENCE.GVMA
-  def M_WOK     = "b10111".U // check write permissions but don't perform a write
-  def M_HLVX    = "b10000".U // HLVX instruction
-
-  def isAMOLogical(cmd: UInt) = Seq(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND).map(cmd === _).reduce(_ || _)
-  def isAMOArithmetic(cmd: UInt) = Seq(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU).map(cmd === _).reduce(_ || _)
-  def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd)
-  def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
-  def isRead(cmd: UInt) = Seq(M_XRD, M_HLVX, M_XLR, M_XSC).map(cmd === _).reduce(_ || _) || isAMO(cmd)
-  def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd)
-  def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR
-}
diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 15ae59036..b8bbe065d 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -321,6 +321,7 @@ class TLB(
   asIdBits: Int,
   xLen: Int,
   cacheBlockBytes: Int,
+  debugModuleAddress: Some(AddressSet),
   memoryCacheable: Boolean,
   memoryHomogenous: Boolean,
   usingHypervisor: Boolean,
@@ -337,7 +338,7 @@ class TLB(
     /** SFence Input */
     val sfence = Flipped(Valid((new SFenceReq(vaddrBits, asIdBits))))
     /** IO to PTW */
-    val ptw = new TLBPTWIO() // TODO: Dependent on PTW
+    val ptw = new TLBPTWIO()
     /** suppress a TLB refill, one cycle after a miss */
     val kill = Input(Bool())
   })
@@ -424,7 +425,7 @@ class TLB(
                 Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits))
   val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
   val mpu_priv = Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */), PRV.S.U, Cat(io.ptw.status.debug, priv))
-  val pmp = Module(new PMPChecker(lgMaxSize)) // TODO: Dependent on PMP
+  val pmp = Module(new PMPChecker(lgMaxSize))
   pmp.io.addr := mpu_physaddr
   pmp.io.size := io.req.bits.size
   pmp.io.pmp := (io.ptw.pmp: Seq[PMP])
@@ -436,7 +437,7 @@ class TLB(
   def fastCheck(member: MemSlaveParameters => Boolean) = 
     legal_address && Memory.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B, memSlaves)
   // In M mode, if access DM address(debug module program buffer)
-  val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) // TODO: Refactor `p`
+  val deny_access_to_debug = mpu_priv <= PRV.M.U && debugModuleAddress.map(_.contains(mpu_physaddr)).getOrElse(false.B)
   val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r
   val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w
   val prot_pp = fastCheck(_.supportsPutPartial)
@@ -559,7 +560,7 @@ class TLB(
   // vaddr misaligned: vaddr[1:0]=b00
   val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR
   def badVA(guestPA: Boolean): Bool = {
-    val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels // TODO: Cannot resolve
+    val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels
     val extraBits = if (guestPA) hypervisorExtraAddrBits else 0
     val signed = !guestPA
     val nPgLevelChoices = pgLevels - minPgLevels + 1

From 8974671503bb9cb31a87254aae7225fa63439cea Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Thu, 1 Jun 2023 20:28:14 +0800
Subject: [PATCH 22/32] TLB: fix Option syntax

---
 rocket/src/TLB.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index b8bbe065d..773f8afc0 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -321,7 +321,7 @@ class TLB(
   asIdBits: Int,
   xLen: Int,
   cacheBlockBytes: Int,
-  debugModuleAddress: Some(AddressSet),
+  debugModuleAddress: Option[AddressSet],
   memoryCacheable: Boolean,
   memoryHomogenous: Boolean,
   usingHypervisor: Boolean,

From 3e7498b73b4e5c44325d1c7e0deaa476d10b063d Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Sat, 3 Jun 2023 18:23:11 +0800
Subject: [PATCH 23/32] TLB: resolve collisions

---
 rocket/src/CSR.scala          | 20 --------------------
 rocket/src/util.scala         | 22 ++++++++++++++++++++++
 rocket/src/util/package.scala | 34 ----------------------------------
 3 files changed, 22 insertions(+), 54 deletions(-)
 delete mode 100644 rocket/src/CSR.scala
 delete mode 100644 rocket/src/util/package.scala

diff --git a/rocket/src/CSR.scala b/rocket/src/CSR.scala
deleted file mode 100644
index a79022b0f..000000000
--- a/rocket/src/CSR.scala
+++ /dev/null
@@ -1,20 +0,0 @@
-// See LICENSE.SiFive for license details.
-// See LICENSE.Berkeley for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util.{BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid, log2Ceil, log2Up}
-
-import scala.collection.mutable.LinkedHashMap
-// import Instructions._
-// import CustomInstructions._
-
-object PRV
-{
-  val SZ = 2
-  val U = 0
-  val S = 1
-  val H = 2
-  val M = 3
-}
diff --git a/rocket/src/util.scala b/rocket/src/util.scala
index 316fd572a..89cde5247 100644
--- a/rocket/src/util.scala
+++ b/rocket/src/util.scala
@@ -189,4 +189,26 @@ package object util {
 
   implicit def uintToBitPat(x: UInt): BitPat = BitPat(x)
 
+  def bitIndexes(x: BigInt, tail: Seq[Int] = Nil): Seq[Int] = { // set-bit positions of x; ascending with the default tail
+    require (x >= 0)
+    if (x == 0) {
+      tail.reverse // tail is built by prepending, so reverse it to restore lowest-bit-first order
+    } else {
+      val lowest = x.lowestSetBit
+      bitIndexes(x.clearBit(lowest), lowest +: tail) // record the lowest set bit, clear it, and recurse on the rest
+    }
+  }
+
+  /** Similar to Seq.groupBy except this returns a Seq instead of a Map, with the groups ordered by
+    * the first occurrence of their key in `xs`. Useful for deterministic code generation
+    */
+  def groupByIntoSeq[A, K](xs: Seq[A])(f: A => K): immutable.Seq[(K, immutable.Seq[A])] = {
+    val map = mutable.LinkedHashMap.empty[K, mutable.ListBuffer[A]] // LinkedHashMap iterates in insertion order
+    for (x <- xs) {
+      val key = f(x)
+      val l = map.getOrElseUpdate(key, mutable.ListBuffer.empty[A])
+      l += x // appending keeps each group's elements in their input order
+    }
+    map.view.map({ case (k, vs) => k -> vs.toList }).toList // copy the mutable buffers into immutable lists
+  }
 }
diff --git a/rocket/src/util/package.scala b/rocket/src/util/package.scala
deleted file mode 100644
index 27ffb5c79..000000000
--- a/rocket/src/util/package.scala
+++ /dev/null
@@ -1,34 +0,0 @@
-// See LICENSE.SiFive for license details.
-// See LICENSE.Berkeley for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util._
-import scala.math.min
-import scala.collection.{immutable, mutable}
-
-package object util {
-  def bitIndexes(x: BigInt, tail: Seq[Int] = Nil): Seq[Int] = {
-    require (x >= 0)
-    if (x == 0) {
-      tail.reverse
-    } else {
-      val lowest = x.lowestSetBit
-      bitIndexes(x.clearBit(lowest), lowest +: tail)
-    }
-  }
-
-  /** Similar to Seq.groupBy except this returns a Seq instead of a Map
-    * Useful for deterministic code generation
-    */
-  def groupByIntoSeq[A, K](xs: Seq[A])(f: A => K): immutable.Seq[(K, immutable.Seq[A])] = {
-    val map = mutable.LinkedHashMap.empty[K, mutable.ListBuffer[A]]
-    for (x <- xs) {
-      val key = f(x)
-      val l = map.getOrElseUpdate(key, mutable.ListBuffer.empty[A])
-      l += x
-    }
-    map.view.map({ case (k, vs) => k -> vs.toList }).toList
-  }
-}

From 6134d136547f60abe608098d8469f70e09983832 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Sat, 3 Jun 2023 19:31:40 +0800
Subject: [PATCH 24/32] TLB: resolve dependencies

---
 rocket/src/TLB.scala              |   5 +-
 rocket/src/util.scala             |   2 +
 rocket/src/util/Misc.scala        |  19 +++
 rocket/src/util/Replacement.scala | 223 +-----------------------------
 4 files changed, 26 insertions(+), 223 deletions(-)

diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 773f8afc0..d72e49bb4 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -7,7 +7,7 @@ import chisel3._
 import chisel3.util._
 
 import chisel3.internal.sourceinfo.SourceInfo
-import org.chipsalliance.rocket.constants.MemoryOpConstants
+import org.chipsalliance.rocket._
 import org.chipsalliance.rocket.util._
 
 /** =SFENCE=
@@ -308,6 +308,7 @@ class TLB(
   cfg: TLBConfig,
   memSlaves: Seq[MemSlaveParameters],
   pmpGranularity: Int,
+  nPMPs: Int,
   pgLevels: Int,
   minPgLevels: Int,
   pgLevelBits: Int,
@@ -425,7 +426,7 @@ class TLB(
                 Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits))
   val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
   val mpu_priv = Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */), PRV.S.U, Cat(io.ptw.status.debug, priv))
-  val pmp = Module(new PMPChecker(lgMaxSize))
+  val pmp = Module(new PMPChecker(lgMaxSize, paddrBits, pmpGranularity, nPMPs, pgIdxBits, pgLevels, pgLevelBits))
   pmp.io.addr := mpu_physaddr
   pmp.io.size := io.req.bits.size
   pmp.io.pmp := (io.ptw.pmp: Seq[PMP])
diff --git a/rocket/src/util.scala b/rocket/src/util.scala
index 89cde5247..f7a667704 100644
--- a/rocket/src/util.scala
+++ b/rocket/src/util.scala
@@ -2,6 +2,8 @@ package org.chipsalliance.rocket
 
 import chisel3._
 import chisel3.util._
+import scala.collection.immutable
+import scala.collection.mutable
 
 //todo: remove util
 package object util {
diff --git a/rocket/src/util/Misc.scala b/rocket/src/util/Misc.scala
index e0a077b88..46171b55f 100644
--- a/rocket/src/util/Misc.scala
+++ b/rocket/src/util/Misc.scala
@@ -5,6 +5,7 @@ package org.chipsalliance.rocket.util
 
 import chisel3._
 import chisel3.util._
+import chisel3.util.random._
 
 object PopCountAtLeast {
   private def two(x: UInt): (Bool, Bool) = x.getWidth match {
@@ -22,3 +23,21 @@ object PopCountAtLeast {
     case 3 => PopCount(x) >= n.U
   }
 }
+
+/** Random index helpers: reduce a random UInt to a value in [0, mod), as a binary index or a one-hot mask. */
+object Random {
+  def apply(mod: Int, random: UInt): UInt = {
+    if (isPow2(mod)) random.extract(log2Ceil(mod)-1,0) // power of two: just take the low log2(mod) bits
+    else PriorityEncoder(partition(apply(1 << log2Up(mod*8), random), mod)) // else bucket log2Up(mod*8) bits into mod slices
+  }
+  def apply(mod: Int): UInt = apply(mod, randomizer)
+  def oneHot(mod: Int, random: UInt): UInt = {
+    if (isPow2(mod)) UIntToOH(random(log2Up(mod)-1,0))
+    else PriorityEncoderOH(partition(apply(1 << log2Up(mod*8), random), mod)).asUInt
+  }
+  def oneHot(mod: Int): UInt = oneHot(mod, randomizer)
+  // Slice bounds in `partition` must be literals (`n.U`); `UInt(n.W)` is only an n-bit type and cannot be compared.
+  private def randomizer = LFSR(16)
+  private def partition(value: UInt, slices: Int) =
+    Seq.tabulate(slices)(i => value < (((i + 1) << value.getWidth) / slices).U)
+}
\ No newline at end of file
diff --git a/rocket/src/util/Replacement.scala b/rocket/src/util/Replacement.scala
index f7ccc116a..2d4dbb266 100644
--- a/rocket/src/util/Replacement.scala
+++ b/rocket/src/util/Replacement.scala
@@ -119,7 +119,7 @@ class TrueLRU(n_ways: Int) extends ReplacementPolicy {
       state_reg := get_next_state(state_reg, touch_ways)
     }
     for (i <- 1 until touch_ways.size) {
-      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i", s"LRU Update $i simultaneous")
+      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i; LRU Update $i simultaneous")
     }
   }
 
@@ -177,7 +177,7 @@ class PseudoLRU(n_ways: Int) extends ReplacementPolicy {
       state_reg := get_next_state(state_reg, touch_ways)
     }
     for (i <- 1 until touch_ways.size) {
-      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i", s"PLRU Update $i simultaneous")
+      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i; PLRU Update $i simultaneous")
     }
   }
 
@@ -322,223 +322,4 @@ class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocRepl
 
   def way(set: UInt) = logic.get_replace_way(state_vec(set))
 
-}
-
-// Synthesizable unit tests
-import freechips.rocketchip.unittest._
-
-class PLRUTest(n_ways: Int, timeout: Int = 500) extends UnitTest(timeout) {
-  val plru = new PseudoLRU(n_ways)
-
-  // step
-  io.finished := RegNext(true.B, false.B)
-
-  val get_replace_ways = (0 until (1 << (n_ways-1))).map(state =>
-    plru.get_replace_way(state = state.U((n_ways-1).W)))
-  val get_next_states  = (0 until (1 << (n_ways-1))).map(state => (0 until n_ways).map(way =>
-    plru.get_next_state (state = state.U((n_ways-1).W), touch_way = way.U(log2Ceil(n_ways).W))))
-
-  n_ways match {
-    case 2 => {
-      assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0))
-      assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1))
-      assert(get_next_states(0)(0) === 1.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=1 actual=%d", get_next_states(0)(0))
-      assert(get_next_states(0)(1) === 0.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=0 actual=%d", get_next_states(0)(1))
-      assert(get_next_states(1)(0) === 1.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=1 actual=%d", get_next_states(1)(0))
-      assert(get_next_states(1)(1) === 0.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=0 actual=%d", get_next_states(1)(1))
-    }
-    case 3 => {
-      assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0))
-      assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1))
-      assert(get_replace_ways(2) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=2 actual=%d", get_replace_ways(2))
-      assert(get_replace_ways(3) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=2 actual=%d", get_replace_ways(3))
-      assert(get_next_states(0)(0) === 3.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=3 actual=%d", get_next_states(0)(0))
-      assert(get_next_states(0)(1) === 2.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=2 actual=%d", get_next_states(0)(1))
-      assert(get_next_states(0)(2) === 0.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=0 actual=%d", get_next_states(0)(2))
-      assert(get_next_states(1)(0) === 3.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=3 actual=%d", get_next_states(1)(0))
-      assert(get_next_states(1)(1) === 2.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=2 actual=%d", get_next_states(1)(1))
-      assert(get_next_states(1)(2) === 1.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=1 actual=%d", get_next_states(1)(2))
-      assert(get_next_states(2)(0) === 3.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=3 actual=%d", get_next_states(2)(0))
-      assert(get_next_states(2)(1) === 2.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=2 actual=%d", get_next_states(2)(1))
-      assert(get_next_states(2)(2) === 0.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=0 actual=%d", get_next_states(2)(2))
-      assert(get_next_states(3)(0) === 3.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=3 actual=%d", get_next_states(3)(0))
-      assert(get_next_states(3)(1) === 2.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=2 actual=%d", get_next_states(3)(1))
-      assert(get_next_states(3)(2) === 1.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=1 actual=%d", get_next_states(3)(2))
-    }
-    case 4 => {
-      assert(get_replace_ways(0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=0: expected=0 actual=%d", get_replace_ways(0))
-      assert(get_replace_ways(1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=1: expected=1 actual=%d", get_replace_ways(1))
-      assert(get_replace_ways(2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=2: expected=0 actual=%d", get_replace_ways(2))
-      assert(get_replace_ways(3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=3: expected=1 actual=%d", get_replace_ways(3))
-      assert(get_replace_ways(4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=4: expected=2 actual=%d", get_replace_ways(4))
-      assert(get_replace_ways(5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=5: expected=2 actual=%d", get_replace_ways(5))
-      assert(get_replace_ways(6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=6: expected=3 actual=%d", get_replace_ways(6))
-      assert(get_replace_ways(7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=7: expected=3 actual=%d", get_replace_ways(7))
-      assert(get_next_states(0)(0) === 5.U(plru.nBits.W), s"get_next_state state=0 way=0: expected=5 actual=%d", get_next_states(0)(0))
-      assert(get_next_states(0)(1) === 4.U(plru.nBits.W), s"get_next_state state=0 way=1: expected=4 actual=%d", get_next_states(0)(1))
-      assert(get_next_states(0)(2) === 2.U(plru.nBits.W), s"get_next_state state=0 way=2: expected=2 actual=%d", get_next_states(0)(2))
-      assert(get_next_states(0)(3) === 0.U(plru.nBits.W), s"get_next_state state=0 way=3: expected=0 actual=%d", get_next_states(0)(3))
-      assert(get_next_states(1)(0) === 5.U(plru.nBits.W), s"get_next_state state=1 way=0: expected=5 actual=%d", get_next_states(1)(0))
-      assert(get_next_states(1)(1) === 4.U(plru.nBits.W), s"get_next_state state=1 way=1: expected=4 actual=%d", get_next_states(1)(1))
-      assert(get_next_states(1)(2) === 3.U(plru.nBits.W), s"get_next_state state=1 way=2: expected=3 actual=%d", get_next_states(1)(2))
-      assert(get_next_states(1)(3) === 1.U(plru.nBits.W), s"get_next_state state=1 way=3: expected=1 actual=%d", get_next_states(1)(3))
-      assert(get_next_states(2)(0) === 7.U(plru.nBits.W), s"get_next_state state=2 way=0: expected=7 actual=%d", get_next_states(2)(0))
-      assert(get_next_states(2)(1) === 6.U(plru.nBits.W), s"get_next_state state=2 way=1: expected=6 actual=%d", get_next_states(2)(1))
-      assert(get_next_states(2)(2) === 2.U(plru.nBits.W), s"get_next_state state=2 way=2: expected=2 actual=%d", get_next_states(2)(2))
-      assert(get_next_states(2)(3) === 0.U(plru.nBits.W), s"get_next_state state=2 way=3: expected=0 actual=%d", get_next_states(2)(3))
-      assert(get_next_states(3)(0) === 7.U(plru.nBits.W), s"get_next_state state=3 way=0: expected=7 actual=%d", get_next_states(3)(0))
-      assert(get_next_states(3)(1) === 6.U(plru.nBits.W), s"get_next_state state=3 way=1: expected=6 actual=%d", get_next_states(3)(1))
-      assert(get_next_states(3)(2) === 3.U(plru.nBits.W), s"get_next_state state=3 way=2: expected=3 actual=%d", get_next_states(3)(2))
-      assert(get_next_states(3)(3) === 1.U(plru.nBits.W), s"get_next_state state=3 way=3: expected=1 actual=%d", get_next_states(3)(3))
-      assert(get_next_states(4)(0) === 5.U(plru.nBits.W), s"get_next_state state=4 way=0: expected=5 actual=%d", get_next_states(4)(0))
-      assert(get_next_states(4)(1) === 4.U(plru.nBits.W), s"get_next_state state=4 way=1: expected=4 actual=%d", get_next_states(4)(1))
-      assert(get_next_states(4)(2) === 2.U(plru.nBits.W), s"get_next_state state=4 way=2: expected=2 actual=%d", get_next_states(4)(2))
-      assert(get_next_states(4)(3) === 0.U(plru.nBits.W), s"get_next_state state=4 way=3: expected=0 actual=%d", get_next_states(4)(3))
-      assert(get_next_states(5)(0) === 5.U(plru.nBits.W), s"get_next_state state=5 way=0: expected=5 actual=%d", get_next_states(5)(0))
-      assert(get_next_states(5)(1) === 4.U(plru.nBits.W), s"get_next_state state=5 way=1: expected=4 actual=%d", get_next_states(5)(1))
-      assert(get_next_states(5)(2) === 3.U(plru.nBits.W), s"get_next_state state=5 way=2: expected=3 actual=%d", get_next_states(5)(2))
-      assert(get_next_states(5)(3) === 1.U(plru.nBits.W), s"get_next_state state=5 way=3: expected=1 actual=%d", get_next_states(5)(3))
-      assert(get_next_states(6)(0) === 7.U(plru.nBits.W), s"get_next_state state=6 way=0: expected=7 actual=%d", get_next_states(6)(0))
-      assert(get_next_states(6)(1) === 6.U(plru.nBits.W), s"get_next_state state=6 way=1: expected=6 actual=%d", get_next_states(6)(1))
-      assert(get_next_states(6)(2) === 2.U(plru.nBits.W), s"get_next_state state=6 way=2: expected=2 actual=%d", get_next_states(6)(2))
-      assert(get_next_states(6)(3) === 0.U(plru.nBits.W), s"get_next_state state=6 way=3: expected=0 actual=%d", get_next_states(6)(3))
-      assert(get_next_states(7)(0) === 7.U(plru.nBits.W), s"get_next_state state=7 way=0: expected=7 actual=%d", get_next_states(7)(0))
-      assert(get_next_states(7)(1) === 6.U(plru.nBits.W), s"get_next_state state=7 way=5: expected=6 actual=%d", get_next_states(7)(1))
-      assert(get_next_states(7)(2) === 3.U(plru.nBits.W), s"get_next_state state=7 way=2: expected=3 actual=%d", get_next_states(7)(2))
-      assert(get_next_states(7)(3) === 1.U(plru.nBits.W), s"get_next_state state=7 way=3: expected=1 actual=%d", get_next_states(7)(3))
-    }
-    case 5 => {
-      assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0))
-      assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1))
-      assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2))
-      assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3))
-      assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4))
-      assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5))
-      assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6))
-      assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7))
-      assert(get_replace_ways( 8) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=4 actual=%d", get_replace_ways( 8))
-      assert(get_replace_ways( 9) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=4 actual=%d", get_replace_ways( 9))
-      assert(get_replace_ways(10) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=4 actual=%d", get_replace_ways(10))
-      assert(get_replace_ways(11) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=4 actual=%d", get_replace_ways(11))
-      assert(get_replace_ways(12) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=4 actual=%d", get_replace_ways(12))
-      assert(get_replace_ways(13) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=4 actual=%d", get_replace_ways(13))
-      assert(get_replace_ways(14) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=4 actual=%d", get_replace_ways(14))
-      assert(get_replace_ways(15) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=4 actual=%d", get_replace_ways(15))
-      assert(get_next_states( 0)(0) === 13.U(plru.nBits.W), s"get_next_state state=00 way=0: expected=13 actual=%d", get_next_states( 0)(0))
-      assert(get_next_states( 0)(1) === 12.U(plru.nBits.W), s"get_next_state state=00 way=1: expected=12 actual=%d", get_next_states( 0)(1))
-      assert(get_next_states( 0)(2) === 10.U(plru.nBits.W), s"get_next_state state=00 way=2: expected=10 actual=%d", get_next_states( 0)(2))
-      assert(get_next_states( 0)(3) ===  8.U(plru.nBits.W), s"get_next_state state=00 way=3: expected=08 actual=%d", get_next_states( 0)(3))
-      assert(get_next_states( 0)(4) ===  0.U(plru.nBits.W), s"get_next_state state=00 way=4: expected=00 actual=%d", get_next_states( 0)(4))
-      assert(get_next_states( 1)(0) === 13.U(plru.nBits.W), s"get_next_state state=01 way=0: expected=13 actual=%d", get_next_states( 1)(0))
-      assert(get_next_states( 1)(1) === 12.U(plru.nBits.W), s"get_next_state state=01 way=1: expected=12 actual=%d", get_next_states( 1)(1))
-      assert(get_next_states( 1)(2) === 11.U(plru.nBits.W), s"get_next_state state=01 way=2: expected=11 actual=%d", get_next_states( 1)(2))
-      assert(get_next_states( 1)(3) ===  9.U(plru.nBits.W), s"get_next_state state=01 way=3: expected=09 actual=%d", get_next_states( 1)(3))
-      assert(get_next_states( 1)(4) ===  1.U(plru.nBits.W), s"get_next_state state=01 way=4: expected=01 actual=%d", get_next_states( 1)(4))
-      assert(get_next_states( 2)(0) === 15.U(plru.nBits.W), s"get_next_state state=02 way=0: expected=15 actual=%d", get_next_states( 2)(0))
-      assert(get_next_states( 2)(1) === 14.U(plru.nBits.W), s"get_next_state state=02 way=1: expected=14 actual=%d", get_next_states( 2)(1))
-      assert(get_next_states( 2)(2) === 10.U(plru.nBits.W), s"get_next_state state=02 way=2: expected=10 actual=%d", get_next_states( 2)(2))
-      assert(get_next_states( 2)(3) ===  8.U(plru.nBits.W), s"get_next_state state=02 way=3: expected=08 actual=%d", get_next_states( 2)(3))
-      assert(get_next_states( 2)(4) ===  2.U(plru.nBits.W), s"get_next_state state=02 way=4: expected=02 actual=%d", get_next_states( 2)(4))
-      assert(get_next_states( 3)(0) === 15.U(plru.nBits.W), s"get_next_state state=03 way=0: expected=15 actual=%d", get_next_states( 3)(0))
-      assert(get_next_states( 3)(1) === 14.U(plru.nBits.W), s"get_next_state state=03 way=1: expected=14 actual=%d", get_next_states( 3)(1))
-      assert(get_next_states( 3)(2) === 11.U(plru.nBits.W), s"get_next_state state=03 way=2: expected=11 actual=%d", get_next_states( 3)(2))
-      assert(get_next_states( 3)(3) ===  9.U(plru.nBits.W), s"get_next_state state=03 way=3: expected=09 actual=%d", get_next_states( 3)(3))
-      assert(get_next_states( 3)(4) ===  3.U(plru.nBits.W), s"get_next_state state=03 way=4: expected=03 actual=%d", get_next_states( 3)(4))
-      assert(get_next_states( 4)(0) === 13.U(plru.nBits.W), s"get_next_state state=04 way=0: expected=13 actual=%d", get_next_states( 4)(0))
-      assert(get_next_states( 4)(1) === 12.U(plru.nBits.W), s"get_next_state state=04 way=1: expected=12 actual=%d", get_next_states( 4)(1))
-      assert(get_next_states( 4)(2) === 10.U(plru.nBits.W), s"get_next_state state=04 way=2: expected=10 actual=%d", get_next_states( 4)(2))
-      assert(get_next_states( 4)(3) ===  8.U(plru.nBits.W), s"get_next_state state=04 way=3: expected=08 actual=%d", get_next_states( 4)(3))
-      assert(get_next_states( 4)(4) ===  4.U(plru.nBits.W), s"get_next_state state=04 way=4: expected=04 actual=%d", get_next_states( 4)(4))
-      assert(get_next_states( 5)(0) === 13.U(plru.nBits.W), s"get_next_state state=05 way=0: expected=13 actual=%d", get_next_states( 5)(0))
-      assert(get_next_states( 5)(1) === 12.U(plru.nBits.W), s"get_next_state state=05 way=1: expected=12 actual=%d", get_next_states( 5)(1))
-      assert(get_next_states( 5)(2) === 11.U(plru.nBits.W), s"get_next_state state=05 way=2: expected=11 actual=%d", get_next_states( 5)(2))
-      assert(get_next_states( 5)(3) ===  9.U(plru.nBits.W), s"get_next_state state=05 way=3: expected=09 actual=%d", get_next_states( 5)(3))
-      assert(get_next_states( 5)(4) ===  5.U(plru.nBits.W), s"get_next_state state=05 way=4: expected=05 actual=%d", get_next_states( 5)(4))
-      assert(get_next_states( 6)(0) === 15.U(plru.nBits.W), s"get_next_state state=06 way=0: expected=15 actual=%d", get_next_states( 6)(0))
-      assert(get_next_states( 6)(1) === 14.U(plru.nBits.W), s"get_next_state state=06 way=1: expected=14 actual=%d", get_next_states( 6)(1))
-      assert(get_next_states( 6)(2) === 10.U(plru.nBits.W), s"get_next_state state=06 way=2: expected=10 actual=%d", get_next_states( 6)(2))
-      assert(get_next_states( 6)(3) ===  8.U(plru.nBits.W), s"get_next_state state=06 way=3: expected=08 actual=%d", get_next_states( 6)(3))
-      assert(get_next_states( 6)(4) ===  6.U(plru.nBits.W), s"get_next_state state=06 way=4: expected=06 actual=%d", get_next_states( 6)(4))
-      assert(get_next_states( 7)(0) === 15.U(plru.nBits.W), s"get_next_state state=07 way=0: expected=15 actual=%d", get_next_states( 7)(0))
-      assert(get_next_states( 7)(1) === 14.U(plru.nBits.W), s"get_next_state state=07 way=5: expected=14 actual=%d", get_next_states( 7)(1))
-      assert(get_next_states( 7)(2) === 11.U(plru.nBits.W), s"get_next_state state=07 way=2: expected=11 actual=%d", get_next_states( 7)(2))
-      assert(get_next_states( 7)(3) ===  9.U(plru.nBits.W), s"get_next_state state=07 way=3: expected=09 actual=%d", get_next_states( 7)(3))
-      assert(get_next_states( 7)(4) ===  7.U(plru.nBits.W), s"get_next_state state=07 way=4: expected=07 actual=%d", get_next_states( 7)(4))
-      assert(get_next_states( 8)(0) === 13.U(plru.nBits.W), s"get_next_state state=08 way=0: expected=13 actual=%d", get_next_states( 8)(0))
-      assert(get_next_states( 8)(1) === 12.U(plru.nBits.W), s"get_next_state state=08 way=1: expected=12 actual=%d", get_next_states( 8)(1))
-      assert(get_next_states( 8)(2) === 10.U(plru.nBits.W), s"get_next_state state=08 way=2: expected=10 actual=%d", get_next_states( 8)(2))
-      assert(get_next_states( 8)(3) ===  8.U(plru.nBits.W), s"get_next_state state=08 way=3: expected=08 actual=%d", get_next_states( 8)(3))
-      assert(get_next_states( 8)(4) ===  0.U(plru.nBits.W), s"get_next_state state=08 way=4: expected=00 actual=%d", get_next_states( 8)(4))
-      assert(get_next_states( 9)(0) === 13.U(plru.nBits.W), s"get_next_state state=09 way=0: expected=13 actual=%d", get_next_states( 9)(0))
-      assert(get_next_states( 9)(1) === 12.U(plru.nBits.W), s"get_next_state state=09 way=1: expected=12 actual=%d", get_next_states( 9)(1))
-      assert(get_next_states( 9)(2) === 11.U(plru.nBits.W), s"get_next_state state=09 way=2: expected=11 actual=%d", get_next_states( 9)(2))
-      assert(get_next_states( 9)(3) ===  9.U(plru.nBits.W), s"get_next_state state=09 way=3: expected=09 actual=%d", get_next_states( 9)(3))
-      assert(get_next_states( 9)(4) ===  1.U(plru.nBits.W), s"get_next_state state=09 way=4: expected=01 actual=%d", get_next_states( 9)(4))
-      assert(get_next_states(10)(0) === 15.U(plru.nBits.W), s"get_next_state state=10 way=0: expected=15 actual=%d", get_next_states(10)(0))
-      assert(get_next_states(10)(1) === 14.U(plru.nBits.W), s"get_next_state state=10 way=1: expected=14 actual=%d", get_next_states(10)(1))
-      assert(get_next_states(10)(2) === 10.U(plru.nBits.W), s"get_next_state state=10 way=2: expected=10 actual=%d", get_next_states(10)(2))
-      assert(get_next_states(10)(3) ===  8.U(plru.nBits.W), s"get_next_state state=10 way=3: expected=08 actual=%d", get_next_states(10)(3))
-      assert(get_next_states(10)(4) ===  2.U(plru.nBits.W), s"get_next_state state=10 way=4: expected=02 actual=%d", get_next_states(10)(4))
-      assert(get_next_states(11)(0) === 15.U(plru.nBits.W), s"get_next_state state=11 way=0: expected=15 actual=%d", get_next_states(11)(0))
-      assert(get_next_states(11)(1) === 14.U(plru.nBits.W), s"get_next_state state=11 way=1: expected=14 actual=%d", get_next_states(11)(1))
-      assert(get_next_states(11)(2) === 11.U(plru.nBits.W), s"get_next_state state=11 way=2: expected=11 actual=%d", get_next_states(11)(2))
-      assert(get_next_states(11)(3) ===  9.U(plru.nBits.W), s"get_next_state state=11 way=3: expected=09 actual=%d", get_next_states(11)(3))
-      assert(get_next_states(11)(4) ===  3.U(plru.nBits.W), s"get_next_state state=11 way=4: expected=03 actual=%d", get_next_states(11)(4))
-      assert(get_next_states(12)(0) === 13.U(plru.nBits.W), s"get_next_state state=12 way=0: expected=13 actual=%d", get_next_states(12)(0))
-      assert(get_next_states(12)(1) === 12.U(plru.nBits.W), s"get_next_state state=12 way=1: expected=12 actual=%d", get_next_states(12)(1))
-      assert(get_next_states(12)(2) === 10.U(plru.nBits.W), s"get_next_state state=12 way=2: expected=10 actual=%d", get_next_states(12)(2))
-      assert(get_next_states(12)(3) ===  8.U(plru.nBits.W), s"get_next_state state=12 way=3: expected=08 actual=%d", get_next_states(12)(3))
-      assert(get_next_states(12)(4) ===  4.U(plru.nBits.W), s"get_next_state state=12 way=4: expected=04 actual=%d", get_next_states(12)(4))
-      assert(get_next_states(13)(0) === 13.U(plru.nBits.W), s"get_next_state state=13 way=0: expected=13 actual=%d", get_next_states(13)(0))
-      assert(get_next_states(13)(1) === 12.U(plru.nBits.W), s"get_next_state state=13 way=1: expected=12 actual=%d", get_next_states(13)(1))
-      assert(get_next_states(13)(2) === 11.U(plru.nBits.W), s"get_next_state state=13 way=2: expected=11 actual=%d", get_next_states(13)(2))
-      assert(get_next_states(13)(3) ===  9.U(plru.nBits.W), s"get_next_state state=13 way=3: expected=09 actual=%d", get_next_states(13)(3))
-      assert(get_next_states(13)(4) ===  5.U(plru.nBits.W), s"get_next_state state=13 way=4: expected=05 actual=%d", get_next_states(13)(4))
-      assert(get_next_states(14)(0) === 15.U(plru.nBits.W), s"get_next_state state=14 way=0: expected=15 actual=%d", get_next_states(14)(0))
-      assert(get_next_states(14)(1) === 14.U(plru.nBits.W), s"get_next_state state=14 way=1: expected=14 actual=%d", get_next_states(14)(1))
-      assert(get_next_states(14)(2) === 10.U(plru.nBits.W), s"get_next_state state=14 way=2: expected=10 actual=%d", get_next_states(14)(2))
-      assert(get_next_states(14)(3) ===  8.U(plru.nBits.W), s"get_next_state state=14 way=3: expected=08 actual=%d", get_next_states(14)(3))
-      assert(get_next_states(14)(4) ===  6.U(plru.nBits.W), s"get_next_state state=14 way=4: expected=06 actual=%d", get_next_states(14)(4))
-      assert(get_next_states(15)(0) === 15.U(plru.nBits.W), s"get_next_state state=15 way=0: expected=15 actual=%d", get_next_states(15)(0))
-      assert(get_next_states(15)(1) === 14.U(plru.nBits.W), s"get_next_state state=15 way=5: expected=14 actual=%d", get_next_states(15)(1))
-      assert(get_next_states(15)(2) === 11.U(plru.nBits.W), s"get_next_state state=15 way=2: expected=11 actual=%d", get_next_states(15)(2))
-      assert(get_next_states(15)(3) ===  9.U(plru.nBits.W), s"get_next_state state=15 way=3: expected=09 actual=%d", get_next_states(15)(3))
-      assert(get_next_states(15)(4) ===  7.U(plru.nBits.W), s"get_next_state state=15 way=4: expected=07 actual=%d", get_next_states(15)(4))
-    }
-    case 6 => {
-      assert(get_replace_ways( 0) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=00: expected=0 actual=%d", get_replace_ways( 0))
-      assert(get_replace_ways( 1) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=01: expected=1 actual=%d", get_replace_ways( 1))
-      assert(get_replace_ways( 2) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=02: expected=0 actual=%d", get_replace_ways( 2))
-      assert(get_replace_ways( 3) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=03: expected=1 actual=%d", get_replace_ways( 3))
-      assert(get_replace_ways( 4) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=04: expected=2 actual=%d", get_replace_ways( 4))
-      assert(get_replace_ways( 5) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=05: expected=2 actual=%d", get_replace_ways( 5))
-      assert(get_replace_ways( 6) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=06: expected=3 actual=%d", get_replace_ways( 6))
-      assert(get_replace_ways( 7) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=07: expected=3 actual=%d", get_replace_ways( 7))
-      assert(get_replace_ways( 8) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=08: expected=0 actual=%d", get_replace_ways( 8))
-      assert(get_replace_ways( 9) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=09: expected=1 actual=%d", get_replace_ways( 9))
-      assert(get_replace_ways(10) === 0.U(log2Ceil(n_ways).W), s"get_replace_way state=10: expected=0 actual=%d", get_replace_ways(10))
-      assert(get_replace_ways(11) === 1.U(log2Ceil(n_ways).W), s"get_replace_way state=11: expected=1 actual=%d", get_replace_ways(11))
-      assert(get_replace_ways(12) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=12: expected=2 actual=%d", get_replace_ways(12))
-      assert(get_replace_ways(13) === 2.U(log2Ceil(n_ways).W), s"get_replace_way state=13: expected=2 actual=%d", get_replace_ways(13))
-      assert(get_replace_ways(14) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=14: expected=3 actual=%d", get_replace_ways(14))
-      assert(get_replace_ways(15) === 3.U(log2Ceil(n_ways).W), s"get_replace_way state=15: expected=3 actual=%d", get_replace_ways(15))
-      assert(get_replace_ways(16) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=16: expected=4 actual=%d", get_replace_ways(16))
-      assert(get_replace_ways(17) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=17: expected=4 actual=%d", get_replace_ways(17))
-      assert(get_replace_ways(18) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=18: expected=4 actual=%d", get_replace_ways(18))
-      assert(get_replace_ways(19) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=19: expected=4 actual=%d", get_replace_ways(19))
-      assert(get_replace_ways(20) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=20: expected=4 actual=%d", get_replace_ways(20))
-      assert(get_replace_ways(21) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=21: expected=4 actual=%d", get_replace_ways(21))
-      assert(get_replace_ways(22) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=22: expected=4 actual=%d", get_replace_ways(22))
-      assert(get_replace_ways(23) === 4.U(log2Ceil(n_ways).W), s"get_replace_way state=23: expected=4 actual=%d", get_replace_ways(23))
-      assert(get_replace_ways(24) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=24: expected=5 actual=%d", get_replace_ways(24))
-      assert(get_replace_ways(25) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=25: expected=5 actual=%d", get_replace_ways(25))
-      assert(get_replace_ways(26) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=26: expected=5 actual=%d", get_replace_ways(26))
-      assert(get_replace_ways(27) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=27: expected=5 actual=%d", get_replace_ways(27))
-      assert(get_replace_ways(28) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=28: expected=5 actual=%d", get_replace_ways(28))
-      assert(get_replace_ways(29) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=29: expected=5 actual=%d", get_replace_ways(29))
-      assert(get_replace_ways(30) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=30: expected=5 actual=%d", get_replace_ways(30))
-      assert(get_replace_ways(31) === 5.U(log2Ceil(n_ways).W), s"get_replace_way state=31: expected=5 actual=%d", get_replace_ways(31))
-    }
-    case _ => throw new IllegalArgumentException(s"no test pattern found for n_ways=$n_ways")
-  }
 }
\ No newline at end of file

From 8bda8692eccc7522cbe09086c1f45a44b9350e52 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Sat, 3 Jun 2023 20:52:17 +0800
Subject: [PATCH 25/32] PTW: migration and introduce dependencies

---
 rocket/src/CSR.scala                 | 140 +++++
 rocket/src/ConstInCSR.scala          |  51 +-
 rocket/src/CustomCSRs.scala          |  63 ++
 rocket/src/CustomInstructions.scala  |  19 -
 rocket/src/PTW.scala                 | 855 +++++++++++++++++++++++++++
 rocket/src/TLB.scala                 |  13 +-
 rocket/src/util/AddressDecoder.scala |   4 +-
 rocket/src/util/Annotations.scala    | 298 ++++++++++
 rocket/src/util/ClockGate.scala      |  53 ++
 rocket/src/util/DescribedSRAM.scala  |  40 ++
 10 files changed, 1460 insertions(+), 76 deletions(-)
 create mode 100644 rocket/src/CSR.scala
 create mode 100644 rocket/src/CustomCSRs.scala
 create mode 100644 rocket/src/PTW.scala
 create mode 100644 rocket/src/util/Annotations.scala
 create mode 100644 rocket/src/util/ClockGate.scala
 create mode 100644 rocket/src/util/DescribedSRAM.scala

diff --git a/rocket/src/CSR.scala b/rocket/src/CSR.scala
new file mode 100644
index 000000000..947f7974f
--- /dev/null
+++ b/rocket/src/CSR.scala
@@ -0,0 +1,140 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util._
+
+object CSR
+{
+  // commands
+  val SZ = 3 // width in bits of every command encoding below
+  def X = BitPat.dontCare(SZ) // SZ-bit don't-care pattern
+  def N = 0.U(SZ.W)
+  def R = 2.U(SZ.W)
+  def I = 4.U(SZ.W)
+  def W = 5.U(SZ.W)
+  def S = 6.U(SZ.W)
+  def C = 7.U(SZ.W)
+
+  // mask a CSR cmd with a valid bit
+  def maskCmd(valid: Bool, cmd: UInt): UInt = {
+    // all commands less than CSR.I are treated by CSRFile as NOPs
+    cmd & ~Mux(valid, 0.U, CSR.I) // when !valid, clear the CSR.I bit (value 4) so cmd drops below CSR.I
+  }
+
+  val ADDRSZ = 12 // NOTE(review): presumably the CSR address width in bits — confirm against users
+
+  def modeLSB: Int = 8 // lowest address bit of the field returned by mode()
+  def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) // elaboration-time: PRV.SZ bits of addr starting at modeLSB
+  def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) // hardware: the same bit field
+
+  def busErrorIntCause = 128
+  def debugIntCause = 14 // keep in sync with MIP.debug
+  def debugTriggerCause = { // same number as debugIntCause; require fails if Causes.all already contains it
+    val res = debugIntCause
+    require(!(Causes.all contains res))
+    res
+  }
+  def rnmiIntCause = 13  // NMI: Higher numbers = higher priority, must not reuse debugIntCause
+  def rnmiBEUCause = 12
+
+  val firstCtr = CSRs.cycle
+  val firstCtrH = CSRs.cycleh
+  val firstHPC = CSRs.hpmcounter3
+  val firstHPCH = CSRs.hpmcounter3h
+  val firstHPE = CSRs.mhpmevent3
+  val firstMHPC = CSRs.mhpmcounter3
+  val firstMHPCH = CSRs.mhpmcounter3h
+  val firstHPM = 3
+  val nCtr = 32
+  val nHPM = nCtr - firstHPM // 32 - 3 = 29
+  val hpmWidth = 40
+
+  val maxPMPs = 16
+}
+
+
+class MStatus extends Bundle { // first-declared field ends up in the most significant bits
+  // not truly part of mstatus, but convenient
+  val debug = Bool()
+  val cease = Bool()
+  val wfi = Bool()
+  val isa = UInt(32.W)
+
+  val dprv = UInt(PRV.SZ.W) // effective prv for data accesses
+  val dv = Bool() // effective v for data accesses
+  val prv = UInt(PRV.SZ.W)
+  val v = Bool()
+
+  val sd = Bool() // the fields from sd down to uie add up to 64 bits
+  val zero2 = UInt(23.W)
+  val mpv = Bool()
+  val gva = Bool()
+  val mbe = Bool()
+  val sbe = Bool()
+  val sxl = UInt(2.W)
+  val uxl = UInt(2.W)
+  val sd_rv32 = Bool()
+  val zero1 = UInt(8.W)
+  val tsr = Bool()
+  val tw = Bool()
+  val tvm = Bool()
+  val mxr = Bool()
+  val sum = Bool()
+  val mprv = Bool()
+  val xs = UInt(2.W)
+  val fs = UInt(2.W)
+  val mpp = UInt(2.W)
+  val vs = UInt(2.W)
+  val spp = UInt(1.W)
+  val mpie = Bool()
+  val ube = Bool()
+  val spie = Bool()
+  val upie = Bool()
+  val mie = Bool()
+  val hie = Bool()
+  val sie = Bool()
+  val uie = Bool()
+}
+
+class HStatus extends Bundle { // the declared fields add up to 64 bits
+  val zero6 = UInt(30.W)
+  val vsxl = UInt(2.W)
+  val zero5 = UInt(9.W)
+  val vtsr = Bool()
+  val vtw = Bool()
+  val vtvm = Bool()
+  val zero3 = UInt(2.W)
+  val vgein = UInt(6.W)
+  val zero2 = UInt(2.W)
+  val hu = Bool()
+  val spvp = Bool()
+  val spv = Bool()
+  val gva = Bool()
+  val vsbe = Bool()
+  val zero1 = UInt(5.W)
+}
+
+class PTBR(
+    xLen: Int, // must be 32 or 64: the modeBits/maxASIdBits match has no other case
+    pgLevels: Int,
+    minPgLevels: Int,
+    maxPAddrBits: Int,
+    pgIdxBits: Int) extends Bundle {
+  def additionalPgLevels = mode(log2Ceil(pgLevels-minPgLevels+1)-1, 0) // low log2Ceil(pgLevels-minPgLevels+1) bits of mode
+  def pgLevelsToMode(i: Int) = (xLen, i) match { // no default case: any other (xLen, i) throws scala.MatchError
+    case (32, 2) => 1
+    case (64, x) if x >= 3 && x <= 6 => x + 5 // 3..6 levels map to 8..11
+  }
+  val (modeBits, maxASIdBits) = xLen match { // widths of the mode and asid fields for this xLen
+    case 32 => (1, 9)
+    case 64 => (4, 16)
+  }
+  require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen) // mode + asid + ppn must fill exactly xLen bits
+
+  val mode = UInt(modeBits.W)
+  val asid = UInt(maxASIdBits.W)
+  val ppn = UInt((maxPAddrBits - pgIdxBits).W) // physical address bits above the page offset
+}
\ No newline at end of file
diff --git a/rocket/src/ConstInCSR.scala b/rocket/src/ConstInCSR.scala
index b2e67930e..576edb669 100644
--- a/rocket/src/ConstInCSR.scala
+++ b/rocket/src/ConstInCSR.scala
@@ -14,53 +14,4 @@ object PRV
   val S = 1
   val H = 2
   val M = 3
-}
-
-object CSR
-{
-  // commands
-  val SZ = 3
-  def X = BitPat.dontCare(SZ)
-  def N = 0.U(SZ.W)
-  def R = 2.U(SZ.W)
-  def I = 4.U(SZ.W)
-  def W = 5.U(SZ.W)
-  def S = 6.U(SZ.W)
-  def C = 7.U(SZ.W)
-
-  // mask a CSR cmd with a valid bit
-  def maskCmd(valid: Bool, cmd: UInt): UInt = {
-    // all commands less than CSR.I are treated by CSRFile as NOPs
-    cmd & ~Mux(valid, 0.U, CSR.I)
-  }
-
-  val ADDRSZ = 12
-
-  def modeLSB: Int = 8
-  def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ)
-  def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB)
-
-  def busErrorIntCause = 128
-  def debugIntCause = 14 // keep in sync with MIP.debug
-  def debugTriggerCause = {
-    val res = debugIntCause
-    require(!(Causes.all contains res))
-    res
-  }
-  def rnmiIntCause = 13  // NMI: Higher numbers = higher priority, must not reuse debugIntCause
-  def rnmiBEUCause = 12
-
-  val firstCtr = CSRs.cycle
-  val firstCtrH = CSRs.cycleh
-  val firstHPC = CSRs.hpmcounter3
-  val firstHPCH = CSRs.hpmcounter3h
-  val firstHPE = CSRs.mhpmevent3
-  val firstMHPC = CSRs.mhpmcounter3
-  val firstMHPCH = CSRs.mhpmcounter3h
-  val firstHPM = 3
-  val nCtr = 32
-  val nHPM = nCtr - firstHPM
-  val hpmWidth = 40
-
-  val maxPMPs = 16
-}
+}
\ No newline at end of file
diff --git a/rocket/src/CustomCSRs.scala b/rocket/src/CustomCSRs.scala
new file mode 100644
index 000000000..1e74c3aa8
--- /dev/null
+++ b/rocket/src/CustomCSRs.scala
@@ -0,0 +1,63 @@
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+
+case class CustomCSR(id: Int, mask: BigInt, init: Option[BigInt]) // id is the key CustomCSRs.getByIdOrElse looks up; NOTE(review): mask/init presumably writable-bit mask and reset value — confirm
+
+object CustomCSRs { // numeric ids of the mn* CSRs and arrays listing them
+  val mnscratch = 0x350
+  val mnepc = 0x351
+  val mncause = 0x352
+  val mnstatus = 0x353
+  val all = { // the four ids above, in declaration order
+    val res = collection.mutable.ArrayBuffer[Int]()
+    res += mnscratch
+    res += mnepc
+    res += mncause
+    res += mnstatus
+    res.toArray
+  }
+  val all32 = { // currently an element-for-element copy of all; nothing extra is appended
+    val res = collection.mutable.ArrayBuffer(all:_*)
+    res.toArray
+  }
+}
+
+class CustomCSRIO(xLen: Int) extends Bundle { // per-CSR signal bundle; one element of CustomCSRs.csrs
+  val wen = Bool() // NOTE(review): presumably a write-enable strobe — confirm against the CSR file
+  val wdata = UInt(xLen.W) // NOTE(review): presumably the data being written — confirm
+  val value = UInt(xLen.W) // individual bits of this are read by the CustomCSRs accessors
+}
+
+class CustomCSRs(xLen: Int) extends Bundle {
+  // Not all cores have these CSRs, but those that do should follow the same
+  // numbering conventions.  So we list them here but default them to None.
+  protected def bpmCSRId = 0x7c0
+  protected def bpmCSR: Option[CustomCSR] = None
+
+  protected def chickenCSRId = 0x7c1
+  protected def chickenCSR: Option[CustomCSR] = None
+
+  // If you override this, you'll want to concatenate super.decls
+  def decls: Seq[CustomCSR] = bpmCSR.toSeq ++ chickenCSR
+
+  val csrs = Vec(decls.size, new CustomCSRIO(xLen)) // one IO bundle per entry of decls, in the same order
+
+  def flushBTB = getOrElse(bpmCSR, _.wen, false.B) // each accessor below yields false.B when its CSR is not declared
+  def bpmStatic = getOrElse(bpmCSR, _.value(0), false.B)
+  def disableDCacheClockGate = getOrElse(chickenCSR, _.value(0), false.B)
+  def disableICacheClockGate = getOrElse(chickenCSR, _.value(1), false.B)
+  def disableCoreClockGate = getOrElse(chickenCSR, _.value(2), false.B)
+  def disableSpeculativeICacheRefill = getOrElse(chickenCSR, _.value(3), false.B)
+  def suppressCorruptOnGrantData = getOrElse(chickenCSR, _.value(9), false.B)
+
+  protected def getByIdOrElse[T](id: Int, f: CustomCSRIO => T, alt: T): T = { // apply f to the IO of the CSR numbered id, or return alt
+    val idx = decls.indexWhere(_.id == id) // -1 when no declared CSR has this id
+    if (idx < 0) alt else f(csrs(idx))
+  }
+
+  protected def getOrElse[T](csr: Option[CustomCSR], f: CustomCSRIO => T, alt: T): T = // alt when csr is None, else look it up by id
+    csr.map(c => getByIdOrElse(c.id, f, alt)).getOrElse(alt)
+}
diff --git a/rocket/src/CustomInstructions.scala b/rocket/src/CustomInstructions.scala
index f4770184c..9411f9229 100644
--- a/rocket/src/CustomInstructions.scala
+++ b/rocket/src/CustomInstructions.scala
@@ -35,22 +35,3 @@ object CustomInstructions {
   def CUSTOM3_RD_RS1     = BitPat("b?????????????????110?????1111011")
   def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011")
 }
-
-object CustomCSRs {
-  val mnscratch = 0x350
-  val mnepc = 0x351
-  val mncause = 0x352
-  val mnstatus = 0x353
-  val all = {
-    val res = collection.mutable.ArrayBuffer[Int]()
-    res += mnscratch
-    res += mnepc
-    res += mncause
-    res += mnstatus
-    res.toArray
-  }
-  val all32 = {
-    val res = collection.mutable.ArrayBuffer(all:_*)
-    res.toArray
-  }
-}
diff --git a/rocket/src/PTW.scala b/rocket/src/PTW.scala
new file mode 100644
index 000000000..bbcaa4363
--- /dev/null
+++ b/rocket/src/PTW.scala
@@ -0,0 +1,855 @@
+// See LICENSE.Berkeley for license details.
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util.{Arbiter, Cat, Decoupled, Enum, Mux1H, OHToUInt, PopCount, PriorityEncoder, PriorityEncoderOH, RegEnable, UIntToOH, Valid, is, isPow2, log2Ceil, switch}
+import chisel3.withClock
+import chisel3.internal.sourceinfo.SourceInfo
+import org.chipsalliance.rocket.util._
+
+import scala.collection.mutable.ListBuffer
+
+/** PTE request from TLB to PTW
+  *
+  * The TLB sends a PTE request to the PTW when the L1 TLB misses
+  */
+class PTWReq(vpnBits: Int) extends Bundle {
+  val addr = UInt(vpnBits.W) // vpnBits wide; NOTE(review): presumably the virtual page number — confirm
+  val need_gpa = Bool() // NOTE(review): presumably asks for the guest physical address in the response — confirm
+  val vstage1 = Bool() // when set, the PTW selects vsatp instead of ptbr for the walk
+  val stage2 = Bool() // NOTE(review): presumably requests second-stage translation — confirm
+}
+
+/** PTE info from L2TLB to TLB
+  *
+  * containing: target PTE, exceptions, two-stage translation info
+  */
+class PTWResp(pgLevels: Int, vaddrBits: Int) extends Bundle {
+  /** ptw access exception */
+  val ae_ptw = Bool()
+  /** final access exception */
+  val ae_final = Bool()
+  /** page fault */
+  val pf = Bool()
+  /** guest page fault */
+  val gf = Bool()
+  /** hypervisor read */
+  val hr = Bool()
+  /** hypervisor write */
+  val hw = Bool()
+  /** hypervisor execute */
+  val hx = Bool()
+  /** PTE to refill L1TLB
+    *
+    * source: L2TLB
+    */
+  val pte = new PTE
+  /** page-table level of the returned pte */
+  val level = UInt(log2Ceil(pgLevels).W)
+  /** fragmented_superpage support */
+  val fragmented_superpage = Bool()
+  /** homogeneous for both pma and pmp */
+  val homogeneous = Bool()
+  val gpa = Valid(UInt(vaddrBits.W)) // NOTE(review): presumably the guest physical address, qualified by its valid bit — confirm
+  val gpa_is_pte = Bool() // NOTE(review): presumably marks gpa as the address of a PTE — confirm
+}
+
+/** IO between TLB and PTW
+  *
+  * PTW receives :
+  *   - PTE request
+  *   - CSRs info
+  *   - pmp results from PMP(in TLB)
+  */
+class TLBPTWIO(
+  xLen: Int,
+  vpnBits: Int, // width of PTWReq.addr
+  pgLevels: Int,
+  minPgLevels: Int,
+  pgLevelBits: Int,
+  maxPAddrBits: Int,
+  pgIdxBits: Int,
+  vaddrBits: Int,
+  paddrBits: Int,
+  pmpGranularity: Int,
+  nPMPs: Int, // number of elements in the pmp vector
+  customCSRsParam: CustomCSRs
+) extends Bundle {
+  val req = Decoupled(Valid(new PTWReq(vpnBits))) // outgoing ready/valid request; the payload carries its own valid bit
+  val resp = Flipped(Valid(new PTWResp(pgLevels, vaddrBits))) // incoming response (direction flipped relative to req)
+  val ptbr = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  val hgatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  val vsatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  val status = Input(new MStatus())
+  val hstatus = Input(new HStatus())
+  val gstatus = Input(new MStatus())
+  val pmp = Input(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)))
+  val customCSRs = Input(customCSRsParam)
+}
+/** PTW performance statistics */
+class PTWPerfEvents extends Bundle {
+  val l2miss = Bool() // L2 TLB miss event
+  val l2hit = Bool() // L2 TLB hit event
+  val pte_miss = Bool() // PTE cache miss event
+  val pte_hit = Bool() // PTE cache hit event
+}
+
+/** Datapath IO between PTW and Core
+  *
+  * PTW receives CSRs info, pmp checks, sfence instruction info
+  *
+  * PTW sends its performance statistics to core
+  */
+class DatapathPTWIO(
+  xLen: Int,
+  pgLevels: Int,
+  pgLevelBits: Int,
+  minPgLevels: Int,
+  maxPAddrBits: Int,
+  pgIdxBits: Int,
+  vaddrBits: Int,
+  paddrBits: Int,
+  asIdBits: Int,
+  pmpGranularity: Int,
+  nPMPs: Int, // number of elements in the pmp vector
+  customCSRsParam: CustomCSRs
+) extends Bundle {
+  val ptbr = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  val hgatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  val vsatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  val sfence = Flipped(Valid(new SFenceReq(vaddrBits, asIdBits))) // incoming sfence request with a valid bit
+  val status = Input(new MStatus())
+  val hstatus = Input(new HStatus())
+  val gstatus = Input(new MStatus())
+  val pmp = Input(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)))
+  val perf = Output(new PTWPerfEvents()) // performance events reported by the PTW
+  val customCSRs = Input(customCSRsParam)
+  /** driven by the PTW: high when usingVM is set and the PTW needs its clock enabled */
+  val clock_enabled = Output(Bool())
+}
+/** PTE template for transmission
+  *
+  * contains useful methods to check PTE attributes
+  * @see RV-priv spec 4.3.1 for page table entry format
+  */
+class PTE extends Bundle { // the declared fields add up to 64 bits
+  val reserved_for_future = UInt(10.W)
+  val ppn = UInt(44.W)
+  val reserved_for_software = Bits(2.W)
+  /** dirty bit */
+  val d = Bool()
+  /** access bit */
+  val a = Bool()
+  /** global mapping */
+  val g = Bool()
+  /** user mode accessible */
+  val u = Bool()
+  /** whether the page is executable */
+  val x = Bool()
+  /** whether the page is writable */
+  val w = Bool()
+  /** whether the page is readable */
+  val r = Bool()
+  /** valid bit */
+  val v = Bool()
+  /** true for a pointer to a next-level table: v set, r/w/x/d/a/u all clear, reserved_for_future zero */
+  def table(dummy: Int = 0) = v && !r && !w && !x && !d && !a && !u && reserved_for_future === 0.U // NOTE(review): dummy is unused here and in the methods below; presumably only forces call-site parentheses — confirm
+  /** true for a leaf PTE: v set, readable or execute-only (x && !w), and a set */
+  def leaf(dummy: Int = 0) = v && (r || (x && !w)) && a
+  /** user read: sr and u */
+  def ur(dummy: Int = 0) = sr() && u
+  /** user write: sw and u */
+  def uw(dummy: Int = 0) = sw() && u
+  /** user execute: sx and u */
+  def ux(dummy: Int = 0) = sx() && u
+  /** supervisor read: leaf and r */
+  def sr(dummy: Int = 0) = leaf() && r
+  /** supervisor write: leaf, w and d (the dirty bit must already be set) */
+  def sw(dummy: Int = 0) = leaf() && w && d
+  /** supervisor execute: leaf and x */
+  def sx(dummy: Int = 0) = leaf() && x
+  /** full permission: writable and executable in user mode */
+  def isFullPerm(dummy: Int = 0) = uw() && ux()
+}
+
+/** L2TLB PTE template
+  *
+  * contains tag bits
+  * @param nSets number of sets in L2TLB
+  * @see RV-priv spec 4.3.1 for page table entry format
+  */
+class L2TLBEntry(
+  nSets: Int,
+  maxSVAddrBits: Int,
+  pgIdxBits: Int,
+  ppnBits: Int,
+  usingHypervisor: Boolean,
+) extends Bundle {
+  val idxBits = log2Ceil(nSets) // number of set-index bits, excluded from the tag
+  val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0) // address bits above page offset and set index, plus one extra bit when usingHypervisor
+  val tag = UInt(tagBits.W)
+  val ppn = UInt(ppnBits.W)
+  /** dirty bit */
+  val d = Bool()
+  /** access bit */
+  val a = Bool()
+  /** user mode accessible */
+  val u = Bool()
+  /** whether the page is executable */
+  val x = Bool()
+  /** whether the page is writable */
+  val w = Bool()
+  /** whether the page is readable */
+  val r = Bool()
+
+}
+/** PTW contains L2TLB, and performs page table walk for high level TLB, and cache queries from L1 TLBs(I$, D$, RoCC)
+  *
+  * It performs a hierarchical page table walk in memory for the desired leaf PTE and caches it in l2tlb.
+  * Besides leaf PTEs, it also caches non-leaf PTEs in pte_cache to accelerate the process.
+  *
+  * ==Structure==
+  *  - l2tlb : for leaf PTEs
+  *   - set-associative (configurable with the `nL2TLBEntries` and `nL2TLBWays` constructor parameters)
+  *   - PLRU
+  *  - pte_cache: for non-leaf PTEs
+  *   - set-associative
+  *   - PLRU
+  *  - s2_pte_cache: for non-leaf PTEs in 2-stage translation
+  *   - set-associative
+  *   - PLRU
+  *
+  * l2tlb Pipeline: 3 stage
+  * {{{
+  * stage 0 : read
+  * stage 1 : decode
+  * stage 2 : hit check
+  * }}}
+  * ==State Machine==
+  * s_ready: ready to receive request from TLB
+  * s_req: request mem; pte_cache hit judge
+  * s_wait1: deal with l2tlb error
+  * s_wait2: final hit judge
+  * s_wait3: receive mem response
+  * s_fragment_superpage: for superpage PTE
+  *
+  * @note l2tlb hit happens in s_req or s_wait1
+  * @see RV-priv spec 4.3-4.6 for Virtual-Memory System
+  * @see RV-priv spec 8.5 for Two-Stage Address Translation
+  * @todo details in two-stage translation
+  */
+class PTW(
+  n: Int,
+  xLen: Int,
+  vpnBits: Int, 
+  ppnBits: Int,
+  pgLevels: Int,
+  minPgLevels: Int,
+  pgLevelBits: Int,
+  maxPAddrBits: Int,
+  pgIdxBits: Int,
+  vaddrBits: Int,
+  paddrBits: Int,
+  asIdBits: Int,
+  pmpGranularity: Int,
+  nPMPs: Int,
+  nPTECacheEntries: Int,
+  nL2TLBEntries: Int,
+  nL2TLBWays: Int,
+  hypervisorExtraAddrBits: Int,
+  maxHypervisorExtraAddrBits: Int,
+  customCSRsParam: CustomCSRs,
+  clockGate: Boolean,
+  usingVM: Boolean,
+  usingHypervisor:Boolean
+) extends Module {
+  val io = IO(new Bundle {
+    /** to n TLB */
+    val requestor = Flipped(Vec(
+      n,
+      new TLBPTWIO(
+        xLen, vpnBits, pgLevels, minPgLevels, pgLevelBits, maxPAddrBits,
+        pgIdxBits, vaddrBits, paddrBits, pmpGranularity, nPMPs, customCSRsParam
+      )
+    ))
+    /** to HellaCache */
+    val mem = new HellaCacheIO
+    /** to Core
+      *
+      * contains CSRs info and performance statistics
+      */
+    val dpath = new DatapathPTWIO(
+      xLen, pgLevels, pgLevelBits, minPgLevels, maxPAddrBits, pgIdxBits,
+      vaddrBits, paddrBits, asIdBits, pmpGranularity, nPMPs, customCSRsParam
+    )
+  })
+
+  val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(8)
+  val state = RegInit(s_ready)
+  val l2_refill_wire = Wire(Bool())
+  /** Arbiter that arbitrates requests from the n TLBs */
+  val arb = Module(new Arbiter(Valid(new PTWReq(vpnBits)), n))
+  // use the TLB requests as the arbiter's inputs
+  arb.io.in <> io.requestor.map(_.req)
+  // receive req only when s_ready and not in refill
+  arb.io.out.ready := (state === s_ready) && !l2_refill_wire
+
+  val resp_valid = RegNext(VecInit(Seq.fill(io.requestor.size)(false.B)))
+
+  val clock_en = state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate
+  io.dpath.clock_enabled := usingVM.B && clock_en
+  val gated_clock =
+    if (!usingVM || !clockGate) clock
+    else ClockGate(clock, clock_en, "ptw_clock_gate")
+  withClock (gated_clock) { // entering gated-clock domain
+
+  val invalidated = Reg(Bool())
+  /** current PTE level
+    * {{{
+    * 0 <= count <= pgLevel-1
+    * count = pgLevel - 1 : leaf PTE
+    * count < pgLevel - 1 : non-leaf PTE
+    * }}}
+    */
+  val count = Reg(UInt(log2Ceil(pgLevels).W))
+  val resp_ae_ptw = Reg(Bool())
+  val resp_ae_final = Reg(Bool())
+  val resp_pf = Reg(Bool())
+  val resp_gf = Reg(Bool())
+  val resp_hr = Reg(Bool())
+  val resp_hw = Reg(Bool())
+  val resp_hx = Reg(Bool())
+  val resp_fragmented_superpage = Reg(Bool())
+
+  /** tlb request */
+  val r_req = Reg(new PTWReq(vpnBits))
+  /** currently selected way in the arbiter */
+  val r_req_dest = Reg(Bits())
+  // to respond to L1TLB : l2_hit
+  // to construct mem.req.addr
+  val r_pte = Reg(new PTE)
+  val r_hgatp = Reg(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits))
+  // 2-stage pageLevel
+  val aux_count = Reg(UInt(log2Ceil(pgLevels).W))
+  /** pte for 2-stage translation */
+  val aux_pte = Reg(new PTE)
+  val aux_ppn_hi = Option.when(pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth)(Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W)))
+  val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case
+  val stage2 = Reg(Bool())
+  val stage2_final = Reg(Bool())
+
+  val satp = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp, io.dpath.ptbr)
+  val r_hgatp_initial_count = pgLevels.U - minPgLevels.U - r_hgatp.additionalPgLevels
+  /** 2-stage translation both enable */
+  val do_both_stages = r_req.vstage1 && r_req.stage2
+  val max_count = count max aux_count
+  val vpn = Mux(r_req.vstage1 && stage2, aux_pte.ppn, r_req.addr)
+
+  val mem_resp_valid = RegNext(io.mem.resp.valid)
+  val mem_resp_data = RegNext(io.mem.resp.bits.data)
+  io.mem.uncached_resp.map { resp =>
+    assert(!(resp.valid && io.mem.resp.valid))
+    resp.ready := true.B
+    when (resp.valid) {
+      mem_resp_valid := true.B
+      mem_resp_data := resp.bits.data
+    }
+  }
+  // construct pte from mem.resp
+  val (pte, invalid_paddr) = {
+    val tmp = mem_resp_data.asTypeOf(new PTE())
+    val res = WireDefault(tmp)
+    res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth)-1, 0), tmp.ppn(ppnBits-1, 0))
+    when (tmp.r || tmp.w || tmp.x) {
+      // for superpage mappings, make sure PPN LSBs are zero
+      for (i <- 0 until pgLevels-1)
+        when (count <= i.U && tmp.ppn((pgLevels-1-i)*pgLevelBits-1, (pgLevels-2-i)*pgLevelBits) =/= 0.U) { res.v := false.B }
+    }
+    (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits) =/= 0.U, (tmp.ppn >> ppnBits) =/= 0.U))
+  }
+  // find non-leaf PTE, need traverse
+  val traverse = pte.table() && !invalid_paddr && count < (pgLevels-1).U
+  /** address sent to mem for the PTE query */
+  val pte_addr = if (!usingVM) 0.U else {
+    val vpn_idxs = (0 until pgLevels).map { i =>
+      val width = pgLevelBits + (if (i <= pgLevels - minPgLevels) hypervisorExtraAddrBits else 0)
+      (vpn >> (pgLevels - i - 1) * pgLevelBits)(width - 1, 0)
+    }
+    val mask     = Mux(stage2 && count === r_hgatp_initial_count, ((1 << (hypervisorExtraAddrBits + pgLevelBits)) - 1).U, ((1 << pgLevelBits) - 1).U)
+    val vpn_idx  = vpn_idxs(count) & mask
+    val raw_pte_addr = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8)
+    val size = if (usingHypervisor) vaddrBits else paddrBits
+    //use r_pte.ppn as page table base address
+    //use vpn slice as offset
+    raw_pte_addr.apply(size.min(raw_pte_addr.getWidth) - 1, 0)
+  }
+  /** pte_cache input addr */
+  val pte_cache_addr = if (!usingHypervisor) pte_addr else {
+    val vpn_idxs = (0 until pgLevels-1).map { i =>
+      val ext_aux_pte_ppn = aux_ppn_hi match {
+        case None     => aux_pte.ppn
+        case Some(hi) => Cat(hi, aux_pte.ppn)
+      }
+      (ext_aux_pte_ppn >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0)
+    }
+    val vpn_idx = vpn_idxs(count)
+    val raw_pte_cache_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen/8)
+    raw_pte_cache_addr(vaddrBits.min(raw_pte_cache_addr.getWidth)-1, 0)
+  }
+  /** stage2_pte_cache input addr */
+  val stage2_pte_cache_addr = if (!usingHypervisor) 0.U else {
+    val vpn_idxs = (0 until pgLevels - 1).map { i =>
+      (r_req.addr >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0)
+    }
+    val vpn_idx  = vpn_idxs(aux_count)
+    val raw_s2_pte_cache_addr = Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8)
+    raw_s2_pte_cache_addr(vaddrBits.min(raw_s2_pte_cache_addr.getWidth) - 1, 0)
+  }
+
+  def makeFragmentedSuperpagePPN(ppn: UInt): Seq[UInt] = {
+    (pgLevels-1 until 0 by -1).map(i => Cat(ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
+  }
+  /** PTECache caches non-leaf PTE
+    * @param s2 true: 2-stage address translation
+    */
+  def makePTECache(s2: Boolean): (Bool, UInt) = if (nPTECacheEntries == 0) {
+    (false.B, 0.U)
+  } else {
+    val plru = new PseudoLRU(nPTECacheEntries)
+    val valid = RegInit(0.U(nPTECacheEntries.W))
+    val tags = Reg(Vec(nPTECacheEntries, UInt((if (usingHypervisor) 1 + vaddrBits else paddrBits).W)))
+    // not include full pte, only ppn
+    val data = Reg(Vec(nPTECacheEntries, UInt((if (usingHypervisor && s2) vpnBits else ppnBits).W)))
+    val can_hit =
+      if (s2) count === r_hgatp_initial_count && aux_count < (pgLevels-1).U && r_req.vstage1 && stage2 && !stage2_final
+      else count < (pgLevels-1).U && Mux(r_req.vstage1, stage2, !r_req.stage2)
+    val can_refill =
+      if (s2) do_both_stages && !stage2 && !stage2_final
+      else can_hit
+    val tag =
+      if (s2) Cat(true.B, stage2_pte_cache_addr.padTo(vaddrBits))
+      else Cat(r_req.vstage1, pte_cache_addr.padTo(if (usingHypervisor) vaddrBits else paddrBits))
+
+    val hits = tags.map(_ === tag).asUInt & valid
+    val hit = hits.orR && can_hit
+    // refill with mem response
+    when (mem_resp_valid && traverse && can_refill && !hits.orR && !invalidated) {
+      val r = Mux(valid.andR, plru.way, PriorityEncoder(~valid))
+      valid := valid | UIntToOH(r)
+      tags(r) := tag
+      data(r) := pte.ppn
+      plru.access(r)
+    }
+    // replace
+    when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
+    when (io.dpath.sfence.valid && (!io.dpath.sfence.bits.rs1 || usingHypervisor.B && io.dpath.sfence.bits.hg)) { valid := 0.U }
+
+    val lcount = if (s2) aux_count else count
+    for (i <- 0 until pgLevels-1) {
+      ccover(hit && state === s_req && lcount === i.U, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i")
+    }
+
+    (hit, Mux1H(hits, data))
+  }
+  // generate pte_cache
+  val (pte_cache_hit, pte_cache_data) = makePTECache(false)
+  // generate pte_cache with 2-stage translation
+  val (stage2_pte_cache_hit, stage2_pte_cache_data) = makePTECache(true)
+  // pte_cache hit or 2-stage pte_cache hit
+  val pte_hit = RegNext(false.B)
+  io.dpath.perf.pte_miss := false.B
+  io.dpath.perf.pte_hit := pte_hit && (state === s_req) && !io.dpath.perf.l2hit
+  assert(!(io.dpath.perf.l2hit && (io.dpath.perf.pte_miss || io.dpath.perf.pte_hit)),
+    "PTE Cache Hit/Miss Performance Monitor Events are lower priority than L2TLB Hit event")
+  // l2_refill is asserted once a leaf PTE has been found
+  val l2_refill = RegNext(false.B)
+  l2_refill_wire := l2_refill
+  io.dpath.perf.l2miss := false.B
+  io.dpath.perf.l2hit := false.B
+  // l2tlb
+  val (l2_hit, l2_error, l2_pte, l2_tlb_ram) = if (nL2TLBEntries == 0) (false.B, false.B, WireDefault(0.U.asTypeOf(new PTE)), None) else {
+    val code = new ParityCode
+    require(isPow2(nL2TLBEntries))
+    require(isPow2(nL2TLBWays))
+    require(nL2TLBEntries >= nL2TLBWays)
+    val nL2TLBSets = nL2TLBEntries / nL2TLBWays
+    require(isPow2(nL2TLBSets))
+    val idxBits = log2Ceil(nL2TLBSets)
+
+    val l2_plru = new SetAssocLRU(nL2TLBSets, nL2TLBWays, "plru")
+
+    val ram =  DescribedSRAM(
+      name = "l2_tlb_ram",
+      desc = "L2 TLB",
+      size = nL2TLBSets,
+      data = Vec(nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets).getWidth).W))
+    )
+
+    val g = Reg(Vec(nL2TLBWays, UInt(nL2TLBSets.W)))
+    val valid = RegInit(VecInit(Seq.fill(nL2TLBWays)(0.U(nL2TLBSets.W))))
+    // use r_req to construct tag
+    val (r_tag, r_idx) = Split(Cat(r_req.vstage1, r_req.addr(maxSVAddrBits-pgIdxBits-1, 0)), idxBits)
+    /** the valid vec for the selected set(including n ways) */
+    val r_valid_vec = valid.map(_(r_idx)).asUInt
+    val r_valid_vec_q = Reg(UInt(nL2TLBWays.W))
+    val r_l2_plru_way = Reg(UInt(log2Ceil(nL2TLBWays max 1).W))
+    r_valid_vec_q := r_valid_vec
+    // replacement way
+    r_l2_plru_way := (if (nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U)
+    // refill with r_pte(leaf pte)
+    when (l2_refill && !invalidated) {
+      val entry = Wire(new L2TLBEntry(nL2TLBSets))
+      entry.ppn := r_pte.ppn
+      entry.d := r_pte.d
+      entry.a := r_pte.a
+      entry.u := r_pte.u
+      entry.x := r_pte.x
+      entry.w := r_pte.w
+      entry.r := r_pte.r
+      entry.tag := r_tag
+      // if all the ways are valid, use PLRU to select the way to be replaced,
+      // otherwise use PriorityEncoderOH to select an invalid way
+      val wmask = if (nL2TLBWays > 1) Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) else 1.U(1.W)
+      ram.write(r_idx, VecInit(Seq.fill(nL2TLBWays)(code.encode(entry.asUInt))), wmask.asBools)
+
+      val mask = UIntToOH(r_idx)
+      for (way <- 0 until nL2TLBWays) {
+        when (wmask(way)) {
+          valid(way) := valid(way) | mask
+          g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask)
+        }
+      }
+    }
+    // sfence happens
+    when (io.dpath.sfence.valid) {
+      val hg = usingHypervisor.B && io.dpath.sfence.bits.hg
+      for (way <- 0 until nL2TLBWays) {
+        valid(way) :=
+          Mux(!hg && io.dpath.sfence.bits.rs1, valid(way) & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits+pgIdxBits-1, pgIdxBits)),
+          Mux(!hg && io.dpath.sfence.bits.rs2, valid(way) & g(way),
+          0.U))
+      }
+    }
+
+    val s0_valid = !l2_refill && arb.io.out.fire
+    val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.need_gpa
+    val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid)
+    val s2_valid = RegNext(s1_valid)
+    // read from tlb idx
+    val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits-1, 0), s0_valid)
+    val s2_rdata = s1_rdata.map(s1_rdway => code.decode(RegEnable(s1_rdway, s1_valid)))
+    val s2_valid_vec = RegEnable(r_valid_vec, s1_valid)
+    val s2_g_vec = RegEnable(VecInit(g.map(_(r_idx))), s1_valid)
+    val s2_error = (0 until nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR
+    when (s2_valid && s2_error) { valid.foreach { _ := 0.U }}
+    // decode
+    val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets)))
+    val s2_hit_vec = (0 until nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag))
+    val s2_hit = s2_valid && s2_hit_vec.orR
+    io.dpath.perf.l2miss := s2_valid && !(s2_hit_vec.orR)
+    io.dpath.perf.l2hit := s2_hit
+    when (s2_hit) {
+      l2_plru.access(r_idx, OHToUInt(s2_hit_vec))
+      assert((PopCount(s2_hit_vec) === 1.U) || s2_error, "L2 TLB multi-hit")
+    }
+
+    val s2_pte = Wire(new PTE)
+    val s2_hit_entry = Mux1H(s2_hit_vec, s2_entry_vec)
+    s2_pte.ppn := s2_hit_entry.ppn
+    s2_pte.d := s2_hit_entry.d
+    s2_pte.a := s2_hit_entry.a
+    s2_pte.g := Mux1H(s2_hit_vec, s2_g_vec)
+    s2_pte.u := s2_hit_entry.u
+    s2_pte.x := s2_hit_entry.x
+    s2_pte.w := s2_hit_entry.w
+    s2_pte.r := s2_hit_entry.r
+    s2_pte.v := true.B
+    s2_pte.reserved_for_future := 0.U
+    s2_pte.reserved_for_software := 0.U
+
+    for (way <- 0 until nL2TLBWays) {
+      ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way")
+    }
+
+    (s2_hit, s2_error, s2_pte, Some(ram))
+  }
+
+  // if SFENCE occurs during walk, don't refill PTE cache or L2 TLB until next walk
+  invalidated := io.dpath.sfence.valid || (invalidated && state =/= s_ready)
+  // mem request
+  io.mem.req.valid := state === s_req || state === s_dummy1
+  io.mem.req.bits.phys := true.B
+  io.mem.req.bits.cmd  := M_XRD
+  io.mem.req.bits.size := log2Ceil(xLen/8).U
+  io.mem.req.bits.signed := false.B
+  io.mem.req.bits.addr := pte_addr
+  io.mem.req.bits.idx.foreach(_ := pte_addr)
+  io.mem.req.bits.dprv := PRV.S.U   // PTW accesses are S-mode by definition
+  io.mem.req.bits.dv := do_both_stages && !stage2
+  io.mem.s1_kill := l2_hit || state =/= s_wait1
+  io.mem.s2_kill := false.B
+
+  val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits)
+  require(!usingHypervisor || pageGranularityPMPs, s"hypervisor requires pmpGranularity >= ${1<<pgIdxBits}")
+
+  val pmaPgLevelHomogeneous = (0 until pgLevels) map { i =>
+    val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits))
+    if (pageGranularityPMPs && i == pgLevels - 1) {
+      require(TLBPageLookup.homogeneous(edge.manager.managers, pgSize), s"All memory regions must be $pgSize-byte aligned")
+      true.B
+    } else {
+      TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous
+    }
+  }
+  val pmaHomogeneous = pmaPgLevelHomogeneous(count)
+  val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp, paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits).apply(r_pte.ppn << pgIdxBits, count)
+  val homogeneous = pmaHomogeneous && pmpHomogeneous
+  // response to tlb
+  for (i <- 0 until io.requestor.size) {
+    io.requestor(i).resp.valid := resp_valid(i)
+    io.requestor(i).resp.bits.ae_ptw := resp_ae_ptw
+    io.requestor(i).resp.bits.ae_final := resp_ae_final
+    io.requestor(i).resp.bits.pf := resp_pf
+    io.requestor(i).resp.bits.gf := resp_gf
+    io.requestor(i).resp.bits.hr := resp_hr
+    io.requestor(i).resp.bits.hw := resp_hw
+    io.requestor(i).resp.bits.hx := resp_hx
+    io.requestor(i).resp.bits.pte := r_pte
+    io.requestor(i).resp.bits.level := max_count
+    io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs.B
+    io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs.B
+    io.requestor(i).resp.bits.gpa.valid := r_req.need_gpa
+    io.requestor(i).resp.bits.gpa.bits :=
+      Cat(Mux(!stage2_final || !r_req.vstage1 || aux_count === (pgLevels - 1).U, aux_pte.ppn, makeFragmentedSuperpagePPN(aux_pte.ppn)(aux_count)), gpa_pgoff)
+    io.requestor(i).resp.bits.gpa_is_pte := !stage2_final
+    io.requestor(i).ptbr := io.dpath.ptbr
+    io.requestor(i).hgatp := io.dpath.hgatp
+    io.requestor(i).vsatp := io.dpath.vsatp
+    io.requestor(i).customCSRs := io.dpath.customCSRs
+    io.requestor(i).status := io.dpath.status
+    io.requestor(i).hstatus := io.dpath.hstatus
+    io.requestor(i).gstatus := io.dpath.gstatus
+    io.requestor(i).pmp := io.dpath.pmp
+  }
+
+  // control state machine
+  val next_state = WireDefault(state)
+  state := OptimizationBarrier(next_state)
+  val do_switch = WireDefault(false.B)
+
+  switch (state) {
+    is (s_ready) {
+      when (arb.io.out.fire) {
+        val satp_initial_count = pgLevels.U - minPgLevels.U - satp.additionalPgLevels
+        val vsatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.vsatp.additionalPgLevels
+        val hgatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.hgatp.additionalPgLevels
+        val aux_ppn             = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp.ppn, arb.io.out.bits.bits.addr)
+
+        r_req := arb.io.out.bits.bits
+        r_req_dest := arb.io.chosen
+        next_state := Mux(arb.io.out.bits.valid, s_req, s_ready)
+        stage2       := arb.io.out.bits.bits.stage2
+        stage2_final := arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.vstage1
+        count       := Mux(arb.io.out.bits.bits.stage2, hgatp_initial_count, satp_initial_count)
+        aux_count   := Mux(arb.io.out.bits.bits.vstage1, vsatp_initial_count, 0.U)
+        aux_pte.ppn := aux_ppn
+        aux_ppn_hi.foreach { _ := aux_ppn >> aux_pte.ppn.getWidth }
+        aux_pte.reserved_for_future := 0.U
+        resp_ae_ptw := false.B
+        resp_ae_final := false.B
+        resp_pf := false.B
+        resp_gf := false.B
+        resp_hr := true.B
+        resp_hw := true.B
+        resp_hx := true.B
+        resp_fragmented_superpage := false.B
+        r_hgatp := io.dpath.hgatp
+
+        assert(!arb.io.out.bits.bits.need_gpa || arb.io.out.bits.bits.stage2)
+      }
+    }
+    is (s_req) {
+      when(stage2 && count === r_hgatp_initial_count) {
+        gpa_pgoff := Mux(aux_count === (pgLevels-1).U, r_req.addr << (xLen/8).log2, stage2_pte_cache_addr)
+      }
+      // pte_cache hit
+      when (stage2_pte_cache_hit) {
+        aux_count := aux_count + 1.U
+        aux_pte.ppn := stage2_pte_cache_data
+        aux_ppn_hi.foreach { _ := 0.U }
+        aux_pte.reserved_for_future := 0.U
+        pte_hit := true.B
+      }.elsewhen (pte_cache_hit) {
+        count := count + 1.U
+        pte_hit := true.B
+      }.otherwise {
+        next_state := Mux(io.mem.req.ready, s_wait1, s_req)
+      }
+    }
+    is (s_wait1) {
+      // This Mux is for the l2_error case; the l2_hit && !l2_error case is overridden below
+      next_state := Mux(l2_hit, s_req, s_wait2)
+    }
+    is (s_wait2) {
+      next_state := s_wait3
+      io.dpath.perf.pte_miss := count < (pgLevels-1).U
+      when (io.mem.s2_xcpt.ae.ld) {
+        resp_ae_ptw := true.B
+        next_state := s_ready
+        resp_valid(r_req_dest) := true.B
+      }
+    }
+    is (s_fragment_superpage) {
+      next_state := s_ready
+      resp_valid(r_req_dest) := true.B
+      when (!homogeneous) {
+        count := (pgLevels-1).U
+        resp_fragmented_superpage := true.B
+      }
+      when (do_both_stages) {
+        resp_fragmented_superpage := true.B
+      }
+    }
+  }
+
+  val merged_pte = {
+    val superpage_masks = (0 until pgLevels).map(i => ((BigInt(1) << pte.ppn.getWidth) - (BigInt(1) << (pgLevels-1-i)*pgLevelBits)).U)
+    val superpage_mask = superpage_masks(Mux(stage2_final, max_count, (pgLevels-1).U))
+    val stage1_ppns = (0 until pgLevels-1).map(i => Cat(pte.ppn(pte.ppn.getWidth-1, (pgLevels-i-1)*pgLevelBits), aux_pte.ppn((pgLevels-i-1)*pgLevelBits-1,0))) :+ pte.ppn
+    val stage1_ppn = stage1_ppns(count)
+    makePTE(stage1_ppn & superpage_mask, aux_pte)
+  }
+
+  r_pte :=
+    // l2tlb hit->find a leaf PTE(l2_pte), respond to L1TLB
+    Mux(l2_hit && !l2_error, l2_pte,
+    // pte cache hit->find a non-leaf PTE(pte_cache),continue to request mem
+    Mux(state === s_req && !stage2_pte_cache_hit && pte_cache_hit, makePTE(pte_cache_data, l2_pte),
+    // 2-stage translation
+    Mux(do_switch, makeHypervisorRootPTE(r_hgatp, pte.ppn, r_pte),
+    // when mem respond, store mem.resp.pte
+    Mux(mem_resp_valid, Mux(!traverse && r_req.vstage1 && stage2, merged_pte, pte),
+    // fragment_superpage
+    Mux(state === s_fragment_superpage && !homogeneous && count =/= (pgLevels - 1).U, makePTE(makeFragmentedSuperpagePPN(r_pte.ppn)(count), r_pte),
+    // when tlb request come->request mem, use root address in satp(or vsatp,hgatp)
+    Mux(arb.io.out.fire, Mux(arb.io.out.bits.bits.stage2, makeHypervisorRootPTE(io.dpath.hgatp, io.dpath.vsatp.ppn, r_pte), makePTE(satp.ppn, r_pte)),
+    r_pte))))))
+
+  when (l2_hit && !l2_error) {
+    assert(state === s_req || state === s_wait1)
+    next_state := s_ready
+    resp_valid(r_req_dest) := true.B
+    count := (pgLevels-1).U
+  }
+  when (mem_resp_valid) {
+    assert(state === s_wait3)
+    next_state := s_req
+    when (traverse) {
+      when (do_both_stages && !stage2) { do_switch := true.B }
+      count := count + 1.U
+    }.otherwise {
+      val gf = stage2 && !stage2_final && !pte.ur()
+      val ae = pte.v && invalid_paddr
+      val pf = pte.v && pte.reserved_for_future =/= 0.U
+      val success = pte.v && !ae && !pf && !gf
+
+      when (do_both_stages && !stage2_final && success) {
+        when (stage2) {
+          stage2 := false.B
+          count := aux_count
+        }.otherwise {
+          stage2_final := true.B
+          do_switch := true.B
+        }
+      }.otherwise {
+        // find a leaf pte, start l2 refill
+        l2_refill := success && count === (pgLevels-1).U && !r_req.need_gpa &&
+          (!r_req.vstage1 && !r_req.stage2 ||
+           do_both_stages && aux_count === (pgLevels-1).U && pte.isFullPerm())
+        count := max_count
+
+        when (pageGranularityPMPs.B && !(count === (pgLevels-1).U && (!do_both_stages || aux_count === (pgLevels-1).U))) {
+          next_state := s_fragment_superpage
+        }.otherwise {
+          next_state := s_ready
+          resp_valid(r_req_dest) := true.B
+        }
+
+        resp_ae_final := ae
+        resp_pf := pf && !stage2
+        resp_gf := gf || (pf && stage2)
+        resp_hr := !stage2 || (!pf && !gf && pte.ur())
+        resp_hw := !stage2 || (!pf && !gf && pte.uw())
+        resp_hx := !stage2 || (!pf && !gf && pte.ux())
+      }
+    }
+  }
+  when (io.mem.s2_nack) {
+    assert(state === s_wait2)
+    next_state := s_req
+  }
+
+  when (do_switch) {
+    aux_count := Mux(traverse, count + 1.U, count)
+    count := r_hgatp_initial_count
+    aux_pte := Mux(traverse, pte, {
+      val s1_ppns = (0 until pgLevels-1).map(i => Cat(pte.ppn(pte.ppn.getWidth-1, (pgLevels-i-1)*pgLevelBits), r_req.addr(((pgLevels-i-1)*pgLevelBits min vpnBits)-1,0).padTo((pgLevels-i-1)*pgLevelBits))) :+ pte.ppn
+      makePTE(s1_ppns(count), pte)
+    })
+    aux_ppn_hi.foreach { _ := 0.U }
+    stage2 := true.B
+  }
+
+  for (i <- 0 until pgLevels) {
+    val leaf = mem_resp_valid && !traverse && count === i.U
+    ccover(leaf && pte.v && !invalid_paddr && pte.reserved_for_future === 0.U, s"L$i", s"successful page-table access, level $i")
+    ccover(leaf && pte.v && invalid_paddr, s"L${i}_BAD_PPN_MSB", s"PPN too large, level $i")
+    ccover(leaf && pte.v && pte.reserved_for_future =/= 0.U, s"L${i}_BAD_RSV_MSB", s"reserved MSBs set, level $i")
+    ccover(leaf && !mem_resp_data(0), s"L${i}_INVALID_PTE", s"page not present, level $i")
+    if (i != pgLevels-1)
+      ccover(leaf && !pte.v && mem_resp_data(0), s"L${i}_BAD_PPN_LSB", s"PPN LSBs not zero, level $i")
+  }
+  ccover(mem_resp_valid && count === (pgLevels-1).U && pte.table(), s"TOO_DEEP", s"page table too deep")
+  ccover(io.mem.s2_nack, "NACK", "D$ nacked page-table access")
+  ccover(state === s_wait2 && io.mem.s2_xcpt.ae.ld, "AE", "access exception while walking page table")
+
+  } // leaving gated-clock domain
+
+  private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) =
+    if (usingVM) property.cover(cond, s"PTW_$label", "MemorySystem;;" + desc)
+
+  /** Relace PTE.ppn with ppn */
+  private def makePTE(ppn: UInt, default: PTE) = {
+    val pte = WireDefault(default)
+    pte.ppn := ppn
+    pte
+  }
+  /** use hgatp and vpn to construct a new ppn */
+  private def makeHypervisorRootPTE(hgatp: PTBR, vpn: UInt, default: PTE) = {
+    val count = pgLevels.U - minPgLevels.U - hgatp.additionalPgLevels
+    val idxs = (0 to pgLevels-minPgLevels).map(i => (vpn >> (pgLevels-i)*pgLevelBits))
+    val lsbs = WireDefault(UInt(maxHypervisorExtraAddrBits.W), idxs(count))
+    val pte = WireDefault(default)
+    pte.ppn := Cat(hgatp.ppn >> maxHypervisorExtraAddrBits, lsbs)
+    pte
+  }
+}
+
+/** Mix-ins for constructing tiles that might have a PTW */
+trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile =>
+  val module: CanHavePTWModule
+  var nPTWPorts = 1
+  nDCachePorts += usingPTW.toInt
+}
+
+trait CanHavePTWModule extends HasHellaCacheModule {
+  val outer: CanHavePTW
+  val ptwPorts = ListBuffer(outer.dcache.module.io.ptw)
+  val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p))
+  ptw.io.mem <> DontCare
+  if (outer.usingPTW) {
+    dcachePorts += ptw.io.mem
+  }
+}
diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index d72e49bb4..9295886c2 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -198,7 +198,7 @@ class TLBEntry(
   def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = {
     this.tag_vpn := vpn
     this.tag_v := virtual
-    this.level := level(log2Ceil(pgLevels - superpageOnly.B.litValue) - 1, 0)
+    this.level := level(log2Ceil(pgLevels - superpageOnly.B) - 1, 0)
 
     val idx = sectorIdx(vpn)
     valid(idx) := true.B
@@ -318,10 +318,12 @@ class TLB(
   vaddrBits: Int,
   vaddrBitsExtended: Int,
   paddrBits: Int,
+  maxPAddrBits: Int,
   hypervisorExtraAddrBits: Int,
   asIdBits: Int,
   xLen: Int,
   cacheBlockBytes: Int,
+  customCSRsParam: CustomCSRs,
   debugModuleAddress: Option[AddressSet],
   memoryCacheable: Boolean,
   memoryHomogenous: Boolean,
@@ -339,7 +341,10 @@ class TLB(
     /** SFence Input */
     val sfence = Flipped(Valid((new SFenceReq(vaddrBits, asIdBits))))
     /** IO to PTW */
-    val ptw = new TLBPTWIO()
+    val ptw = new TLBPTWIO(
+      xLen, vpnBits, pgLevels, minPgLevels, pgLevelBits, maxPAddrBits,
+      pgIdxBits, vaddrBits, paddrBits, pmpGranularity, nPMPs, customCSRsParam
+    )
     /** suppress a TLB refill, one cycle after a miss */
     val kill = Input(Bool())
   })
@@ -568,7 +573,7 @@ class TLB(
     val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits
     VecInit(Seq.range(0, nPgLevelChoices).map {
       i =>
-        val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.B.litValue.toInt))).U
+        val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.B))).U
         val maskedVAddr = io.req.bits.vaddr & mask
         additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask)
     }).asUInt.orR
@@ -660,7 +665,7 @@ class TLB(
   io.resp.ma.inst := false.B // this is up to the pipeline to figure out
   io.resp.cacheable := (c_array & hits).orR
   io.resp.must_alloc := (must_alloc_array & hits).orR
-  io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint).B
+  io.resp.prefetchable := (prefetchable_array & hits).orR && memSlaves.forall(m => !m.supportsAcquireB || m.supportsHint).B
   io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits
   io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
   io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte
diff --git a/rocket/src/util/AddressDecoder.scala b/rocket/src/util/AddressDecoder.scala
index fea3e515a..8a84c8873 100644
--- a/rocket/src/util/AddressDecoder.scala
+++ b/rocket/src/util/AddressDecoder.scala
@@ -1,11 +1,9 @@
 // See LICENSE.SiFive for license details.
 
-package org.chipsalliance.rocket
+package org.chipsalliance.rocket.util
 
 import Chisel.log2Ceil
 
-import org.chipsalliance.rocket.util._
-
 object AddressDecoder
 {
   type Port = Seq[AddressSet]
diff --git a/rocket/src/util/Annotations.scala b/rocket/src/util/Annotations.scala
new file mode 100644
index 000000000..f4ebaeb23
--- /dev/null
+++ b/rocket/src/util/Annotations.scala
@@ -0,0 +1,298 @@
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket.util
+
+import Chisel._
+import chisel3.internal.InstanceId
+import chisel3.experimental.{annotate, ChiselAnnotation}
+import chisel3.RawModule
+import firrtl.annotations._
+
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods.{pretty, render}
+
+/** Record a sram. */
+case class SRAMAnnotation(target: Named,
+  address_width: Int,
+  name: String,
+  data_width: Int,
+  depth: BigInt,
+  description: String,
+  write_mask_granularity: Int) extends SingleTargetAnnotation[Named] {
+  def duplicate(n: Named) = this.copy(n)
+}
+
+/** Record a set of interrupts. */
+case class InterruptsPortAnnotation(target: Named, name: String, interruptIndexes: Seq[Int]) extends SingleTargetAnnotation[Named] {
+  def duplicate(n: Named) = this.copy(n)
+}
+
+/** Record a case class that was used to parameterize this target. */
+case class GlobalConstantsAnnotation(target: Named, xLen: Int) extends SingleTargetAnnotation[Named] {
+  def duplicate(n: Named) = this.copy(n)
+}
+
+case class GlobalConstantsChiselAnnotation[T <: Product](target: InstanceId, xLen: Int) extends ChiselAnnotation {
+  def toFirrtl = GlobalConstantsAnnotation(target.toNamed, xLen)
+}
+
+/** Record a case class that was used to parameterize this target. */
+case class ParamsAnnotation(target: Named, paramsClassName: String, params: Map[String,Any]) extends SingleTargetAnnotation[Named] {
+  def duplicate(n: Named) = this.copy(n)
+}
+
+case class ParamsChiselAnnotation[T <: Product](target: InstanceId, params: T) extends ChiselAnnotation {
+  private val paramMap = params.getClass.getDeclaredFields.map(_.getName).zip(params.productIterator).toMap
+  def toFirrtl = ParamsAnnotation(target.toNamed, params.getClass.getName, paramMap)
+}
+
+/** Record an address map. */
+case class AddressMapAnnotation(target: Named, mapping: Seq[AddressMapEntry], label: String) extends SingleTargetAnnotation[Named] {
+  def duplicate(n: Named) = this.copy(n)
+
+  def toUVM: String =
+    s"// Instance Name: ${target.serialize}\n" +
+      mapping.map(_.range.toUVM).mkString("\n")
+
+  def toJSON: String =
+    s"""{\n  "${label}":  [\n""" +
+      mapping.map(_.range.toJSON).mkString(",\n") +
+      "\n  ]\n}"
+}
+
+/** Marks this module as a candidate for register retiming */
+case class RetimeModuleAnnotation(target: ModuleName) extends SingleTargetAnnotation[ModuleName] {
+  def duplicate(n: ModuleName) = this.copy(n)
+}
+
+/** Annotation capturing information about port slave devices. */
+case class SlaveAddressMapChiselAnnotation(
+    target: InstanceId,
+    addresses: Seq[AddressSet],
+    perms: ResourcePermissions) extends ChiselAnnotation {
+  private val range = AddressRange.fromSets(addresses)
+  def toFirrtl = AddressMapAnnotation(
+    target = target.toNamed,
+    mapping = range.map { r => AddressMapEntry(r, perms, Nil) },
+    label = "slaves")
+}
+
+/** Record information about a top-level port of the design */
+case class TopLevelPortAnnotation(
+  target: ComponentName,
+  protocol: String,
+  tags: Seq[String],
+  names: Seq[String],
+  width: Int,
+  address: Seq[AddressSet]) extends SingleTargetAnnotation[ComponentName] {
+  def duplicate(n: ComponentName): TopLevelPortAnnotation = this.copy(n)
+}
+
+/** Record the resetVector. */
+case class ResetVectorAnnotation(target: Named, resetVec: BigInt) extends SingleTargetAnnotation[Named] {
+  def duplicate(n: Named): ResetVectorAnnotation = this.copy(n)
+}
+
+/** Helper object containing methods for applying annotations to targets */
+object Annotated {
+
+  def srams(
+    component: InstanceId,
+    name: String,
+    address_width: Int,
+    data_width: Int,
+    depth: BigInt,
+    description: String,
+    write_mask_granularity: Int): Unit = {
+    annotate(new ChiselAnnotation {def toFirrtl: Annotation = SRAMAnnotation(
+      component.toNamed,
+      address_width = address_width,
+      name = name,
+      data_width = data_width,
+      depth = depth,
+      description = description,
+      write_mask_granularity = write_mask_granularity
+    )})}
+
+  def interrupts(component: InstanceId, name: String, interrupts: Seq[Int]): Unit = {
+    annotate(new ChiselAnnotation {def toFirrtl: Annotation = InterruptsPortAnnotation(
+      component.toNamed,
+      name,
+      interrupts
+    )})
+  }
+
+  def resetVector(component: InstanceId, resetVec: BigInt): Unit = {
+    annotate(new ChiselAnnotation {def toFirrtl: Annotation = ResetVectorAnnotation(component.toNamed, resetVec)})
+  }
+
+  def constants(component: InstanceId, xLen: Int): Unit = {
+    annotate(GlobalConstantsChiselAnnotation(component, xLen ))
+  }
+
+  def params[T <: Product](component: InstanceId, params: T): T = {
+    annotate(ParamsChiselAnnotation(component, params))
+    params
+  }
+
+  def addressMapping(component: InstanceId, mapping: Seq[AddressMapEntry]): Seq[AddressMapEntry] = {
+    annotate(new ChiselAnnotation { def toFirrtl = AddressMapAnnotation(component.toNamed, mapping, "mapping") })
+    mapping
+  }
+
+  def port[T <: Data](
+    data: T,
+    protocol: String,
+    tags: Seq[String],
+    names: Seq[String],
+    width: Int,
+    address: Seq[AddressSet] = Nil): T = {
+    annotate(new ChiselAnnotation { def toFirrtl = TopLevelPortAnnotation(data.toNamed, protocol, tags, names, width, address) })
+    data
+  }
+}
+
+/** Mix this into a Module class or instance to mark its ports as untouchable */
+trait DontTouch { self: RawModule =>
+  // TODO: replace this with an implicit class from UserModule that uses getPorts
+  // TODO: this is a workaround for firrtl #756
+  def dontTouch(data: Data): Unit = data match {
+     case agg: Aggregate => agg.getElements.foreach(dontTouch)
+     case elt: Element => chisel3.dontTouch(elt)
+  }
+
+  /** Marks every port as don't touch
+    *
+    * @note This method can only be called after the Module has been fully constructed
+    *   (after Module(...))
+    */
+  def dontTouchPorts(): this.type = {
+    self.getModulePorts.foreach(dontTouch(_))
+    self
+  }
+
+  def dontTouchPortsExcept(f: Data => Boolean): this.type = {
+    self.getModulePorts.filterNot(f).foreach(dontTouch(_))
+    self
+  }
+}
+
+/** Mix this into a Module class or instance to mark it for register retiming */
+trait ShouldBeRetimed { self: RawModule =>
+  chisel3.experimental.annotate(new ChiselAnnotation { def toFirrtl: RetimeModuleAnnotation = RetimeModuleAnnotation(self.toNamed) })
+}
+
+case class RegFieldDescMappingAnnotation(
+  target: ModuleName,
+  regMappingSer: RegistersSer) extends SingleTargetAnnotation[ModuleName] {
+  def duplicate(n: ModuleName): RegFieldDescMappingAnnotation = this.copy(target = n)
+}
+
+object InterruptsPortAnnotation {
+  val GLOBAL_EXTERNAL_INTERRUPTS = "global-external-interrupts"
+  val LOCAL_EXTERNAL_INTERRUPTS = "local-external-interrupts"
+  val LOCAL_INTERRUPTS_STARTING_NUMBER = 16 /* TODO the ISA specfication reserves the first 12 interrupts but
+  somewhere in DTS 16 is used as the starting number. */
+
+}
+
+object GenRegDescsAnno {
+
+  def makeRegMappingSer(
+    rawModule: RawModule,
+    moduleName: String,
+    baseAddress: BigInt,
+    width: Int,
+    byteOffset: Int,
+    bitOffset: Int,
+    regField: RegField): RegFieldDescSer = {
+
+    val anonRegFieldName = s"unnamedRegField${byteOffset.toHexString}_${bitOffset}"
+    val selectedRegFieldName = regField.desc.map(_.name).getOrElse(anonRegFieldName)
+
+    val map = Map[BigInt, (String, String)]() // TODO
+
+// TODO: enumerations will be handled in upcoming PR
+//    ("enumerations" -> desc.map {d =>
+//      Option(d.enumerations.map { case (key, (name, edesc)) =>
+//        (("value" -> key) ~ ("name" -> name) ~ ("description" -> edesc))
+//      }).filter(_.nonEmpty)}) )
+
+    val desc = regField.desc
+
+    val regFieldDescSer = RegFieldDescSer(
+      byteOffset = s"0x${byteOffset.toInt.toHexString}",
+      bitOffset = bitOffset,
+      bitWidth = width,
+      name = selectedRegFieldName,
+      desc = desc.map {_.desc}.getOrElse("None"),
+      group = desc.map {_.group.getOrElse("None")}.getOrElse("None"),
+      groupDesc = desc.map {_.groupDesc.getOrElse("None")}.getOrElse("None"),
+      accessType = desc.map {_.access.toString}.getOrElse("None"),
+      wrType = desc.map(_.wrType.toString).getOrElse("None"),
+      rdAction = desc.map(_.rdAction.toString).getOrElse("None"),
+      volatile = desc.map(_.volatile).getOrElse(false),
+      hasReset = desc.map {_.reset != None }.getOrElse(false),
+      resetValue = desc.map{_.reset.getOrElse(BigInt(0))}.getOrElse(BigInt(0)),
+      enumerations = map
+    )
+
+    regFieldDescSer
+  }
+
+
+  def anno(
+    rawModule: RawModule,
+    baseAddress: BigInt,
+    mapping: RegField.Map*): Seq[RegField.Map] = {
+
+    val moduleName = rawModule.name
+    val baseHex = s"0x${baseAddress.toInt.toHexString}"
+    val displayName = s"${moduleName}.${baseHex}"
+
+    val regFieldSers = mapping.flatMap {
+      case (byteOffset, seq) =>
+        seq.map(_.width).scanLeft(0)(_ + _).zip(seq).map { case (bitOffset, regField) =>
+          makeRegMappingSer(
+            rawModule,
+            moduleName,
+            baseAddress,
+            regField.width,
+            byteOffset,
+            bitOffset,
+            regField
+          )
+        }
+    }
+
+    val registersSer = RegistersSer(
+      displayName = moduleName,
+      deviceName = moduleName,
+      baseAddress = baseAddress,
+      regFields = regFieldSers // Seq[RegFieldSer]()
+    )
+
+    /* annotate the module with the registers */
+    annotate(new ChiselAnnotation { def toFirrtl = RegFieldDescMappingAnnotation(rawModule.toNamed, registersSer) })
+
+    mapping
+  }
+
+
+  def serialize(base: BigInt, name: String, mapping: RegField.Map*): String = {
+
+
+    val regDescs = mapping.flatMap { case (byte, seq) =>
+      seq.map(_.width).scanLeft(0)(_ + _).zip(seq).map { case (bit, f) =>
+        val anonName = s"unnamedRegField${byte.toHexString}_${bit}"
+        (f.desc.map{ _.name}.getOrElse(anonName)) -> f.toJson(byte, bit)
+      }
+    }
+
+    pretty(render(
+      ("peripheral" -> (
+        ("displayName" -> name) ~
+          ("baseAddress" -> s"0x${base.toInt.toHexString}") ~
+          ("regfields" -> regDescs)))))
+  }
+}
\ No newline at end of file
diff --git a/rocket/src/util/ClockGate.scala b/rocket/src/util/ClockGate.scala
new file mode 100644
index 000000000..043d33b8a
--- /dev/null
+++ b/rocket/src/util/ClockGate.scala
@@ -0,0 +1,53 @@
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3._
+import chisel3.util.{HasBlackBoxResource, HasBlackBoxPath}
+
+import java.nio.file.{Files, Paths}
+
+case object ClockGateImpl extends Field[() => ClockGate](() => new EICG_wrapper)
+case object ClockGateModelFile extends Field[Option[String]](None)
+
+abstract class ClockGate extends BlackBox
+  with HasBlackBoxResource with HasBlackBoxPath {
+  val io = IO(new Bundle{
+    val in = Input(Clock())
+    val test_en = Input(Bool())
+    val en = Input(Bool())
+    val out = Output(Clock())
+  })
+
+  def addVerilogResource(vsrc: String): Unit = {
+    if (Files.exists(Paths.get(vsrc)))
+      addPath(vsrc)
+    else
+      addResource(vsrc)
+  }
+}
+
+object ClockGate {
+  def apply[T <: ClockGate](
+      in: Clock,
+      en: Bool,
+      name: Option[String] = None): Clock = {
+    val cg = Module(p(ClockGateImpl)())
+    name.foreach(cg.suggestName(_))
+    p(ClockGateModelFile).map(cg.addVerilogResource(_))
+
+    cg.io.in := in
+    cg.io.test_en := false.B
+    cg.io.en := en
+    cg.io.out
+  }
+
+  def apply[T <: ClockGate](
+      in: Clock,
+      en: Bool,
+      name: String): Clock =
+    apply(in, en, Some(name))
+}
+
+// behavioral model of Integrated Clock Gating cell
+class EICG_wrapper extends ClockGate
\ No newline at end of file
diff --git a/rocket/src/util/DescribedSRAM.scala b/rocket/src/util/DescribedSRAM.scala
new file mode 100644
index 000000000..c82ddc7e7
--- /dev/null
+++ b/rocket/src/util/DescribedSRAM.scala
@@ -0,0 +1,40 @@
+// See LICENSE.Berkeley for license details.
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3.{Data, SyncReadMem, Vec}
+import chisel3.util.log2Ceil
+
+object DescribedSRAM {
+  def apply[T <: Data](
+    name: String,
+    desc: String,
+    size: BigInt, // depth
+    data: T
+  ): SyncReadMem[T] = {
+
+    val mem = SyncReadMem(size, data)
+
+    mem.suggestName(name)
+
+    val granWidth = data match {
+      case v: Vec[_] => v.head.getWidth
+      case d => d.getWidth
+    }
+
+    val uid = 0
+
+    Annotated.srams(
+      component = mem,
+      name = name,
+      address_width = log2Ceil(size),
+      data_width = data.getWidth,
+      depth = size,
+      description = desc,
+      write_mask_granularity = granWidth
+    )
+
+    mem
+  }
+}
\ No newline at end of file

From d7168cf0ad04a4dc37f53229b96f7337aa80eb72 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 5 Jun 2023 15:10:22 +0800
Subject: [PATCH 26/32] TLB: resolve dependencies

---
 rocket/src/PTW.scala                |  36 +---
 rocket/src/util/Annotations.scala   | 298 ----------------------------
 rocket/src/util/ClockGate.scala     |   8 +-
 rocket/src/util/DescribedSRAM.scala |  10 -
 rocket/src/util/ECC.scala           | 233 ++++++++++++++++++++++
 rocket/src/util/Misc.scala          |  16 ++
 6 files changed, 263 insertions(+), 338 deletions(-)
 delete mode 100644 rocket/src/util/Annotations.scala
 create mode 100644 rocket/src/util/ECC.scala

diff --git a/rocket/src/PTW.scala b/rocket/src/PTW.scala
index bbcaa4363..d425eee4e 100644
--- a/rocket/src/PTW.scala
+++ b/rocket/src/PTW.scala
@@ -260,10 +260,13 @@ class PTW(
   nL2TLBWays: Int,
   hypervisorExtraAddrBits: Int,
   maxHypervisorExtraAddrBits: Int,
+  maxSVAddrBits: Int,
+  cacheBlockBytes: Int,
   customCSRsParam: CustomCSRs,
+  memSlaves: Seq[MemSlaveParameters],
   clockGate: Boolean,
   usingVM: Boolean,
-  usingHypervisor:Boolean
+  usingHypervisor: Boolean
 ) extends Module {
   val io = IO(new Bundle {
     /** to n TLB */
@@ -484,7 +487,7 @@ class PTW(
       name = "l2_tlb_ram",
       desc = "L2 TLB",
       size = nL2TLBSets,
-      data = Vec(nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets).getWidth).W))
+      data = Vec(nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets, maxSVAddrBits, pgIdxBits, ppnBits, usingHypervisor).getWidth).W))
     )
 
     val g = Reg(Vec(nL2TLBWays, UInt(nL2TLBSets.W)))
@@ -500,7 +503,7 @@ class PTW(
     r_l2_plru_way := (if (nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U)
     // refill with r_pte(leaf pte)
     when (l2_refill && !invalidated) {
-      val entry = Wire(new L2TLBEntry(nL2TLBSets))
+      val entry = Wire(new L2TLBEntry(nL2TLBSets, maxSVAddrBits, pgIdxBits, ppnBits, usingHypervisor))
       entry.ppn := r_pte.ppn
       entry.d := r_pte.d
       entry.a := r_pte.a
@@ -545,7 +548,7 @@ class PTW(
     val s2_error = (0 until nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR
     when (s2_valid && s2_error) { valid.foreach { _ := 0.U }}
     // decode
-    val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets)))
+    val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets, maxSVAddrBits, pgIdxBits, ppnBits, usingHypervisor)))
     val s2_hit_vec = (0 until nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag))
     val s2_hit = s2_valid && s2_hit_vec.orR
     io.dpath.perf.l2miss := s2_valid && !(s2_hit_vec.orR)
@@ -597,10 +600,10 @@ class PTW(
   val pmaPgLevelHomogeneous = (0 until pgLevels) map { i =>
     val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits))
     if (pageGranularityPMPs && i == pgLevels - 1) {
-      require(TLBPageLookup.homogeneous(edge.manager.managers, pgSize), s"All memory regions must be $pgSize-byte aligned")
+      require(TLBPageLookup.homogeneous(memSlaves, pgSize), s"All memory regions must be $pgSize-byte aligned")
       true.B
     } else {
-      TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous
+      TLBPageLookup(memSlaves, xLen, cacheBlockBytes, pgSize)(r_pte.ppn << pgIdxBits).homogeneous
     }
   }
   val pmaHomogeneous = pmaPgLevelHomogeneous(count)
@@ -636,7 +639,7 @@ class PTW(
 
   // control state machine
   val next_state = WireDefault(state)
-  state := OptimizationBarrier(next_state)
+  state := next_state
   val do_switch = WireDefault(false.B)
 
   switch (state) {
@@ -818,7 +821,7 @@ class PTW(
   } // leaving gated-clock domain
 
   private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) =
-    if (usingVM) property.cover(cond, s"PTW_$label", "MemorySystem;;" + desc)
+    if (usingVM) cover(cond, s"PTW_$label; MemorySystem;;" + desc)
 
   /** Relace PTE.ppn with ppn */
   private def makePTE(ppn: UInt, default: PTE) = {
@@ -836,20 +839,3 @@ class PTW(
     pte
   }
 }
-
-/** Mix-ins for constructing tiles that might have a PTW */
-trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile =>
-  val module: CanHavePTWModule
-  var nPTWPorts = 1
-  nDCachePorts += usingPTW.toInt
-}
-
-trait CanHavePTWModule extends HasHellaCacheModule {
-  val outer: CanHavePTW
-  val ptwPorts = ListBuffer(outer.dcache.module.io.ptw)
-  val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p))
-  ptw.io.mem <> DontCare
-  if (outer.usingPTW) {
-    dcachePorts += ptw.io.mem
-  }
-}
diff --git a/rocket/src/util/Annotations.scala b/rocket/src/util/Annotations.scala
deleted file mode 100644
index f4ebaeb23..000000000
--- a/rocket/src/util/Annotations.scala
+++ /dev/null
@@ -1,298 +0,0 @@
-// See LICENSE.SiFive for license details.
-
-package org.chipsalliance.rocket.util
-
-import Chisel._
-import chisel3.internal.InstanceId
-import chisel3.experimental.{annotate, ChiselAnnotation}
-import chisel3.RawModule
-import firrtl.annotations._
-
-import org.json4s.JsonDSL._
-import org.json4s.jackson.JsonMethods.{pretty, render}
-
-/** Record a sram. */
-case class SRAMAnnotation(target: Named,
-  address_width: Int,
-  name: String,
-  data_width: Int,
-  depth: BigInt,
-  description: String,
-  write_mask_granularity: Int) extends SingleTargetAnnotation[Named] {
-  def duplicate(n: Named) = this.copy(n)
-}
-
-/** Record a set of interrupts. */
-case class InterruptsPortAnnotation(target: Named, name: String, interruptIndexes: Seq[Int]) extends SingleTargetAnnotation[Named] {
-  def duplicate(n: Named) = this.copy(n)
-}
-
-/** Record a case class that was used to parameterize this target. */
-case class GlobalConstantsAnnotation(target: Named, xLen: Int) extends SingleTargetAnnotation[Named] {
-  def duplicate(n: Named) = this.copy(n)
-}
-
-case class GlobalConstantsChiselAnnotation[T <: Product](target: InstanceId, xLen: Int) extends ChiselAnnotation {
-  def toFirrtl = GlobalConstantsAnnotation(target.toNamed, xLen)
-}
-
-/** Record a case class that was used to parameterize this target. */
-case class ParamsAnnotation(target: Named, paramsClassName: String, params: Map[String,Any]) extends SingleTargetAnnotation[Named] {
-  def duplicate(n: Named) = this.copy(n)
-}
-
-case class ParamsChiselAnnotation[T <: Product](target: InstanceId, params: T) extends ChiselAnnotation {
-  private val paramMap = params.getClass.getDeclaredFields.map(_.getName).zip(params.productIterator).toMap
-  def toFirrtl = ParamsAnnotation(target.toNamed, params.getClass.getName, paramMap)
-}
-
-/** Record an address map. */
-case class AddressMapAnnotation(target: Named, mapping: Seq[AddressMapEntry], label: String) extends SingleTargetAnnotation[Named] {
-  def duplicate(n: Named) = this.copy(n)
-
-  def toUVM: String =
-    s"// Instance Name: ${target.serialize}\n" +
-      mapping.map(_.range.toUVM).mkString("\n")
-
-  def toJSON: String =
-    s"""{\n  "${label}":  [\n""" +
-      mapping.map(_.range.toJSON).mkString(",\n") +
-      "\n  ]\n}"
-}
-
-/** Marks this module as a candidate for register retiming */
-case class RetimeModuleAnnotation(target: ModuleName) extends SingleTargetAnnotation[ModuleName] {
-  def duplicate(n: ModuleName) = this.copy(n)
-}
-
-/** Annotation capturing information about port slave devices. */
-case class SlaveAddressMapChiselAnnotation(
-    target: InstanceId,
-    addresses: Seq[AddressSet],
-    perms: ResourcePermissions) extends ChiselAnnotation {
-  private val range = AddressRange.fromSets(addresses)
-  def toFirrtl = AddressMapAnnotation(
-    target = target.toNamed,
-    mapping = range.map { r => AddressMapEntry(r, perms, Nil) },
-    label = "slaves")
-}
-
-/** Record information about a top-level port of the design */
-case class TopLevelPortAnnotation(
-  target: ComponentName,
-  protocol: String,
-  tags: Seq[String],
-  names: Seq[String],
-  width: Int,
-  address: Seq[AddressSet]) extends SingleTargetAnnotation[ComponentName] {
-  def duplicate(n: ComponentName): TopLevelPortAnnotation = this.copy(n)
-}
-
-/** Record the resetVector. */
-case class ResetVectorAnnotation(target: Named, resetVec: BigInt) extends SingleTargetAnnotation[Named] {
-  def duplicate(n: Named): ResetVectorAnnotation = this.copy(n)
-}
-
-/** Helper object containing methods for applying annotations to targets */
-object Annotated {
-
-  def srams(
-    component: InstanceId,
-    name: String,
-    address_width: Int,
-    data_width: Int,
-    depth: BigInt,
-    description: String,
-    write_mask_granularity: Int): Unit = {
-    annotate(new ChiselAnnotation {def toFirrtl: Annotation = SRAMAnnotation(
-      component.toNamed,
-      address_width = address_width,
-      name = name,
-      data_width = data_width,
-      depth = depth,
-      description = description,
-      write_mask_granularity = write_mask_granularity
-    )})}
-
-  def interrupts(component: InstanceId, name: String, interrupts: Seq[Int]): Unit = {
-    annotate(new ChiselAnnotation {def toFirrtl: Annotation = InterruptsPortAnnotation(
-      component.toNamed,
-      name,
-      interrupts
-    )})
-  }
-
-  def resetVector(component: InstanceId, resetVec: BigInt): Unit = {
-    annotate(new ChiselAnnotation {def toFirrtl: Annotation = ResetVectorAnnotation(component.toNamed, resetVec)})
-  }
-
-  def constants(component: InstanceId, xLen: Int): Unit = {
-    annotate(GlobalConstantsChiselAnnotation(component, xLen ))
-  }
-
-  def params[T <: Product](component: InstanceId, params: T): T = {
-    annotate(ParamsChiselAnnotation(component, params))
-    params
-  }
-
-  def addressMapping(component: InstanceId, mapping: Seq[AddressMapEntry]): Seq[AddressMapEntry] = {
-    annotate(new ChiselAnnotation { def toFirrtl = AddressMapAnnotation(component.toNamed, mapping, "mapping") })
-    mapping
-  }
-
-  def port[T <: Data](
-    data: T,
-    protocol: String,
-    tags: Seq[String],
-    names: Seq[String],
-    width: Int,
-    address: Seq[AddressSet] = Nil): T = {
-    annotate(new ChiselAnnotation { def toFirrtl = TopLevelPortAnnotation(data.toNamed, protocol, tags, names, width, address) })
-    data
-  }
-}
-
-/** Mix this into a Module class or instance to mark its ports as untouchable */
-trait DontTouch { self: RawModule =>
-  // TODO: replace this with an implicit class from UserModule that uses getPorts
-  // TODO: this is a workaround for firrtl #756
-  def dontTouch(data: Data): Unit = data match {
-     case agg: Aggregate => agg.getElements.foreach(dontTouch)
-     case elt: Element => chisel3.dontTouch(elt)
-  }
-
-  /** Marks every port as don't touch
-    *
-    * @note This method can only be called after the Module has been fully constructed
-    *   (after Module(...))
-    */
-  def dontTouchPorts(): this.type = {
-    self.getModulePorts.foreach(dontTouch(_))
-    self
-  }
-
-  def dontTouchPortsExcept(f: Data => Boolean): this.type = {
-    self.getModulePorts.filterNot(f).foreach(dontTouch(_))
-    self
-  }
-}
-
-/** Mix this into a Module class or instance to mark it for register retiming */
-trait ShouldBeRetimed { self: RawModule =>
-  chisel3.experimental.annotate(new ChiselAnnotation { def toFirrtl: RetimeModuleAnnotation = RetimeModuleAnnotation(self.toNamed) })
-}
-
-case class RegFieldDescMappingAnnotation(
-  target: ModuleName,
-  regMappingSer: RegistersSer) extends SingleTargetAnnotation[ModuleName] {
-  def duplicate(n: ModuleName): RegFieldDescMappingAnnotation = this.copy(target = n)
-}
-
-object InterruptsPortAnnotation {
-  val GLOBAL_EXTERNAL_INTERRUPTS = "global-external-interrupts"
-  val LOCAL_EXTERNAL_INTERRUPTS = "local-external-interrupts"
-  val LOCAL_INTERRUPTS_STARTING_NUMBER = 16 /* TODO the ISA specfication reserves the first 12 interrupts but
-  somewhere in DTS 16 is used as the starting number. */
-
-}
-
-object GenRegDescsAnno {
-
-  def makeRegMappingSer(
-    rawModule: RawModule,
-    moduleName: String,
-    baseAddress: BigInt,
-    width: Int,
-    byteOffset: Int,
-    bitOffset: Int,
-    regField: RegField): RegFieldDescSer = {
-
-    val anonRegFieldName = s"unnamedRegField${byteOffset.toHexString}_${bitOffset}"
-    val selectedRegFieldName = regField.desc.map(_.name).getOrElse(anonRegFieldName)
-
-    val map = Map[BigInt, (String, String)]() // TODO
-
-// TODO: enumerations will be handled in upcoming PR
-//    ("enumerations" -> desc.map {d =>
-//      Option(d.enumerations.map { case (key, (name, edesc)) =>
-//        (("value" -> key) ~ ("name" -> name) ~ ("description" -> edesc))
-//      }).filter(_.nonEmpty)}) )
-
-    val desc = regField.desc
-
-    val regFieldDescSer = RegFieldDescSer(
-      byteOffset = s"0x${byteOffset.toInt.toHexString}",
-      bitOffset = bitOffset,
-      bitWidth = width,
-      name = selectedRegFieldName,
-      desc = desc.map {_.desc}.getOrElse("None"),
-      group = desc.map {_.group.getOrElse("None")}.getOrElse("None"),
-      groupDesc = desc.map {_.groupDesc.getOrElse("None")}.getOrElse("None"),
-      accessType = desc.map {_.access.toString}.getOrElse("None"),
-      wrType = desc.map(_.wrType.toString).getOrElse("None"),
-      rdAction = desc.map(_.rdAction.toString).getOrElse("None"),
-      volatile = desc.map(_.volatile).getOrElse(false),
-      hasReset = desc.map {_.reset != None }.getOrElse(false),
-      resetValue = desc.map{_.reset.getOrElse(BigInt(0))}.getOrElse(BigInt(0)),
-      enumerations = map
-    )
-
-    regFieldDescSer
-  }
-
-
-  def anno(
-    rawModule: RawModule,
-    baseAddress: BigInt,
-    mapping: RegField.Map*): Seq[RegField.Map] = {
-
-    val moduleName = rawModule.name
-    val baseHex = s"0x${baseAddress.toInt.toHexString}"
-    val displayName = s"${moduleName}.${baseHex}"
-
-    val regFieldSers = mapping.flatMap {
-      case (byteOffset, seq) =>
-        seq.map(_.width).scanLeft(0)(_ + _).zip(seq).map { case (bitOffset, regField) =>
-          makeRegMappingSer(
-            rawModule,
-            moduleName,
-            baseAddress,
-            regField.width,
-            byteOffset,
-            bitOffset,
-            regField
-          )
-        }
-    }
-
-    val registersSer = RegistersSer(
-      displayName = moduleName,
-      deviceName = moduleName,
-      baseAddress = baseAddress,
-      regFields = regFieldSers // Seq[RegFieldSer]()
-    )
-
-    /* annotate the module with the registers */
-    annotate(new ChiselAnnotation { def toFirrtl = RegFieldDescMappingAnnotation(rawModule.toNamed, registersSer) })
-
-    mapping
-  }
-
-
-  def serialize(base: BigInt, name: String, mapping: RegField.Map*): String = {
-
-
-    val regDescs = mapping.flatMap { case (byte, seq) =>
-      seq.map(_.width).scanLeft(0)(_ + _).zip(seq).map { case (bit, f) =>
-        val anonName = s"unnamedRegField${byte.toHexString}_${bit}"
-        (f.desc.map{ _.name}.getOrElse(anonName)) -> f.toJson(byte, bit)
-      }
-    }
-
-    pretty(render(
-      ("peripheral" -> (
-        ("displayName" -> name) ~
-          ("baseAddress" -> s"0x${base.toInt.toHexString}") ~
-          ("regfields" -> regDescs)))))
-  }
-}
\ No newline at end of file
diff --git a/rocket/src/util/ClockGate.scala b/rocket/src/util/ClockGate.scala
index 043d33b8a..143c6e327 100644
--- a/rocket/src/util/ClockGate.scala
+++ b/rocket/src/util/ClockGate.scala
@@ -7,9 +7,6 @@ import chisel3.util.{HasBlackBoxResource, HasBlackBoxPath}
 
 import java.nio.file.{Files, Paths}
 
-case object ClockGateImpl extends Field[() => ClockGate](() => new EICG_wrapper)
-case object ClockGateModelFile extends Field[Option[String]](None)
-
 abstract class ClockGate extends BlackBox
   with HasBlackBoxResource with HasBlackBoxPath {
   val io = IO(new Bundle{
@@ -31,10 +28,11 @@ object ClockGate {
   def apply[T <: ClockGate](
       in: Clock,
       en: Bool,
+      modelFile: Option[String],
       name: Option[String] = None): Clock = {
-    val cg = Module(p(ClockGateImpl)())
+    val cg = Module(new EICG_wrapper)
     name.foreach(cg.suggestName(_))
-    p(ClockGateModelFile).map(cg.addVerilogResource(_))
+    modelFile.map(cg.addVerilogResource(_))
 
     cg.io.in := in
     cg.io.test_en := false.B
@@ -46,7 +44,8 @@ object ClockGate {
       in: Clock,
       en: Bool,
       name: String): Clock =
-    apply(in, en, Some(name))
+    // The third positional argument is now modelFile, so pass None there and the name fourth.
+    apply(in, en, None, Some(name))
 }
 
 // behavioral model of Integrated Clock Gating cell
diff --git a/rocket/src/util/DescribedSRAM.scala b/rocket/src/util/DescribedSRAM.scala
index c82ddc7e7..535781a7f 100644
--- a/rocket/src/util/DescribedSRAM.scala
+++ b/rocket/src/util/DescribedSRAM.scala
@@ -25,16 +25,6 @@ object DescribedSRAM {
 
     val uid = 0
 
-    Annotated.srams(
-      component = mem,
-      name = name,
-      address_width = log2Ceil(size),
-      data_width = data.getWidth,
-      depth = size,
-      description = desc,
-      write_mask_granularity = granWidth
-    )
-
     mem
   }
 }
\ No newline at end of file
diff --git a/rocket/src/util/ECC.scala b/rocket/src/util/ECC.scala
new file mode 100644
index 000000000..1b462cdf4
--- /dev/null
+++ b/rocket/src/util/ECC.scala
@@ -0,0 +1,233 @@
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3._
+import chisel3.util._
+import chisel3.util.random.LFSR
+
+abstract class Decoding
+{
+  def uncorrected: UInt
+  def corrected: UInt
+  def correctable: Bool
+  def uncorrectable: Bool // If true, correctable should be ignored
+  def error = correctable || uncorrectable
+}
+
+abstract class Code
+{
+  def canDetect: Boolean
+  def canCorrect: Boolean
+
+  def width(w0: Int): Int
+
+  /** Takes the unencoded width and returns a list of indices indicating which
+    * bits of the encoded value will be used for ecc
+    */
+  def eccIndices(width: Int): Seq[Int]
+
+  /** Encode x to a codeword suitable for decode.
+   *  If poison is true, the decoded value will report uncorrectable
+   *  error despite uncorrected == corrected == x.
+   */
+  def encode(x: UInt, poison: Bool = false.B): UInt
+  def decode(x: UInt): Decoding
+
+  /** Copy the bits in x to the right bit positions in an encoded word,
+   *  so that x === decode(swizzle(x)).uncorrected; but don't generate
+   *  the other code bits, so decode(swizzle(x)).error might be true.
+   *  For codes for which this operation is not trivial, throw an
+   *  UnsupportedOperationException.  */
+  def swizzle(x: UInt): UInt
+}
+
+class IdentityCode extends Code
+{
+  def canDetect = false
+  def canCorrect = false
+
+  def width(w0: Int) = w0
+  def eccIndices(width: Int) = Seq.empty[Int]
+  def encode(x: UInt, poison: Bool = false.B) = {
+    require (poison.isLit && poison.litValue == 0, "IdentityCode can not be poisoned")
+    x
+  }
+  def swizzle(x: UInt) = x
+  def decode(y: UInt) = new Decoding {
+    def uncorrected = y
+    def corrected = y
+    def correctable = false.B
+    def uncorrectable = false.B
+  }
+}
+
+class ParityCode extends Code
+{
+  def canDetect = true
+  def canCorrect = false
+
+  def width(w0: Int) = w0+1
+  def eccIndices(w0: Int) = Seq(w0)
+  def encode(x: UInt, poison: Bool = false.B) = Cat(x.xorR ^ poison, x)
+  def swizzle(x: UInt) = Cat(false.B, x)
+  def decode(y: UInt) = new Decoding {
+    val uncorrected = y(y.getWidth-2,0)
+    val corrected = uncorrected
+    val correctable = false.B
+    val uncorrectable = y.xorR
+  }
+}
+
+class SECCode extends Code
+{
+  def canDetect = true
+  def canCorrect = true
+
+  // SEC codes may or may not be poisonous depending on the length
+  // If the code is perfect, every non-codeword is correctable
+  def poisonous(n: Int) = !isPow2(n+1)
+
+  def width(k: Int) = {
+    val m = log2Floor(k) + 1
+    k + m + (if((1 << m) < m+k+1) 1 else 0)
+  }
+
+  def eccIndices(w0: Int) = {
+    (0 until width(w0)).collect {
+      case i if i >= w0 => i
+    }
+  }
+
+  def swizzle(x: UInt) = {
+    val k = x.getWidth
+    val n = width(k)
+    Cat(0.U((n-k).W), x)
+  }
+
+  // An (n=16, k=11) Hamming code is naturally encoded as:
+  //   PPxPxxxPxxxxxxxP where P are parity bits and x are data
+  //   Indexes typically start at 1, because then the P are on powers of two
+  // In systematic coding, you put all the data in the front:
+  //   xxxxxxxxxxxPPPPP
+  //   Indexes typically start at 0, because Computer Science
+  // For sanity when reading SRAMs, you want systematic form.
+
+  private def impl(n: Int, k: Int) = {
+    require (n >= 3 && k >= 1 && !isPow2(n))
+    val hamm2sys = IndexedSeq.tabulate(n+1) { i =>
+      if (i == 0) {
+        n /* undefined */
+      } else if (isPow2(i)) {
+        k + log2Ceil(i)
+      } else {
+        i - 1 - log2Ceil(i)
+      }
+    }
+    val sys2hamm = hamm2sys.zipWithIndex.sortBy(_._1).map(_._2).toIndexedSeq
+    def syndrome(j: Int) = {
+      val bit = 1 << j
+      ("b" + Seq.tabulate(n) { i =>
+        if ((sys2hamm(i) & bit) != 0) "1" else "0"
+      }.reverse.mkString).U
+    }
+    (hamm2sys, sys2hamm, syndrome _)
+  }
+
+  def encode(x: UInt, poison: Bool = false.B) = {
+    val k = x.getWidth
+    val n = width(k)
+    val (_, _, syndrome) = impl(n, k)
+
+    require ((poison.isLit && poison.litValue == 0) || poisonous(n), s"SEC code of length ${n} cannot be poisoned")
+
+    /* By setting the entire syndrome on poison, the corrected bit falls off the end of the code */
+    val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j)(k-1, 0) & x).xorR ^ poison }.asUInt
+    Cat(syndromeUInt, x)
+  }
+
+  def decode(y: UInt) = new Decoding {
+    val n = y.getWidth
+    val k = n - log2Ceil(n)
+    val (_, sys2hamm, syndrome) = impl(n, k)
+
+    val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j) & y).xorR }.asUInt
+
+    val hammBadBitOH = UIntToOH(syndromeUInt, n+1)
+    val sysBadBitOH = VecInit.tabulate(k) { i => hammBadBitOH(sys2hamm(i)) }.asUInt
+
+    val uncorrected = y(k-1, 0)
+    val corrected = uncorrected ^ sysBadBitOH
+    val correctable = syndromeUInt.orR
+    val uncorrectable = if (poisonous(n)) { syndromeUInt > n.U } else { false.B }
+  }
+}
+
+class SECDEDCode extends Code
+{
+  def canDetect = true
+  def canCorrect = true
+
+  private val sec = new SECCode
+  private val par = new ParityCode
+
+  def width(k: Int) = sec.width(k)+1
+  def eccIndices(w0: Int) = {
+    (0 until width(w0)).collect {
+      case i if i >= w0 => i
+    }
+  }
+  def encode(x: UInt, poison: Bool = false.B) = {
+    // toggling two bits ensures the error is uncorrectable
+    // to ensure corrected == uncorrected, we pick one redundant
+    // bit from SEC (the highest); correcting it does not affect
+    // corrected == uncorrected. the second toggled bit is the
+    // parity bit, which also does not appear in the decoding
+    val toggle_lo = Cat(poison.asUInt, poison.asUInt)
+    val toggle_hi = toggle_lo << (sec.width(x.getWidth)-1)
+    par.encode(sec.encode(x)) ^ toggle_hi
+  }
+  def swizzle(x: UInt) = par.swizzle(sec.swizzle(x))
+  def decode(x: UInt) = new Decoding {
+    val secdec = sec.decode(x(x.getWidth-2,0))
+    val pardec = par.decode(x)
+
+    val uncorrected = secdec.uncorrected
+    val corrected = secdec.corrected
+    val correctable = pardec.uncorrectable
+    val uncorrectable = !pardec.uncorrectable && secdec.correctable
+  }
+}
+
+object ErrGen
+{
+  // generate a 1-bit error with approximate probability 2^-f
+  def apply(width: Int, f: Int): UInt = {
+    require(width > 0 && f >= 0 && log2Up(width) + f <= 16)
+    UIntToOH(LFSR(16)(log2Up(width)+f-1,0))(width-1,0)
+  }
+  def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f)
+}
+
+trait CanHaveErrors extends Bundle {
+  val correctable: Option[ValidIO[UInt]]
+  val uncorrectable: Option[ValidIO[UInt]]
+}
+
+case class ECCParams(
+  bytes: Int = 1,
+  code: Code = new IdentityCode,
+  notifyErrors: Boolean = false,
+)
+
+object Code {
+  def fromString(s: Option[String]): Code = fromString(s.getOrElse("none"))
+  def fromString(s: String): Code = s.toLowerCase match {
+    case "none" => new IdentityCode
+    case "identity" => new IdentityCode
+    case "parity" => new ParityCode
+    case "sec" => new SECCode
+    case "secded" => new SECDEDCode
+    case _ => throw new IllegalArgumentException("Unknown ECC type")
+  }
+}
\ No newline at end of file
diff --git a/rocket/src/util/Misc.scala b/rocket/src/util/Misc.scala
index 46171b55f..c44773b54 100644
--- a/rocket/src/util/Misc.scala
+++ b/rocket/src/util/Misc.scala
@@ -40,4 +40,20 @@ object Random
   private def randomizer = LFSR(16)
   private def partition(value: UInt, slices: Int) =
     Seq.tabulate(slices)(i => value < UInt((((i + 1) << value.getWidth) / slices).W))
+}
+
+object Split
+{
+  def apply(x: UInt, n0: Int) = {
+    val w = x.getWidth
+    (x.extract(w-1,n0), x.extract(n0-1,0))
+  }
+  def apply(x: UInt, n1: Int, n0: Int) = {
+    val w = x.getWidth
+    (x.extract(w-1,n1), x.extract(n1-1,n0), x.extract(n0-1,0))
+  }
+  def apply(x: UInt, n2: Int, n1: Int, n0: Int) = {
+    val w = x.getWidth
+    (x.extract(w-1,n2), x.extract(n2-1,n1), x.extract(n1-1,n0), x.extract(n0-1,0))
+  }
 }
\ No newline at end of file

From 2d7cadc4ed0fe634f3158d865937138b8064b1f1 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 5 Jun 2023 16:16:30 +0800
Subject: [PATCH 27/32] TLB: migrate HellaCache

---
 rocket/src/HellaCache.scala | 175 ++++++++++++++++++++++++++++++++++++
 rocket/src/PTW.scala        |  33 ++++++-
 rocket/src/TLB.scala        |   4 +-
 3 files changed, 207 insertions(+), 5 deletions(-)
 create mode 100644 rocket/src/HellaCache.scala

diff --git a/rocket/src/HellaCache.scala b/rocket/src/HellaCache.scala
new file mode 100644
index 000000000..a243ddc4b
--- /dev/null
+++ b/rocket/src/HellaCache.scala
@@ -0,0 +1,175 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util.{isPow2,log2Ceil,log2Up,Decoupled,Valid}
+import chisel3.dontTouch
+import scala.collection.mutable.ListBuffer
+import org.chipsalliance.rocket.util._
+import org.chipsalliance.rocket.MemoryOpConstants._
+
+class HellaCacheReq(
+  xLen: Int,
+  coreDataBits: Int,
+  coreDataBytes: Int,
+  val subWordBits: Int,
+  cacheBlockBytes: Int,
+  cacheDataBeats: Int,
+  cacheDataBits: Int,
+  dcacheReqTagBits: Int,
+  dcacheArbPorts: Int,
+  untagBits: Int,
+  blockOffBits: Int,
+  rowBits: Int,
+  coreMaxAddrBits: Int,
+  pgIdxBits: Int,
+  val lrscCycles: Int, // ISA requires 16-insn LRSC sequences to succeed
+  nWays: Int,
+  nMMIOs: Int,
+  dataScratchpadBytes: Int,
+  dataECCBytes: Int,
+  dataCode: Code,
+  usingDataScratchpad: Boolean,
+  usingVM: Boolean
+) extends Bundle {
+  def wordBits = coreDataBits
+  def wordBytes = coreDataBytes
+  def subWordBytes = subWordBits / 8
+  def wordOffBits = log2Up(wordBytes)
+  def beatBytes = cacheBlockBytes / cacheDataBeats
+  def beatWords = beatBytes / wordBytes
+  def beatOffBits = log2Up(beatBytes)
+  def idxMSB = untagBits-1
+  def idxLSB = blockOffBits
+  def offsetmsb = idxLSB-1
+  def offsetlsb = wordOffBits
+  def rowWords = rowBits/wordBits
+  def doNarrowRead = coreDataBits * nWays % rowBits == 0
+  def eccBytes = dataECCBytes
+  val eccBits = dataECCBytes * 8
+  val encBits = dataCode.width(eccBits)
+  val encWordBits = encBits * (wordBits / eccBits)
+  def encDataBits = dataCode.width(coreDataBits) // NBDCache only
+  def encRowBits = encDataBits*rowWords
+  def lrscBackoff = 3 // disallow LRSC reacquisition briefly
+  def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant
+  def nIOMSHRs = nMMIOs
+  def maxUncachedInFlight = nMMIOs
+  def dataScratchpadSize = dataScratchpadBytes
+
+  require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)")
+  if (!usingDataScratchpad)
+    require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)")
+  // would need offset addr for puts if data width < xlen
+  require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)")
+
+  val phys = Bool()
+  val no_alloc = Bool()
+  val no_xcpt = Bool()
+
+  val addr = UInt(coreMaxAddrBits.W)
+  val idx  = Option.when(usingVM && untagBits > pgIdxBits)(UInt(coreMaxAddrBits.W))
+  val tag  = UInt((dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W)
+  val cmd  = UInt(M_SZ.W)
+  val size = UInt(log2Ceil(coreDataBytes.log2 + 1).W)
+  val signed = Bool()
+  val dprv = UInt(PRV.SZ.W)
+  val dv = Bool()
+}
+
+class HellaCacheWriteData(coreDataBits: Int, coreDataBytes: Int) extends Bundle {
+  val data = UInt(coreDataBits.W)
+  val mask = UInt(coreDataBytes.W)
+}
+
+class HellaCacheResp(coreDataBits: Int, coreDataBytes: Int) extends Bundle {
+  val replay = Bool()
+  val has_data = Bool()
+  val data_word_bypass = UInt(coreDataBits.W)
+  val data_raw = UInt(coreDataBits.W)
+  val store_data = UInt(coreDataBits.W)
+  val data = UInt(coreDataBits.W)
+  val mask = UInt(coreDataBytes.W)
+}
+
+class AlignmentExceptions extends Bundle {
+  val ld = Bool()
+  val st = Bool()
+}
+
+class HellaCacheExceptions extends Bundle {
+  val ma = new AlignmentExceptions
+  val pf = new AlignmentExceptions
+  val gf = new AlignmentExceptions
+  val ae = new AlignmentExceptions
+}
+
+class HellaCachePerfEvents extends Bundle {
+  val acquire = Bool()
+  val release = Bool()
+  val grant = Bool()
+  val tlbMiss = Bool()
+  val blocked = Bool()
+  val canAcceptStoreThenLoad = Bool()
+  val canAcceptStoreThenRMW = Bool()
+  val canAcceptLoadThenLoad = Bool()
+  val storeBufferEmptyAfterLoad = Bool()
+  val storeBufferEmptyAfterStore = Bool()
+}
+
+// interface between D$ and processor/DTLB
+class HellaCacheIO(
+  paddrBits: Int,
+  vaddrBitsExtended: Int,
+  separateUncachedResp: Boolean,
+  xLen: Int,
+  coreDataBits: Int,
+  coreDataBytes: Int,
+  subWordBits: Int,
+  cacheBlockBytes: Int,
+  cacheDataBeats: Int,
+  cacheDataBits: Int,
+  dcacheReqTagBits: Int,
+  dcacheArbPorts: Int,
+  untagBits: Int,
+  blockOffBits: Int,
+  rowBits: Int,
+  coreMaxAddrBits: Int,
+  pgIdxBits: Int,
+  lrscCycles: Int,
+  nWays: Int,
+  nMMIOs: Int,
+  dataScratchpadBytes: Int,
+  dataECCBytes: Int,
+  dataCode: Code,
+  usingDataScratchpad: Boolean,
+  usingVM: Boolean
+) extends Bundle {
+  val req = Decoupled(new HellaCacheReq(
+    xLen, coreDataBits, coreDataBytes, subWordBits, cacheBlockBytes, cacheDataBeats,
+    cacheDataBits, dcacheReqTagBits, dcacheArbPorts, untagBits, blockOffBits,
+    rowBits, coreMaxAddrBits, pgIdxBits, lrscCycles, nWays, nMMIOs, dataScratchpadBytes,
+    dataECCBytes, dataCode, usingDataScratchpad, usingVM
+  ))
+  val s1_kill = Output(Bool()) // kill previous cycle's req
+  val s1_data = Output(new HellaCacheWriteData(coreDataBits, coreDataBytes)) // data for previous cycle's req
+  val s2_nack = Input(Bool()) // req from two cycles ago is rejected
+  val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint)
+  val s2_kill = Output(Bool()) // kill req from two cycles ago
+  val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO
+  val s2_paddr = Input(UInt(paddrBits.W)) // translated address
+
+  val resp = Flipped(Valid(new HellaCacheResp(coreDataBits, coreDataBytes)))
+  val replay_next = Input(Bool())
+  val s2_xcpt = Input(new HellaCacheExceptions)
+  val s2_gpa = Input(UInt(vaddrBitsExtended.W))
+  val s2_gpa_is_pte = Input(Bool())
+  val uncached_resp = Option.when(separateUncachedResp)(Flipped(Decoupled(new HellaCacheResp(coreDataBits, coreDataBytes))))
+  val ordered = Input(Bool())
+  val perf = Input(new HellaCachePerfEvents())
+
+  val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself?
+  val clock_enabled = Input(Bool()) // is D$ currently being clocked?
+}
\ No newline at end of file
diff --git a/rocket/src/PTW.scala b/rocket/src/PTW.scala
index d425eee4e..b49958586 100644
--- a/rocket/src/PTW.scala
+++ b/rocket/src/PTW.scala
@@ -8,6 +8,7 @@ import chisel3.util.{Arbiter, Cat, Decoupled, Enum, Mux1H, OHToUInt, PopCount, P
 import chisel3.withClock
 import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.rocket.util._
+import org.chipsalliance.rocket.MemoryOpConstants._
 
 import scala.collection.mutable.ListBuffer
 
@@ -251,6 +252,7 @@ class PTW(
   maxPAddrBits: Int,
   pgIdxBits: Int,
   vaddrBits: Int,
+  vaddrBitsExtended: Int,
   paddrBits: Int,
   asIdBits: Int,
   pmpGranularity: Int,
@@ -262,11 +264,30 @@ class PTW(
   maxHypervisorExtraAddrBits: Int,
   maxSVAddrBits: Int,
   cacheBlockBytes: Int,
+  cacheDataBeats: Int,
+  cacheDataBits: Int,
+  coreDataBits: Int,
+  coreDataBytes: Int,
+  subWordBits: Int,
+  dcacheReqTagBits: Int,
+  dcacheArbPorts: Int,
+  untagBits: Int,
+  blockOffBits: Int,
+  rowBits: Int,
+  coreMaxAddrBits: Int,
+  lrscCycles: Int,
+  nWays: Int,
+  nMMIOs: Int,
+  dataScratchpadBytes: Int,
+  dataECCBytes: Int,
+  dataCode: Code,
   customCSRsParam: CustomCSRs,
   memSlaves: Seq[MemSlaveParameters],
   clockGate: Boolean,
   usingVM: Boolean,
-  usingHypervisor: Boolean
+  usingHypervisor: Boolean,
+  usingDataScratchpad: Boolean,
+  separateUncachedResp: Boolean
 ) extends Module {
   val io = IO(new Bundle {
     /** to n TLB */
@@ -278,7 +299,13 @@ class PTW(
       )
     ))
     /** to HellaCache */
-    val mem = new HellaCacheIO
+    val mem = new HellaCacheIO(
+      paddrBits, vaddrBitsExtended, separateUncachedResp,
+      xLen, coreDataBits, coreDataBytes, subWordBits, cacheBlockBytes, cacheDataBeats,
+      cacheDataBits, dcacheReqTagBits, dcacheArbPorts, untagBits, blockOffBits,
+      rowBits, coreMaxAddrBits, pgIdxBits, lrscCycles, nWays, nMMIOs, dataScratchpadBytes,
+      dataECCBytes, dataCode, usingDataScratchpad, usingVM
+    )
     /** to Core
       *
       * contains CSRs info and performance statistics
@@ -362,7 +389,7 @@ class PTW(
   }
   // construct pte from mem.resp
   val (pte, invalid_paddr) = {
-    val tmp = mem_resp_data.asTypeOf(new PTE())
+    val tmp = mem_resp_data.asTypeOf(new PTE)
     val res = WireDefault(tmp)
     res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth)-1, 0), tmp.ppn(ppnBits-1, 0))
     when (tmp.r || tmp.w || tmp.x) {
diff --git a/rocket/src/TLB.scala b/rocket/src/TLB.scala
index 9295886c2..bc060bc86 100644
--- a/rocket/src/TLB.scala
+++ b/rocket/src/TLB.scala
@@ -198,7 +198,7 @@ class TLBEntry(
   def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = {
     this.tag_vpn := vpn
     this.tag_v := virtual
-    this.level := level(log2Ceil(pgLevels - superpageOnly.B) - 1, 0)
+    this.level := level(log2Ceil(pgLevels - superpageOnly.B.litValue.toInt) - 1, 0)
 
     val idx = sectorIdx(vpn)
     valid(idx) := true.B
@@ -573,7 +573,7 @@ class TLB(
     val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits
     VecInit(Seq.range(0, nPgLevelChoices).map {
       i =>
-        val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.B))).U
+        val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.B.litValue.toInt))).U
         val maskedVAddr = io.req.bits.vaddr & mask
         additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask)
     }).asUInt.orR

From 855702879d050c224df12153976ea881fc57c72e Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Tue, 6 Jun 2023 14:15:03 +0800
Subject: [PATCH 28/32] TLB: restore diplomatic

---
 diplomatic/src/rocket/CSR.scala            |   1 +
 diplomatic/src/rocket/Consts.scala         |  88 +++
 diplomatic/src/rocket/FPU.scala            |   1 +
 diplomatic/src/rocket/TLB.scala            | 746 +++++++++++++++++++++
 diplomatic/src/rocket/TLBPermissions.scala | 114 ++++
 5 files changed, 950 insertions(+)
 create mode 100644 diplomatic/src/rocket/Consts.scala
 create mode 100644 diplomatic/src/rocket/TLB.scala
 create mode 100644 diplomatic/src/rocket/TLBPermissions.scala

diff --git a/diplomatic/src/rocket/CSR.scala b/diplomatic/src/rocket/CSR.scala
index 7d8a6e0b5..51da1e5b9 100644
--- a/diplomatic/src/rocket/CSR.scala
+++ b/diplomatic/src/rocket/CSR.scala
@@ -13,6 +13,7 @@ import freechips.rocketchip.util.property
 
 import scala.collection.mutable.LinkedHashMap
 import Instructions._
+import Instructions64._
 import CustomInstructions._
 
 class MStatus extends Bundle {
diff --git a/diplomatic/src/rocket/Consts.scala b/diplomatic/src/rocket/Consts.scala
new file mode 100644
index 000000000..8a0d36ba4
--- /dev/null
+++ b/diplomatic/src/rocket/Consts.scala
@@ -0,0 +1,88 @@
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket.constants
+
+import chisel3._
+import chisel3.util._
+import freechips.rocketchip.util._
+
+trait ScalarOpConstants { // NOTE(review): BR_*/A1_*/A2_*/IMM_*/DW_* look like decode select codes — confirm against the decoder
+  val SZ_BR = 3 // width in bits of the BR_* codes below
+  def BR_X    = BitPat("b???") // 3-bit all-don't-care pattern
+  def BR_EQ   = 0.U(3.W)
+  def BR_NE   = 1.U(3.W)
+  def BR_J    = 2.U(3.W)
+  def BR_N    = 3.U(3.W)
+  def BR_LT   = 4.U(3.W)
+  def BR_GE   = 5.U(3.W)
+  def BR_LTU  = 6.U(3.W)
+  def BR_GEU  = 7.U(3.W)
+
+  def A1_X    = BitPat("b??") // 2-bit all-don't-care pattern
+  def A1_ZERO = 0.U(2.W)
+  def A1_RS1  = 1.U(2.W)
+  def A1_PC   = 2.U(2.W)
+
+  def IMM_X  = BitPat("b???") // 3-bit all-don't-care pattern
+  def IMM_S  = 0.U(3.W)
+  def IMM_SB = 1.U(3.W)
+  def IMM_U  = 2.U(3.W)
+  def IMM_UJ = 3.U(3.W)
+  def IMM_I  = 4.U(3.W)
+  def IMM_Z  = 5.U(3.W)
+
+  def A2_X    = BitPat("b??") // 2-bit all-don't-care pattern
+  def A2_ZERO = 0.U(2.W)
+  def A2_SIZE = 1.U(2.W)
+  def A2_RS2  = 2.U(2.W)
+  def A2_IMM  = 3.U(2.W)
+
+  def X = BitPat("b?") // 1-bit don't-care
+  def N = BitPat("b0") // 1-bit pattern matching 0
+  def Y = BitPat("b1") // 1-bit pattern matching 1
+
+  val SZ_DW = 1 // DW_* selectors are a single bit (Bool literals below)
+  def DW_X  = X
+  def DW_32 = false.B
+  def DW_64 = true.B
+  def DW_XPR = DW_64 // alias of DW_64
+}
+
+trait MemoryOpConstants { // M_* memory command codes plus predicates that classify them
+  val NUM_XA_OPS = 9 // count of the M_XA_* commands defined below
+  val M_SZ      = 5 // width in bits of every M_* command code
+  def M_X       = BitPat("b?????"); // 5-bit all-don't-care pattern
+  def M_XRD     = "b00000".U; // int load
+  def M_XWR     = "b00001".U; // int store
+  def M_PFR     = "b00010".U; // prefetch with intent to read
+  def M_PFW     = "b00011".U; // prefetch with intent to write
+  def M_XA_SWAP = "b00100".U // AMO; classified as logical by isAMOLogical
+  def M_FLUSH_ALL = "b00101".U  // flush all lines
+  def M_XLR     = "b00110".U // NOTE(review): presumably load-reserved — confirm
+  def M_XSC     = "b00111".U // NOTE(review): presumably store-conditional — confirm
+  def M_XA_ADD  = "b01000".U // AMO; classified as arithmetic by isAMOArithmetic
+  def M_XA_XOR  = "b01001".U // AMO; classified as logical by isAMOLogical
+  def M_XA_OR   = "b01010".U // AMO; classified as logical by isAMOLogical
+  def M_XA_AND  = "b01011".U // AMO; classified as logical by isAMOLogical
+  def M_XA_MIN  = "b01100".U // AMO; classified as arithmetic by isAMOArithmetic
+  def M_XA_MAX  = "b01101".U // AMO; classified as arithmetic by isAMOArithmetic
+  def M_XA_MINU = "b01110".U // AMO; classified as arithmetic by isAMOArithmetic
+  def M_XA_MAXU = "b01111".U // AMO; classified as arithmetic by isAMOArithmetic
+  def M_FLUSH   = "b10000".U // write back dirty data and cede R/W permissions
+  def M_PWR     = "b10001".U // partial (masked) store
+  def M_PRODUCE = "b10010".U // write back dirty data and cede W permissions
+  def M_CLEAN   = "b10011".U // write back dirty data and retain R/W permissions
+  def M_SFENCE  = "b10100".U // SFENCE.VMA
+  def M_HFENCEV = "b10101".U // HFENCE.VVMA
+  def M_HFENCEG = "b10110".U // HFENCE.GVMA
+  def M_WOK     = "b10111".U // check write permissions but don't perform a write
+  def M_HLVX    = "b10000".U // HLVX instruction; NOTE(review): same encoding as M_FLUSH — confirm this is intended
+
+  def isAMOLogical(cmd: UInt) = cmd.isOneOf(M_XA_SWAP, M_XA_XOR, M_XA_OR, M_XA_AND)
+  def isAMOArithmetic(cmd: UInt) = cmd.isOneOf(M_XA_ADD, M_XA_MIN, M_XA_MAX, M_XA_MINU, M_XA_MAXU)
+  def isAMO(cmd: UInt) = isAMOLogical(cmd) || isAMOArithmetic(cmd)
+  def isPrefetch(cmd: UInt) = cmd === M_PFR || cmd === M_PFW
+  def isRead(cmd: UInt) = cmd.isOneOf(M_XRD, M_HLVX, M_XLR, M_XSC) || isAMO(cmd) // NOTE(review): M_FLUSH equals M_HLVX, so it matches here too
+  def isWrite(cmd: UInt) = cmd === M_XWR || cmd === M_PWR || cmd === M_XSC || isAMO(cmd)
+  def isWriteIntent(cmd: UInt) = isWrite(cmd) || cmd === M_PFW || cmd === M_XLR // writes plus M_PFW and M_XLR
+}
diff --git a/diplomatic/src/rocket/FPU.scala b/diplomatic/src/rocket/FPU.scala
index fa5597015..c83024ce5 100644
--- a/diplomatic/src/rocket/FPU.scala
+++ b/diplomatic/src/rocket/FPU.scala
@@ -11,6 +11,7 @@ import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.cde.config.Parameters
 import org.chipsalliance.rocket._
 import org.chipsalliance.rocket.Instructions._
+import org.chipsalliance.rocket.Instructions64._
 import freechips.rocketchip.util._
 import freechips.rocketchip.util.property
 
diff --git a/diplomatic/src/rocket/TLB.scala b/diplomatic/src/rocket/TLB.scala
new file mode 100644
index 000000000..c73a14536
--- /dev/null
+++ b/diplomatic/src/rocket/TLB.scala
@@ -0,0 +1,746 @@
+// See LICENSE.SiFive for license details.
+// See LICENSE.Berkeley for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util._
+
+import org.chipsalliance.cde.config.{Field, Parameters}
+import freechips.rocketchip.subsystem.CacheBlockBytes
+import freechips.rocketchip.diplomacy.RegionType
+import org.chipsalliance.rockettile.{CoreModule, CoreBundle}
+import freechips.rocketchip.tilelink._
+import freechips.rocketchip.util._
+import freechips.rocketchip.util.property
+import freechips.rocketchip.devices.debug.DebugModuleKey
+import chisel3.internal.sourceinfo.SourceInfo
+
+case object PgLevels extends Field[Int](2)
+case object ASIdBits extends Field[Int](0)
+case object VMIdBits extends Field[Int](0)
+
+/** =SFENCE=
+  * rs1 rs2
+  * {{{
+  *  0   0 -> flush All
+  *  0   1 -> flush by ASID
+  *  1   0 -> flush by ADDR
+  *  1   1 -> flush by ADDR and ASID
+  * }}}
+  * {{{
+  * If rs1=x0 and rs2=x0, the fence orders all reads and writes made to any level of the page tables, for all address spaces.
+  * If rs1=x0 and rs2!=x0, the fence orders all reads and writes made to any level of the page tables, but only for the address space identified by integer register rs2. Accesses to global mappings (see Section 4.3.1) are not ordered.
+  * If rs1!=x0 and rs2=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for all address spaces.
+  * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered.
+  * }}}
+  */
+class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
+  val rs1 = Bool() // NOTE(review): presumably set when the rs1 operand is not x0 (see table above) — confirm at the producer
+  val rs2 = Bool() // NOTE(review): presumably set when the rs2 operand is not x0 (see table above) — confirm at the producer
+  val addr = UInt(vaddrBits.W)
+  val asid = UInt((asIdBits max 1).W) // TODO zero-width
+  val hv = Bool() // NOTE(review): presumably marks HFENCE.VVMA — confirm
+  val hg = Bool() // NOTE(review): presumably marks HFENCE.GVMA — confirm
+}
+
+class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) {
+  /** request address from CPU. */
+  val vaddr = UInt(vaddrBitsExtended.W)
+  /** don't lookup TLB, bypass vaddr as paddr */
+  val passthrough = Bool()
+  /** log2 of the access size; the TLB builds its misalignment mask as UIntToOH(size) - 1. Wide enough to hold 0..lgMaxSize. */
+  val size = UInt(log2Ceil(lgMaxSize + 1).W)
+  /** memory command, M_SZ bits wide (compared against the M_* codes by the TLB). */
+  val cmd  = Bits(M_SZ.W)
+  val prv = UInt(PRV.SZ.W) // privilege mode of the access; the TLB only translates when prv <= PRV.S
+  /** virtualization mode */
+  val v = Bool()
+
+}
+
+class TLBExceptions extends Bundle {
+  val ld = Bool() // NOTE(review): presumably the load-side flag (cf. pf_ld_array / ae_ld_array in TLB) — confirm
+  val st = Bool() // NOTE(review): presumably the store-side flag (cf. pf_st_array / ae_st_array in TLB) — confirm
+  val inst = Bool() // NOTE(review): presumably the instruction-fetch flag (cf. pf_inst_array in TLB) — confirm
+}
+
+class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
+  // lookup responses
+  val miss = Bool()
+  /** physical address */
+  val paddr = UInt(paddrBits.W)
+  val gpa = UInt(vaddrBitsExtended.W) // NOTE(review): presumably the guest physical address — confirm
+  val gpa_is_pte = Bool() // NOTE(review): presumably set when gpa refers to a PTE access — confirm
+  /** page fault exception */
+  val pf = new TLBExceptions
+  /** guest page fault exception */
+  val gf = new TLBExceptions
+  /** access exception */
+  val ae = new TLBExceptions
+  /** misaligned access exception */
+  val ma = new TLBExceptions
+  /** if this address is cacheable */
+  val cacheable = Bool()
+  /** if caches must allocate this address */
+  val must_alloc = Bool()
+  /** if this address is prefetchable for caches */
+  val prefetchable = Bool()
+}
+
+class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) {
+  val ppn = UInt(ppnBits.W) // physical page number, taken from the PTE at refill
+  /** pte.u user */
+  val u = Bool()
+  /** pte.g global (refill stores pte.g && pte.v) */
+  val g = Bool()
+  /** access exception.
+    * D$ -> PTW -> TLB AE
+    * Alignment failed.
+    */
+  val ae_ptw = Bool()
+  val ae_final = Bool() // copied from the PTW response's ae_final at refill
+  /** page fault */
+  val pf = Bool()
+  /** guest page fault */
+  val gf = Bool()
+  /** supervisor write */
+  val sw = Bool()
+  /** supervisor execute */
+  val sx = Bool()
+  /** supervisor read */
+  val sr = Bool()
+  /** hypervisor write */
+  val hw = Bool()
+  /** hypervisor execute */
+  val hx = Bool()
+  /** hypervisor read */
+  val hr = Bool()
+  /** prot_w: target supportsPutFull, address not debug-denied, and PMP allows write (sampled at refill) */
+  val pw = Bool()
+  /** prot_x: target is executable, address not debug-denied, and PMP allows execute (sampled at refill) */
+  val px = Bool()
+  /** prot_r: target supportsGet, address not debug-denied, and PMP allows read (sampled at refill) */
+  val pr = Bool()
+
+  /** PutPartial supported by the target (prot_pp) */
+  val ppp = Bool()
+  /** AMO logical supported by the target (prot_al) */
+  val pal = Bool()
+  /** AMO arithmetic supported by the target (prot_aa) */
+  val paa = Bool()
+  /** get/put effects: target region type is PUT_EFFECTS or GET_EFFECTS (prot_eff) */
+  val eff = Bool()
+  /** cacheable: target supportsAcquireB, and this is an ITLB or no data scratchpad is in use */
+  val c = Bool()
+  /** fragmented_superpage support */
+  val fragmented_superpage = Bool()
+}
+
+/** basic cell for TLB data */
+class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean)(implicit p: Parameters) extends CoreBundle()(p) {
+  require(nSectors == 1 || !superpage) // a superpage entry always has exactly one sector
+  require(!superpageOnly || superpage) // superpageOnly is only legal on a superpage entry
+
+  val level = UInt(log2Ceil(pgLevels).W)
+  /** use vpn as tag */
+  val tag_vpn = UInt(vpnBits.W)
+  /** tag in virtualization mode */
+  val tag_v = Bool()
+  /** entry data */
+  val data = Vec(nSectors, UInt(new TLBEntryData().getWidth.W))
+  /** valid bit */
+  val valid = Vec(nSectors, Bool())
+  /** returns all entry data in this entry */
+  def entry_data = data.map(_.asTypeOf(new TLBEntryData))
+  /** returns the index of sector */
+  private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2-1, 0)
+  /** returns the entry data matched with this vpn*/
+  def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData))
+  /** returns whether a sector hits */
+  def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual)
+  /** returns whether tag matches vpn */
+  def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> nSectors.log2) === 0.U) && (tag_v === virtual)
+  /** returns hit signal */
+  def hit(vpn: UInt, virtual: Bool): Bool = {
+    if (superpage && usingVM) {
+      var tagMatch = valid.head && (tag_v === virtual)
+      for (j <- 0 until pgLevels) {
+        val base = (pgLevels - 1 - j) * pgLevelBits
+        val n = pgLevelBits + (if (j == 0) hypervisorExtraAddrBits else 0)
+        val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B // VPN slices below the stored level are not compared
+        tagMatch = tagMatch && (ignore || (tag_vpn ^ vpn)(base + n - 1, base) === 0.U)
+      }
+      tagMatch
+    } else {
+      val idx = sectorIdx(vpn)
+      valid(idx) && sectorTagMatch(vpn, virtual)
+    }
+  }
+  /** returns the ppn of the input TLBEntryData */
+  def ppn(vpn: UInt, data: TLBEntryData) = {
+    val supervisorVPNBits = pgLevels * pgLevelBits
+    if (superpage && usingVM) {
+      var res = data.ppn >> pgLevelBits*(pgLevels - 1)
+      for (j <- 1 until pgLevels) {
+        val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B // for ignored slices the vpn bits are OR-ed into the ppn
+        res = Cat(res, (Mux(ignore, vpn, 0.U) | data.ppn)(supervisorVPNBits - j*pgLevelBits - 1, supervisorVPNBits - (j + 1)*pgLevelBits))
+      }
+      res
+    } else {
+      data.ppn
+    }
+  }
+  /** does the refill
+    *
+    * find the target entry with vpn tag
+    * and replace the target entry with the input entry data
+    */
+  def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = {
+    this.tag_vpn := vpn
+    this.tag_v := virtual
+    this.level := level.extract(log2Ceil(pgLevels - superpageOnly.toInt)-1, 0)
+
+    val idx = sectorIdx(vpn)
+    valid(idx) := true.B
+    data(idx) := entry.asUInt
+  }
+
+  def invalidate(): Unit = { valid.foreach(_ := false.B) } // clears every sector's valid bit
+  def invalidate(virtual: Bool): Unit = { // clears every sector when tag_v equals `virtual`
+    for (v <- valid)
+      when (tag_v === virtual) { v := false.B }
+  }
+  def invalidateVPN(vpn: UInt, virtual: Bool): Unit = {
+    if (superpage) {
+      when (hit(vpn, virtual)) { invalidate() }
+    } else {
+      when (sectorTagMatch(vpn, virtual)) {
+        for ((v, i) <- valid.zipWithIndex)
+          when (tag_v === virtual && i.U === sectorIdx(vpn)) { v := false.B }
+      }
+    }
+    // For fragmented superpage mappings, we assume the worst (largest)
+    // case, and zap entries whose most-significant VPNs match
+    when (((tag_vpn ^ vpn) >> (pgLevelBits * (pgLevels - 1))) === 0.U) {
+      for ((v, e) <- valid zip entry_data)
+        when (tag_v === virtual && e.fragmented_superpage) { v := false.B }
+    }
+  }
+  def invalidateNonGlobal(virtual: Bool): Unit = { // clears sectors whose data is not global (g == 0) when tag_v equals `virtual`
+    for ((v, e) <- valid zip entry_data)
+      when (tag_v === virtual && !e.g) { v := false.B }
+  }
+}
+
+/** TLB config
+  *
+  * @param nSets the number of sets of PTE, follow [[ICacheParams.nSets]]
+  * @param nWays the total number of ways of PTE, follow [[ICacheParams.nWays]]
+  * @param nSectors the number of sectors in a single sectored TLBEntry (each set holds nWays / nSectors such entries)
+  * @param nSuperpageEntries the number of SuperpageEntries
+  */
+case class TLBConfig(
+    nSets: Int,
+    nWays: Int,
+    nSectors: Int = 4,
+    nSuperpageEntries: Int = 4)
+
+/** =Overview=
+  * [[TLB]] is a TLB template which contains PMA logic and PMP checker.
+  *
+  * TLB caches PTE and accelerates the address translation process.
+  * When tlb miss happens, ask PTW(L2TLB) for Page Table Walk.
+  * Perform PMP and PMA check during the translation and throw exception if there were any.
+  *
+  *  ==Cache Structure==
+  *  - Sectored Entry (PTE)
+  *   - set-associative or direct-mapped
+  *    - nsets = [[TLBConfig.nSets]]
+  *    - nways = [[TLBConfig.nWays]] / [[TLBConfig.nSectors]]
+  *    - PTEEntry( sectors = [[TLBConfig.nSectors]] )
+  *   - LRU(if set-associative)
+  *
+  *  - Superpage Entry(superpage PTE)
+  *   - fully associative
+  *    - nsets = [[TLBConfig.nSuperpageEntries]]
+  *    - PTEEntry(sectors = 1)
+  *   - PseudoLRU
+  *
+  *  - Special Entry(PTE across PMP)
+  *   - nsets = 1
+  *   - PTEEntry(sectors = 1)
+  *
+  * ==Address structure==
+  * {{{
+  * |vaddr                                                 |
+  * |ppn/vpn                                   | pgIndex   |
+  * |                                          |           |
+  * |           |nSets             |nSector    |           |}}}
+  *
+  * ==State Machine==
+  * {{{
+  * s_ready: ready to accept request from CPU.
+  * s_request: when L1TLB(this) misses, send a request to PTW(L2TLB).
+  * s_wait: wait for PTW to refill L1TLB.
+  * s_wait_invalidate: L1TLB is waiting for the response from PTW, but will invalidate that response when it arrives.}}}
+  *
+  * ==PMP==
+  * pmp check
+  *  - special_entry: always check
+  *  - other entry: check on refill
+  *
+  * ==Note==
+  * PMA consume diplomacy parameter generate physical memory address checking logic
+  *
+  * Boom use Rocket ITLB, and its own DTLB.
+  *
+  * Accelerators:{{{
+  *   sha3: DTLB
+  *   gemmini: DTLB
+  *   hwacha: DTLB*2+ITLB}}}
+  * @param instruction true for ITLB, false for DTLB
+  * @param lgMaxSize @todo seems granularity
+  * @param cfg [[TLBConfig]]
+  * @param edge collect SoC metadata.
+  */
+class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
+  val io = IO(new Bundle {
+    /** request from Core */
+    val req = Flipped(Decoupled(new TLBReq(lgMaxSize)))
+    /** response to Core */
+    val resp = Output(new TLBResp())
+    /** SFence Input */
+    val sfence = Flipped(Valid(new SFenceReq))
+    /** IO to PTW */
+    val ptw = new TLBPTWIO
+    /** suppress a TLB refill, one cycle after a miss */
+    val kill = Input(Bool())
+  })
+
+  val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits)
+  val vpn = io.req.bits.vaddr(vaddrBits-1, pgIdxBits)
+  /** index for sectored_Entry */
+  val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
+  /** TLB Entry */
+  val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false))))
+  /** Superpage Entry */
+  val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true)))
+  /** Special Entry
+    *
+    * If PMP granularity is less than page size, thus need additional "special" entry manage PMP.
+    */
+  val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false)))
+  def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries
+  def all_entries = ordinary_entries ++ special_entry
+  def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry
+
+  val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4)
+  val state = RegInit(s_ready)
+  // use vpn as refill_tag
+  val r_refill_tag = Reg(UInt(vpnBits.W))
+  val r_superpage_repl_addr = Reg(UInt(log2Ceil(superpage_entries.size).W))
+  val r_sectored_repl_addr = Reg(UInt(log2Ceil(sectored_entries.head.size).W))
+  val r_sectored_hit = Reg(Valid(UInt(log2Ceil(sectored_entries.head.size).W)))
+  val r_superpage_hit = Reg(Valid(UInt(log2Ceil(superpage_entries.size).W)))
+  val r_vstage1_en = Reg(Bool())
+  val r_stage2_en = Reg(Bool())
+  val r_need_gpa = Reg(Bool())
+  val r_gpa_valid = Reg(Bool())
+  val r_gpa = Reg(UInt(vaddrBits.W))
+  val r_gpa_vpn = Reg(UInt(vpnBits.W))
+  val r_gpa_is_pte = Reg(Bool())
+
+  /** privilege mode */
+  val priv = io.req.bits.prv
+  val priv_v = usingHypervisor.B && io.req.bits.v
+  val priv_s = priv(0)
+  // user mode and supervisor mode
+  val priv_uses_vm = priv <= PRV.S.U
+  val satp = Mux(priv_v, io.ptw.vsatp, io.ptw.ptbr)
+  val stage1_en = usingVM.B && satp.mode(satp.mode.getWidth-1)
+  /** VS-stage translation enable */
+  val vstage1_en = usingHypervisor.B && priv_v && io.ptw.vsatp.mode(io.ptw.vsatp.mode.getWidth-1)
+  /** G-stage translation enable */
+  val stage2_en  = usingHypervisor.B && priv_v && io.ptw.hgatp.mode(io.ptw.hgatp.mode.getWidth-1)
+  /** Enable Virtual Memory when:
+    *  1. statically configured
+    *  1. satp highest bits enabled
+    *   i. RV32:
+    *     - 0 -> Bare
+    *     - 1 -> SV32
+    *   i. RV64:
+    *     - 0000 -> Bare
+    *     - 1000 -> SV39
+    *     - 1001 -> SV48
+    *     - 1010 -> SV57
+    *     - 1011 -> SV64
+    *  1. In virtualization mode, vsatp highest bits enabled
+    *  1. priv mode in U and S.
+    *  1. in H & M mode, disable VM.
+    *  1. no passthrough(micro-arch defined.)
+    *
+    * @see RV-priv spec 4.1.11 Supervisor Address Translation and Protection (satp) Register
+    * @see RV-priv spec 8.2.18 Virtual Supervisor Address Translation and Protection Register (vsatp)
+    */
+  val vm_enabled = (stage1_en || stage2_en) && priv_uses_vm && !io.req.bits.passthrough
+
+  // flush guest entries on vsatp.MODE Bare <-> SvXX transitions
+  val v_entries_use_stage1 = RegInit(false.B)
+  val vsatp_mode_mismatch  = priv_v && (vstage1_en =/= v_entries_use_stage1) && !io.req.bits.passthrough
+
+  // share a single physical memory attribute checker (unshare if critical path)
+  val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
+  /** refill signal */
+  val do_refill = usingVM.B && io.ptw.resp.valid
+  /** sfence invalidate refill */
+  val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid
+  // PMP
+  val mpu_ppn = Mux(do_refill, refill_ppn,
+                Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits))
+  val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
+  val mpu_priv = Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */), PRV.S.U, Cat(io.ptw.status.debug, priv))
+  val pmp = Module(new PMPChecker(lgMaxSize, paddrBits, pmpGranularity, nPMPs, pgIdxBits, pgLevels, pgLevelBits))
+  pmp.io.addr := mpu_physaddr
+  pmp.io.size := io.req.bits.size
+  pmp.io.pmp := (io.ptw.pmp: Seq[PMP])
+  pmp.io.prv := mpu_priv
+  // PMA
+  // check exist a slave can consume this address.
+  val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_)
+  // check utility to help check SoC property.
+  def fastCheck(member: TLManagerParameters => Boolean) =
+    legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
+  // todo: using DataScratchpad doesn't support cacheable.
+  val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B
+  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
+  // In M mode, if access DM address(debug module program buffer)
+  val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B)
+  val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r
+  val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w
+  val prot_pp = fastCheck(_.supportsPutPartial)
+  val prot_al = fastCheck(_.supportsLogical)
+  val prot_aa = fastCheck(_.supportsArithmetic)
+  val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x
+  val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType)
+
+  // hit check
+  val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn, priv_v))
+  val superpage_hits = superpage_entries.map(_.hit(vpn, priv_v))
+  val hitsVec = all_entries.map(vm_enabled && _.hit(vpn, priv_v))
+  val real_hits = hitsVec.asUInt
+  val hits = Cat(!vm_enabled, real_hits)
+
+  // use ptw response to refill
+  // permission bit arrays
+  when (do_refill) {
+    val pte = io.ptw.resp.bits.pte
+    val refill_v = r_vstage1_en || r_stage2_en
+    val newEntry = Wire(new TLBEntryData)
+    newEntry.ppn := pte.ppn
+    newEntry.c := cacheable
+    newEntry.u := pte.u
+    newEntry.g := pte.g && pte.v
+    newEntry.ae_ptw := io.ptw.resp.bits.ae_ptw
+    newEntry.ae_final := io.ptw.resp.bits.ae_final
+    newEntry.pf := io.ptw.resp.bits.pf
+    newEntry.gf := io.ptw.resp.bits.gf
+    newEntry.hr := io.ptw.resp.bits.hr
+    newEntry.hw := io.ptw.resp.bits.hw
+    newEntry.hx := io.ptw.resp.bits.hx
+    newEntry.sr := pte.sr()
+    newEntry.sw := pte.sw()
+    newEntry.sx := pte.sx()
+    newEntry.pr := prot_r
+    newEntry.pw := prot_w
+    newEntry.px := prot_x
+    newEntry.ppp := prot_pp
+    newEntry.pal := prot_al
+    newEntry.paa := prot_aa
+    newEntry.eff := prot_eff
+    newEntry.fragmented_superpage := io.ptw.resp.bits.fragmented_superpage
+    // refill special_entry
+    when (special_entry.nonEmpty.B && !io.ptw.resp.bits.homogeneous) {
+      special_entry.foreach(_.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry))
+    }.elsewhen (io.ptw.resp.bits.level < (pgLevels-1).U) {
+      val waddr = Mux(r_superpage_hit.valid && usingHypervisor.B, r_superpage_hit.bits, r_superpage_repl_addr)
+      for ((e, i) <- superpage_entries.zipWithIndex) when (r_superpage_repl_addr === i.U) {
+        e.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry)
+        when (invalidate_refill) { e.invalidate() }
+      }
+    // refill sectored_hit
+    }.otherwise {
+      val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
+      val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr)
+      for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when (waddr === i.U) {
+        when (!r_sectored_hit.valid) { e.invalidate() }
+        e.insert(r_refill_tag, refill_v, 0.U, newEntry)
+        when (invalidate_refill) { e.invalidate() }
+      }
+    }
+
+    r_gpa_valid := io.ptw.resp.bits.gpa.valid
+    r_gpa := io.ptw.resp.bits.gpa.bits
+    r_gpa_is_pte := io.ptw.resp.bits.gpa_is_pte
+  }
+
+  // get all entries data.
+  val entries = all_entries.map(_.getData(vpn))
+  val normal_entries = entries.take(ordinary_entries.size)
+  // parallel query PPN from [[all_entries]], if VM not enabled return VPN instead
+  val ppn = Mux1H(hitsVec :+ !vm_enabled, (all_entries zip entries).map{ case (entry, data) => entry.ppn(vpn, data) } :+ vpn(ppnBits-1, 0))
+
+  val nPhysicalEntries = 1 + special_entry.size
+  // generally PTW misaligned load exception.
+  val ptw_ae_array = Cat(false.B, entries.map(_.ae_ptw).asUInt)
+  val final_ae_array = Cat(false.B, entries.map(_.ae_final).asUInt)
+  val ptw_pf_array = Cat(false.B, entries.map(_.pf).asUInt)
+  val ptw_gf_array = Cat(false.B, entries.map(_.gf).asUInt)
+  val sum = Mux(priv_v, io.ptw.gstatus.sum, io.ptw.status.sum)
+  // user (U=1) pages are readable/writable when priv_s is clear or SUM is set; non-user (U=0) pages only when priv_s is set.
+  // "If the SUM bit in the sstatus register is set, supervisor mode software may also access pages with U=1.(from spec)"
+  val priv_rw_ok = Mux(!priv_s || sum, entries.map(_.u).asUInt, 0.U) | Mux(priv_s, ~entries.map(_.u).asUInt, 0.U)
+  // when priv_s (bit 0 of the privilege mode) is set, only non-user (U=0) pages are executable.
+  // otherwise only user (U=1) pages are executable.
+  val priv_x_ok = Mux(priv_s, ~entries.map(_.u).asUInt, entries.map(_.u).asUInt)
+  val stage1_bypass = Fill(entries.size, usingHypervisor.B && !stage1_en)
+  val mxr = io.ptw.status.mxr | Mux(priv_v, io.ptw.gstatus.mxr, false.B)
+  // "The vsstatus field MXR, which makes execute-only pages readable, only overrides VS-stage page protection.(from spec)"
+  val r_array = Cat(true.B, (priv_rw_ok & (entries.map(_.sr).asUInt | Mux(mxr, entries.map(_.sx).asUInt, 0.U))) | stage1_bypass)
+  val w_array = Cat(true.B, (priv_rw_ok & entries.map(_.sw).asUInt) | stage1_bypass)
+  val x_array = Cat(true.B, (priv_x_ok & entries.map(_.sx).asUInt) | stage1_bypass)
+  val stage2_bypass = Fill(entries.size, !stage2_en)
+  val hr_array = Cat(true.B, entries.map(_.hr).asUInt | Mux(io.ptw.status.mxr, entries.map(_.hx).asUInt, 0.U) | stage2_bypass)
+  val hw_array = Cat(true.B, entries.map(_.hw).asUInt | stage2_bypass)
+  val hx_array = Cat(true.B, entries.map(_.hx).asUInt | stage2_bypass)
+  // These array is for each TLB entries.
+  // user mode can read: PMA OK, TLB OK, AE OK
+  val pr_array = Cat(Fill(nPhysicalEntries, prot_r), normal_entries.map(_.pr).asUInt) & ~(ptw_ae_array | final_ae_array)
+  // user mode can write: PMA OK, TLB OK, AE OK
+  val pw_array = Cat(Fill(nPhysicalEntries, prot_w), normal_entries.map(_.pw).asUInt) & ~(ptw_ae_array | final_ae_array)
+  // user mode can execute: PMA OK, TLB OK, AE OK
+  val px_array = Cat(Fill(nPhysicalEntries, prot_x), normal_entries.map(_.px).asUInt) & ~(ptw_ae_array | final_ae_array)
+  // put effect
+  val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt)
+  // cacheable
+  val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt)
+  // put partial
+  val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt)
+  // atomic arithmetic
+  val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), normal_entries.map(_.paa).asUInt)
+  // atomic logic
+  val pal_array = Cat(Fill(nPhysicalEntries, prot_al), normal_entries.map(_.pal).asUInt)
+  val ppp_array_if_cached = ppp_array | c_array
+  val paa_array_if_cached = paa_array | (if(usingAtomicsInCache) c_array else 0.U)
+  val pal_array_if_cached = pal_array | (if(usingAtomicsInCache) c_array else 0.U)
+  val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt)
+
+  // vaddr misaligned: vaddr[1:0]=b00
+  val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR
+  def badVA(guestPA: Boolean): Bool = { // true when the upper vaddr bits are illegal for the currently selected page-table depth
+    val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels // depth is read from hgatp for guest-physical checks, from satp otherwise
+    val extraBits = if (guestPA) hypervisorExtraAddrBits else 0
+    val signed = !guestPA // only the non-guest-physical check accepts all-ones upper bits
+    val nPgLevelChoices = pgLevels - minPgLevels + 1
+    val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits
+    (for (i <- 0 until nPgLevelChoices) yield {
+      val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.toInt))).U // ones from bit (minVAddrBits + i*pgLevelBits - signed) up to bit vaddrBitsExtended-1
+      val maskedVAddr = io.req.bits.vaddr & mask
+      additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask) // bad unless the masked bits are all 0 (or all 1 when signed)
+    }).orR
+  }
+  val bad_gpa =
+    if (!usingHypervisor) false.B
+    else vm_enabled && !stage1_en && badVA(true)
+  val bad_va =
+    if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B
+    else vm_enabled && stage1_en && badVA(false)
+
+  val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(M_XLR, M_XSC)
+  val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd)
+  val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd)
+  val cmd_put_partial = io.req.bits.cmd === M_PWR
+  val cmd_read = isRead(io.req.bits.cmd)
+  val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX
+  val cmd_write = isWrite(io.req.bits.cmd)
+  val cmd_write_perms = cmd_write ||
+    io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions
+
+  val lrscAllowed = Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array)
+  val ae_array =
+    Mux(misaligned, eff_array, 0.U) |
+    Mux(cmd_lrsc, ~lrscAllowed, 0.U)
+
+  // access exception needs SoC information from PMA
+  val ae_ld_array = Mux(cmd_read, ae_array | ~pr_array, 0.U)
+  val ae_st_array =
+    Mux(cmd_write_perms, ae_array | ~pw_array, 0.U) |
+    Mux(cmd_put_partial, ~ppp_array_if_cached, 0.U) |
+    Mux(cmd_amo_logical, ~pal_array_if_cached, 0.U) |
+    Mux(cmd_amo_arithmetic, ~paa_array_if_cached, 0.U)
+  val must_alloc_array =
+    Mux(cmd_put_partial, ~ppp_array, 0.U) |
+    Mux(cmd_amo_logical, ~pal_array, 0.U) | // logical AMOs are gated by the atomic-logic array, matching ae_st_array
+    Mux(cmd_amo_arithmetic, ~paa_array, 0.U) | // arithmetic AMOs are gated by the atomic-arithmetic array, matching ae_st_array
+    Mux(cmd_lrsc, ~0.U(pal_array.getWidth.W), 0.U)
+  val pf_ld_array = Mux(cmd_read, ((~Mux(cmd_readx, x_array, r_array) & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U)
+  val pf_st_array = Mux(cmd_write_perms, ((~w_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U)
+  val pf_inst_array = ((~x_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array
+  val gf_ld_array = Mux(priv_v && cmd_read, ~Mux(cmd_readx, hx_array, hr_array) & ~ptw_ae_array, 0.U)
+  val gf_st_array = Mux(priv_v && cmd_write_perms, ~hw_array & ~ptw_ae_array, 0.U)
+  val gf_inst_array = Mux(priv_v, ~hx_array & ~ptw_ae_array, 0.U)
+
+  val gpa_hits = {
+    val need_gpa_mask = if (instruction) gf_inst_array else gf_ld_array | gf_st_array
+    val hit_mask = Fill(ordinary_entries.size, r_gpa_valid && r_gpa_vpn === vpn) | Fill(all_entries.size, !vstage1_en)
+    hit_mask | ~need_gpa_mask(all_entries.size-1, 0)
+  }
+
+  val tlb_hit_if_not_gpa_miss = real_hits.orR
+  val tlb_hit = (real_hits & gpa_hits).orR
+  // leads to s_request
+  val tlb_miss = vm_enabled && !vsatp_mode_mismatch && !bad_va && !tlb_hit
+
+  val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries.head.size, "plru")
+  val superpage_plru = new PseudoLRU(superpage_entries.size)
+  when (io.req.valid && vm_enabled) {
+    // replace
+    when (sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) }
+    when (superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) }
+  }
+
+  // Superpages create the possibility that two entries in the TLB may match.
+  // This corresponds to a software bug, but we can't return complete garbage;
+  // we must return either the old translation or the new translation.  This
+  // isn't compatible with the Mux1H approach.  So, flush the TLB and report
+  // a miss on duplicate entries.
+  val multipleHits = PopCountAtLeast(real_hits, 2)
+
+  // only pull up req.ready when this is s_ready state.
+  io.req.ready := state === s_ready
+  // page fault
+  io.resp.pf.ld := (bad_va && cmd_read) || (pf_ld_array & hits).orR
+  io.resp.pf.st := (bad_va && cmd_write_perms) || (pf_st_array & hits).orR
+  io.resp.pf.inst := bad_va || (pf_inst_array & hits).orR
+  // guest page fault
+  io.resp.gf.ld := (bad_gpa && cmd_read) || (gf_ld_array & hits).orR
+  io.resp.gf.st := (bad_gpa && cmd_write_perms) || (gf_st_array & hits).orR
+  io.resp.gf.inst := bad_gpa || (gf_inst_array & hits).orR
+  // access exception
+  io.resp.ae.ld := (ae_ld_array & hits).orR
+  io.resp.ae.st := (ae_st_array & hits).orR
+  io.resp.ae.inst := (~px_array & hits).orR
+  // misaligned
+  io.resp.ma.ld := misaligned && cmd_read
+  io.resp.ma.st := misaligned && cmd_write
+  io.resp.ma.inst := false.B // this is up to the pipeline to figure out
+  io.resp.cacheable := (c_array & hits).orR
+  io.resp.must_alloc := (must_alloc_array & hits).orR
+  io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint).B
+  io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits
+  io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
+  io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte
+  io.resp.gpa := {
+    val page = Mux(!vstage1_en, Cat(bad_gpa, vpn), r_gpa >> pgIdxBits)
+    val offset = Mux(io.resp.gpa_is_pte, r_gpa(pgIdxBits-1, 0), io.req.bits.vaddr(pgIdxBits-1, 0))
+    Cat(page, offset)
+  }
+
+  io.ptw.req.valid := state === s_request
+  io.ptw.req.bits.valid := !io.kill
+  io.ptw.req.bits.bits.addr := r_refill_tag
+  io.ptw.req.bits.bits.vstage1 := r_vstage1_en
+  io.ptw.req.bits.bits.stage2 := r_stage2_en
+  io.ptw.req.bits.bits.need_gpa := r_need_gpa
+
+  if (usingVM) {
+    when(io.ptw.req.fire && io.ptw.req.bits.valid) {
+      r_gpa_valid := false.B
+      r_gpa_vpn   := r_refill_tag
+    }
+
+    val sfence = io.sfence.valid
+    // this is [[s_ready]]
+    // handle miss/hit at the first cycle.
+    // if miss, request PTW(L2TLB).
+    when (io.req.fire && tlb_miss) {
+      state := s_request
+      r_refill_tag := vpn
+      r_need_gpa := tlb_hit_if_not_gpa_miss
+      r_vstage1_en := vstage1_en
+      r_stage2_en := stage2_en
+      r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way)
+      r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx))
+      r_sectored_hit.valid := sector_hits.orR
+      r_sectored_hit.bits := OHToUInt(sector_hits)
+      r_superpage_hit.valid := superpage_hits.orR
+      r_superpage_hit.bits := OHToUInt(superpage_hits)
+    }
+    // Handle SFENCE.VMA when send request to PTW.
+    // SFENCE.VMA    io.ptw.req.ready     kill
+    //       ?                 ?            1
+    //       0                 0            0
+    //       0                 1            0 -> s_wait
+    //       1                 1            0 -> s_wait_invalidate
+    //       1                 0            0 -> s_ready
+    when (state === s_request) {
+      // SFENCE.VMA will kill TLB entries based on rs1 and rs2. It will take 1 cycle.
+      when (sfence) { state := s_ready }
+      // here should be io.ptw.req.fire, but assert(io.ptw.req.ready === true.B)
+      // fire -> s_wait
+      when (io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) }
+      // If CPU kills request(frontend.s2_redirect)
+      when (io.kill) { state := s_ready }
+    }
+    // an sfence during refill moves the state to s_wait_invalidate
+    when (state === s_wait && sfence) {
+      state := s_wait_invalidate
+    }
+    // once the PTW response arrives, go back to s_ready.
+    when (io.ptw.resp.valid) {
+      state := s_ready
+    }
+
+    // SFENCE processing logic.
+    when (sfence) {
+      assert(!io.sfence.bits.rs1 || (io.sfence.bits.addr >> pgIdxBits) === vpn)
+      for (e <- all_real_entries) {
+        val hv = usingHypervisor.B && io.sfence.bits.hv
+        val hg = usingHypervisor.B && io.sfence.bits.hg
+        when (!hg && io.sfence.bits.rs1) { e.invalidateVPN(vpn, hv) }
+        .elsewhen (!hg && io.sfence.bits.rs2) { e.invalidateNonGlobal(hv) }
+        .otherwise { e.invalidate(hv || hg) }
+      }
+    }
+    when(io.req.fire && vsatp_mode_mismatch) {
+      all_real_entries.foreach(_.invalidate(true.B))
+      v_entries_use_stage1 := vstage1_en
+    }
+    when (multipleHits || reset.asBool) {
+      all_real_entries.foreach(_.invalidate())
+    }
+
+    ccover(io.ptw.req.fire, "MISS", "TLB miss")
+    ccover(io.ptw.req.valid && !io.ptw.req.ready, "PTW_STALL", "TLB miss, but PTW busy")
+    ccover(state === s_wait_invalidate, "SFENCE_DURING_REFILL", "flush TLB during TLB refill")
+    ccover(sfence && !io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_ALL", "flush TLB")
+    ccover(sfence && !io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_ASID", "flush TLB ASID")
+    ccover(sfence && io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_LINE", "flush TLB line")
+    ccover(sfence && io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_LINE_ASID", "flush TLB line/ASID")
+    ccover(multipleHits, "MULTIPLE_HITS", "Two matching translations in TLB")
+  }
+
+  def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = // cover `cond` under the name ITLB_<label> or DTLB_<label>, chosen by `instruction`
+    property.cover(cond, s"${if (instruction) "I" else "D"}TLB_$label", "MemorySystem;;" + desc)
+  /** Decides which entry to replace
+    *
+    * If there is an invalid entry, pick the one selected by PriorityEncoder;
+    * if every entry is valid, pick the alt entry
+    *
+    * @return index of the TLBEntry to replace
+    */
+  def replacementEntry(set: Seq[TLBEntry], alt: UInt) = {
+    val valids = set.map(_.valid.orR).asUInt
+    Mux(valids.andR, alt, PriorityEncoder(~valids))
+  }
+}
diff --git a/diplomatic/src/rocket/TLBPermissions.scala b/diplomatic/src/rocket/TLBPermissions.scala
new file mode 100644
index 000000000..26c7c055e
--- /dev/null
+++ b/diplomatic/src/rocket/TLBPermissions.scala
@@ -0,0 +1,114 @@
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket
+
+import chisel3._
+import chisel3.util.isPow2
+
+import freechips.rocketchip.diplomacy._
+import freechips.rocketchip.tilelink._
+
+case class TLBPermissions(
+  homogeneous: Bool, // if false, the fields below are undefined (don't-care)
+  r: Bool, // readable
+  w: Bool, // writeable
+  x: Bool, // executable
+  c: Bool, // cacheable
+  a: Bool, // supports arithmetic atomic ops
+  l: Bool) // supports logical atomic ops
+
+object TLBPageLookup
+{
+  private case class TLBFixedPermissions(
+    e: Boolean, // get-/put-effects
+    r: Boolean, // readable
+    w: Boolean, // writeable
+    x: Boolean, // executable
+    c: Boolean, // cacheable
+    a: Boolean, // arithmetic ops
+    l: Boolean) { // logical ops
+    val useful = r || w || x || c || a || l // regions granting none of these are dropped by groupRegions
+  }
+
+  private def groupRegions(managers: Seq[TLManagerParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // maps each distinct permission set to the unified address sets that carry it
+    val permissions = managers.map { m =>
+      (m.address, TLBFixedPermissions(
+        e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains m.regionType,
+        r = m.supportsGet     || m.supportsAcquireB, // if cached, never uses Get
+        w = m.supportsPutFull || m.supportsAcquireT, // if cached, never uses Put
+        x = m.executable,
+        c = m.supportsAcquireB,
+        a = m.supportsArithmetic,
+        l = m.supportsLogical))
+    }
+
+    permissions
+      .filter(_._2.useful) // get rid of no-permission devices
+      .groupBy(_._2) // group by permission type
+      .mapValues(seq =>
+        AddressSet.unify(seq.flatMap(_._1))) // coalesce same-permission regions
+      .toMap
+  }
+
+  // Unmapped memory is considered to be inhomogeneous
+  def apply(managers: Seq[TLManagerParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = {
+    require (isPow2(xLen) && xLen >= 8)
+    require (isPow2(cacheBlockBytes) && cacheBlockBytes >= xLen/8)
+    require (isPow2(pageSize) && pageSize >= cacheBlockBytes)
+
+    val xferSizes = TransferSizes(cacheBlockBytes, cacheBlockBytes)
+    val allSizes = TransferSizes(1, cacheBlockBytes)
+    val amoSizes = TransferSizes(4, xLen/8)
+
+    managers.foreach { m => // elaboration-time sanity checks only; the Unit result is intentionally not bound
+      require (!m.supportsGet        || m.supportsGet       .contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsGet} Get, but must support ${allSizes}")
+      require (!m.supportsPutFull    || m.supportsPutFull   .contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}")
+      require (!m.supportsPutPartial || m.supportsPutPartial.contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutPartial} PutPartial, but must support ${allSizes}")
+      require (!m.supportsAcquireB   || m.supportsAcquireB  .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}")
+      require (!m.supportsAcquireT   || m.supportsAcquireT  .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}")
+      require (!m.supportsLogical    || m.supportsLogical   .contains(amoSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}")
+      require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}")
+      require (!(m.supportsAcquireB && m.supportsPutFull && !m.supportsAcquireT), s"Memory region '${m.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)")
+    }
+
+    val grouped = groupRegions(managers)
+      .mapValues(_.filter(_.alignment >= pageSize)) // discard any region that's not big enough
+
+    def lowCostProperty(prop: TLBFixedPermissions => Boolean): UInt => Bool = { // returns either an "in a yes-region" check or a negated "in a no-region" check
+      val (yesm, nom) = grouped.partition { case (k, eq) => prop(k) }
+      val (yes, no) = (yesm.values.flatten.toList, nom.values.flatten.toList)
+      // Find the minimal bits needed to distinguish between yes and no
+      val decisionMask = AddressDecoder(Seq(yes, no))
+      def simplify(x: Seq[AddressSet]) = AddressSet.unify(x.map(_.widen(~decisionMask)).distinct)
+      val (yesf, nof) = (simplify(yes), simplify(no))
+      if (yesf.size < no.size) { // NOTE(review): compares against the unsimplified `no`, not `nof` -- confirm this is intended
+        (x: UInt) => yesf.map(_.contains(x)).foldLeft(false.B)(_ || _)
+      } else {
+        (x: UInt) => !nof.map(_.contains(x)).foldLeft(false.B)(_ || _)
+      }
+    }
+
+    // Derive simplified property circuits (don't care when !homo)
+    val rfn = lowCostProperty(_.r)
+    val wfn = lowCostProperty(_.w)
+    val xfn = lowCostProperty(_.x)
+    val cfn = lowCostProperty(_.c)
+    val afn = lowCostProperty(_.a)
+    val lfn = lowCostProperty(_.l)
+
+    val homo = AddressSet.unify(grouped.values.flatten.toList) // every address set that survived the pageSize alignment filter
+    (x: UInt) => TLBPermissions(
+      homogeneous = homo.map(_.contains(x)).foldLeft(false.B)(_ || _),
+      r = rfn(x),
+      w = wfn(x),
+      x = xfn(x),
+      c = cfn(x),
+      a = afn(x),
+      l = lfn(x))
+  }
+
+  // Are all pageSize intervals of mapped regions homogeneous?
+  def homogeneous(managers: Seq[TLManagerParameters], pageSize: BigInt): Boolean = {
+    groupRegions(managers).values.forall(_.forall(_.alignment >= pageSize))
+  }
+}

From 784e8bbdd3b540a99db26da48344e38717662ed5 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Tue, 6 Jun 2023 14:58:11 +0800
Subject: [PATCH 29/32] TLB: resolve diplomatic dependency conflicts

---
 diplomatic/src/rocket/CSR.scala        | 1 -
 diplomatic/src/rocket/Core.scala       | 1 +
 diplomatic/src/rocket/FPU.scala        | 1 -
 diplomatic/src/rocket/RocketCore.scala | 3 ++-
 4 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/diplomatic/src/rocket/CSR.scala b/diplomatic/src/rocket/CSR.scala
index 51da1e5b9..7d8a6e0b5 100644
--- a/diplomatic/src/rocket/CSR.scala
+++ b/diplomatic/src/rocket/CSR.scala
@@ -13,7 +13,6 @@ import freechips.rocketchip.util.property
 
 import scala.collection.mutable.LinkedHashMap
 import Instructions._
-import Instructions64._
 import CustomInstructions._
 
 class MStatus extends Bundle {
diff --git a/diplomatic/src/rocket/Core.scala b/diplomatic/src/rocket/Core.scala
index 10f29666e..09a308c9e 100644
--- a/diplomatic/src/rocket/Core.scala
+++ b/diplomatic/src/rocket/Core.scala
@@ -6,6 +6,7 @@ import Chisel._
 import org.chipsalliance.cde.config._
 import freechips.rocketchip.util._
 import org.chipsalliance.rocket._
+import org.chipsalliance.rockettile.CustomCSRs
 
 case object XLen extends Field[Int]
 case object MaxHartIdBits extends Field[Int]
diff --git a/diplomatic/src/rocket/FPU.scala b/diplomatic/src/rocket/FPU.scala
index c83024ce5..fa5597015 100644
--- a/diplomatic/src/rocket/FPU.scala
+++ b/diplomatic/src/rocket/FPU.scala
@@ -11,7 +11,6 @@ import chisel3.internal.sourceinfo.SourceInfo
 import org.chipsalliance.cde.config.Parameters
 import org.chipsalliance.rocket._
 import org.chipsalliance.rocket.Instructions._
-import org.chipsalliance.rocket.Instructions64._
 import freechips.rocketchip.util._
 import freechips.rocketchip.util.property
 
diff --git a/diplomatic/src/rocket/RocketCore.scala b/diplomatic/src/rocket/RocketCore.scala
index 523819d94..bc0060336 100644
--- a/diplomatic/src/rocket/RocketCore.scala
+++ b/diplomatic/src/rocket/RocketCore.scala
@@ -11,6 +11,7 @@ import org.chipsalliance.rockettile._
 import freechips.rocketchip.util._
 import freechips.rocketchip.util.property
 import scala.collection.mutable.ArrayBuffer
+import org.chipsalliance.rockettile.CustomCSRs
 
 case class RocketCoreParams(
   bootFreqHz: BigInt = 0,
@@ -65,7 +66,7 @@ case class RocketCoreParams(
   val traceHasWdata: Boolean = false // ooo wb, so no wdata in trace
   override val customIsaExt = Some("Xrocket") // CEASE instruction
   override def minFLen: Int = fpu.map(_.minFLen).getOrElse(32)
-  override def customCSRs(implicit p: Parameters) = new RocketCustomCSRs
+  override def customCSRs(implicit p: Parameters) = new CustomCSRs
 }
 
 trait HasRocketCoreParameters extends HasCoreParameters {

From 9549113ec74c2e828a778709f28ac17cfa7eba2e Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Tue, 6 Jun 2023 16:09:11 +0800
Subject: [PATCH 30/32] TLB: remove some diplomatic

---
 diplomatic/src/rocket/CSR.scala            | 1515 --------------------
 diplomatic/src/rocket/CustomCSRs.scala     |   50 -
 diplomatic/src/rocket/HellaCache.scala     |  334 -----
 diplomatic/src/rocket/PTW.scala            |  798 -----------
 diplomatic/src/rocket/TLB.scala            |  746 ----------
 diplomatic/src/rocket/TLBPermissions.scala |  114 --
 rocket/src/HellaCache.scala                |    8 +-
 7 files changed, 5 insertions(+), 3560 deletions(-)
 delete mode 100644 diplomatic/src/rocket/CSR.scala
 delete mode 100644 diplomatic/src/rocket/CustomCSRs.scala
 delete mode 100644 diplomatic/src/rocket/HellaCache.scala
 delete mode 100644 diplomatic/src/rocket/PTW.scala
 delete mode 100644 diplomatic/src/rocket/TLB.scala
 delete mode 100644 diplomatic/src/rocket/TLBPermissions.scala

diff --git a/diplomatic/src/rocket/CSR.scala b/diplomatic/src/rocket/CSR.scala
deleted file mode 100644
index 51da1e5b9..000000000
--- a/diplomatic/src/rocket/CSR.scala
+++ /dev/null
@@ -1,1515 +0,0 @@
-// See LICENSE.SiFive for license details.
-// See LICENSE.Berkeley for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util.{BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid, log2Ceil, log2Up}
-import org.chipsalliance.cde.config.Parameters
-import freechips.rocketchip.devices.debug.DebugModuleKey
-import org.chipsalliance.rockettile._
-import freechips.rocketchip.util._
-import freechips.rocketchip.util.property
-
-import scala.collection.mutable.LinkedHashMap
-import Instructions._
-import Instructions64._
-import CustomInstructions._
-
-class MStatus extends Bundle {
-  // not truly part of mstatus, but convenient
-  val debug = Bool()
-  val cease = Bool()
-  val wfi = Bool()
-  val isa = UInt(32.W)
-
-  val dprv = UInt(PRV.SZ.W) // effective prv for data accesses
-  val dv = Bool() // effective v for data accesses
-  val prv = UInt(PRV.SZ.W)
-  val v = Bool()
-
-  val sd = Bool()
-  val zero2 = UInt(23.W)
-  val mpv = Bool()
-  val gva = Bool()
-  val mbe = Bool()
-  val sbe = Bool()
-  val sxl = UInt(2.W)
-  val uxl = UInt(2.W)
-  val sd_rv32 = Bool()
-  val zero1 = UInt(8.W)
-  val tsr = Bool()
-  val tw = Bool()
-  val tvm = Bool()
-  val mxr = Bool()
-  val sum = Bool()
-  val mprv = Bool()
-  val xs = UInt(2.W)
-  val fs = UInt(2.W)
-  val mpp = UInt(2.W)
-  val vs = UInt(2.W)
-  val spp = UInt(1.W)
-  val mpie = Bool()
-  val ube = Bool()
-  val spie = Bool()
-  val upie = Bool()
-  val mie = Bool()
-  val hie = Bool()
-  val sie = Bool()
-  val uie = Bool()
-}
-
-class MNStatus extends Bundle {
-  val mpp   = UInt(2.W)
-  val zero3 = UInt(3.W)
-  val mpv   = Bool()
-  val zero2 = UInt(3.W)
-  val mie   = Bool()
-  val zero1 = UInt(3.W)
-}
-
-class HStatus extends Bundle {
-  val zero6 = UInt(30.W)
-  val vsxl = UInt(2.W)
-  val zero5 = UInt(9.W)
-  val vtsr = Bool()
-  val vtw = Bool()
-  val vtvm = Bool()
-  val zero3 = UInt(2.W)
-  val vgein = UInt(6.W)
-  val zero2 = UInt(2.W)
-  val hu = Bool()
-  val spvp = Bool()
-  val spv = Bool()
-  val gva = Bool()
-  val vsbe = Bool()
-  val zero1 = UInt(5.W)
-}
-
-class DCSR extends Bundle {
-  val xdebugver = UInt(2.W)
-  val zero4 = UInt(2.W)
-  val zero3 = UInt(12.W)
-  val ebreakm = Bool()
-  val ebreakh = Bool()
-  val ebreaks = Bool()
-  val ebreaku = Bool()
-  val zero2 = Bool()
-  val stopcycle = Bool()
-  val stoptime = Bool()
-  val cause = UInt(3.W)
-  val v = Bool()
-  val zero1 = UInt(2.W)
-  val step = Bool()
-  val prv = UInt(PRV.SZ.W)
-}
-
-class MIP(implicit p: Parameters) extends CoreBundle()(p)
-    with HasCoreParameters {
-  val lip = Vec(coreParams.nLocalInterrupts, Bool())
-  val zero1 = Bool()
-  val debug = Bool() // keep in sync with CSR.debugIntCause
-  val rocc = Bool()
-  val sgeip = Bool()
-  val meip = Bool()
-  val vseip = Bool()
-  val seip = Bool()
-  val ueip = Bool()
-  val mtip = Bool()
-  val vstip = Bool()
-  val stip = Bool()
-  val utip = Bool()
-  val msip = Bool()
-  val vssip = Bool()
-  val ssip = Bool()
-  val usip = Bool()
-}
-
-class PTBR(implicit p: Parameters) extends CoreBundle()(p) {
-  def additionalPgLevels = mode.extract(log2Ceil(pgLevels-minPgLevels+1)-1, 0)
-  def pgLevelsToMode(i: Int) = (xLen, i) match {
-    case (32, 2) => 1
-    case (64, x) if x >= 3 && x <= 6 => x + 5
-  }
-  val (modeBits, maxASIdBits) = xLen match {
-    case 32 => (1, 9)
-    case 64 => (4, 16)
-  }
-  require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen)
-
-  val mode = UInt(modeBits.W)
-  val asid = UInt(maxASIdBits.W)
-  val ppn = UInt((maxPAddrBits - pgIdxBits).W)
-}
-
-
-class PerfCounterIO(implicit p: Parameters) extends CoreBundle
-    with HasCoreParameters {
-  val eventSel = Output(UInt(xLen.W))
-  val inc = Input(UInt(log2Ceil(1+retireWidth).W))
-}
-
-class TracedInstruction(implicit p: Parameters) extends CoreBundle {
-  val valid = Bool()
-  val iaddr = UInt(coreMaxAddrBits.W)
-  val insn = UInt(iLen.W)
-  val priv = UInt(3.W)
-  val exception = Bool()
-  val interrupt = Bool()
-  val cause = UInt(xLen.W)
-  val tval = UInt((coreMaxAddrBits max iLen).W)
-  val wdata = Option.when(traceHasWdata)(UInt((vLen max xLen).W))
-}
-
-class TraceAux extends Bundle {
-  val enable = Bool()
-  val stall = Bool()
-}
-
-class CSRDecodeIO(implicit p: Parameters) extends CoreBundle {
-  val inst = Input(UInt(iLen.W))
-
-  def csr_addr = (inst >> 20)(CSR.ADDRSZ-1, 0)
-
-  val fp_illegal = Output(Bool())
-  val vector_illegal = Output(Bool())
-  val fp_csr = Output(Bool())
-  val rocc_illegal = Output(Bool())
-  val read_illegal = Output(Bool())
-  val write_illegal = Output(Bool())
-  val write_flush = Output(Bool())
-  val system_illegal = Output(Bool())
-  val virtual_access_illegal = Output(Bool())
-  val virtual_system_illegal = Output(Bool())
-}
-
-class CSRFileIO(implicit p: Parameters) extends CoreBundle
-    with HasCoreParameters {
-  val ungated_clock = Input(Clock())
-  val interrupts = Input(new CoreInterrupts())
-  val hartid = Input(UInt(hartIdLen.W))
-  val rw = new Bundle {
-    val addr = Input(UInt(CSR.ADDRSZ.W))
-    val cmd = Input(Bits(CSR.SZ.W))
-    val rdata = Output(Bits(xLen.W))
-    val wdata = Input(Bits(xLen.W))
-  }
-
-  val decode = Vec(decodeWidth, new CSRDecodeIO)
-
-  val csr_stall = Output(Bool())
-  val eret = Output(Bool())
-  val singleStep = Output(Bool())
-
-  val status = Output(new MStatus())
-  val hstatus = Output(new HStatus())
-  val gstatus = Output(new MStatus())
-  val ptbr = Output(new PTBR())
-  val hgatp = Output(new PTBR())
-  val vsatp = Output(new PTBR())
-  val evec = Output(UInt(vaddrBitsExtended.W))
-  val exception = Input(Bool())
-  val retire = Input(UInt(log2Up(1+retireWidth).W))
-  val cause = Input(UInt(xLen.W))
-  val pc = Input(UInt(vaddrBitsExtended.W))
-  val tval = Input(UInt(vaddrBitsExtended.W))
-  val htval = Input(UInt(((maxSVAddrBits + 1) min xLen).W))
-  val gva = Input(Bool())
-  val time = Output(UInt(xLen.W))
-  val fcsr_rm = Output(Bits(FPConstants.RM_SZ.W))
-  val fcsr_flags = Flipped(Valid(Bits(FPConstants.FLAGS_SZ.W)))
-  val set_fs_dirty = coreParams.haveFSDirty.option(Input(Bool()))
-  val rocc_interrupt = Input(Bool())
-  val interrupt = Output(Bool())
-  val interrupt_cause = Output(UInt(xLen.W))
-  val bp = Output(Vec(nBreakpoints, new BP))
-  val pmp = Output(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)))
-  val counters = Vec(nPerfCounters, new PerfCounterIO)
-  val csrw_counter = Output(UInt(CSR.nCtr.W))
-  val inhibit_cycle = Output(Bool())
-  val inst = Input(Vec(retireWidth, UInt(iLen.W)))
-  val trace = Output(Vec(retireWidth, new TracedInstruction))
-  val mcontext = Output(UInt(coreParams.mcontextWidth.W))
-  val scontext = Output(UInt(coreParams.scontextWidth.W))
-
-  val vector = usingVector.option(new Bundle {
-    val vconfig = Output(new VConfig())
-    val vstart = Output(UInt(maxVLMax.log2.W))
-    val vxrm = Output(UInt(2.W))
-    val set_vs_dirty = Input(Bool())
-    val set_vconfig = Flipped(Valid(new VConfig))
-    val set_vstart = Flipped(Valid(vstart))
-    val set_vxsat = Input(Bool())
-  })
-}
-
-class VConfig(implicit p: Parameters) extends CoreBundle {
-  val vl = UInt((maxVLMax.log2 + 1).W)
-  val vtype = new VType
-}
-
-object VType {
-  def fromUInt(that: UInt, ignore_vill: Boolean = false)(implicit p: Parameters): VType = {
-    val res = 0.U.asTypeOf(new VType)
-    val in = that.asTypeOf(res)
-    val vill = (in.max_vsew.U < in.vsew) || !in.lmul_ok || in.reserved =/= 0.U || in.vill
-    when (!vill || ignore_vill.B) {
-      res := in
-      res.vsew := in.vsew(log2Ceil(1 + in.max_vsew) - 1, 0)
-    }
-    res.reserved := 0.U
-    res.vill := vill
-    res
-  }
-
-  def computeVL(avl: UInt, vtype: UInt, currentVL: UInt, useCurrentVL: Bool, useMax: Bool, useZero: Bool)(implicit p: Parameters): UInt =
-    VType.fromUInt(vtype, true).vl(avl, currentVL, useCurrentVL, useMax, useZero)
-}
-
-class VType(implicit p: Parameters) extends CoreBundle {
-  val vill = Bool()
-  val reserved = UInt((xLen - 9).W)
-  val vma = Bool()
-  val vta = Bool()
-  val vsew = UInt(3.W)
-  val vlmul_sign = Bool()
-  val vlmul_mag = UInt(2.W)
-
-  def vlmul_signed: SInt = Cat(vlmul_sign, vlmul_mag).asSInt
-
-  @deprecated("use vlmul_sign, vlmul_mag, or vlmul_signed", "RVV 0.9")
-  def vlmul: UInt = vlmul_mag
-
-  def max_vsew = log2Ceil(eLen/8)
-  def max_vlmul = (1 << vlmul_mag.getWidth) - 1
-
-  def lmul_ok: Bool = Mux(this.vlmul_sign, this.vlmul_mag =/= 0.U && ~this.vlmul_mag < max_vsew.U - this.vsew, true.B)
-
-  def minVLMax: Int = ((maxVLMax / eLen) >> ((1 << vlmul_mag.getWidth) - 1)) max 1
-
-  def vlMax: UInt = (maxVLMax.U >> (this.vsew +& Cat(this.vlmul_sign, ~this.vlmul_mag))).andNot((minVLMax-1).U)
-
-  def vl(avl: UInt, currentVL: UInt, useCurrentVL: Bool, useMax: Bool, useZero: Bool): UInt = {
-    val atLeastMaxVLMax = useMax || Mux(useCurrentVL, currentVL >= maxVLMax.U, avl >= maxVLMax.U)
-    val avl_lsbs = Mux(useCurrentVL, currentVL, avl)(maxVLMax.log2 - 1, 0)
-
-    val atLeastVLMax = atLeastMaxVLMax || (avl_lsbs & (-maxVLMax.S >> (this.vsew +& Cat(this.vlmul_sign, ~this.vlmul_mag))).asUInt.andNot((minVLMax-1).U)).orR
-    val isZero = vill || useZero
-    Mux(!isZero && atLeastVLMax, vlMax, 0.U) | Mux(!isZero && !atLeastVLMax, avl_lsbs, 0.U)
-  }
-}
-
-class CSRFile(
-  perfEventSets: EventSets = new EventSets(Seq()),
-  customCSRs: Seq[CustomCSR] = Nil)(implicit p: Parameters)
-    extends CoreModule()(p)
-    with HasCoreParameters {
-  val io = IO(new CSRFileIO {
-    val customCSRs = Output(Vec(CSRFile.this.customCSRs.size, new CustomCSRIO))
-  })
-
-  val reset_mstatus = WireDefault(0.U.asTypeOf(new MStatus()))
-  reset_mstatus.mpp := PRV.M.U
-  reset_mstatus.prv := PRV.M.U
-  reset_mstatus.xs := (if (usingRoCC) 3.U else 0.U)
-  val reg_mstatus = RegInit(reset_mstatus)
-
-  val new_prv = WireDefault(reg_mstatus.prv)
-  reg_mstatus.prv := legalizePrivilege(new_prv)
-
-  val reset_dcsr = WireDefault(0.U.asTypeOf(new DCSR()))
-  reset_dcsr.xdebugver := 1.U
-  reset_dcsr.prv := PRV.M.U
-  val reg_dcsr = RegInit(reset_dcsr)
-
-  val (supported_interrupts, delegable_interrupts) = {
-    val sup = Wire(new MIP)
-    sup.usip := false.B
-    sup.ssip := usingSupervisor.B
-    sup.vssip := usingHypervisor.B
-    sup.msip := true.B
-    sup.utip := false.B
-    sup.stip := usingSupervisor.B
-    sup.vstip := usingHypervisor.B
-    sup.mtip := true.B
-    sup.ueip := false.B
-    sup.seip := usingSupervisor.B
-    sup.vseip := usingHypervisor.B
-    sup.meip := true.B
-    sup.sgeip := false.B
-    sup.rocc := usingRoCC.B
-    sup.debug := false.B
-    sup.zero1 := false.B
-    sup.lip foreach { _ := true.B }
-    val supported_high_interrupts = if (io.interrupts.buserror.nonEmpty && !usingNMI) (BigInt(1) << CSR.busErrorIntCause).U else 0.U
-
-    val del = WireDefault(sup)
-    del.msip := false.B
-    del.mtip := false.B
-    del.meip := false.B
-
-    (sup.asUInt | supported_high_interrupts, del.asUInt)
-  }
-  val delegable_exceptions = Seq(
-    Causes.misaligned_fetch,
-    Causes.fetch_page_fault,
-    Causes.breakpoint,
-    Causes.load_page_fault,
-    Causes.store_page_fault,
-    Causes.misaligned_load,
-    Causes.misaligned_store,
-    Causes.illegal_instruction,
-    Causes.user_ecall,
-    Causes.virtual_supervisor_ecall,
-    Causes.fetch_guest_page_fault,
-    Causes.load_guest_page_fault,
-    Causes.virtual_instruction,
-    Causes.store_guest_page_fault).map(1 << _).sum.U
-
-  val hs_delegable_exceptions = Seq(
-    Causes.misaligned_fetch,
-    Causes.fetch_access,
-    Causes.illegal_instruction,
-    Causes.breakpoint,
-    Causes.misaligned_load,
-    Causes.load_access,
-    Causes.misaligned_store,
-    Causes.store_access,
-    Causes.user_ecall,
-    Causes.fetch_page_fault,
-    Causes.load_page_fault,
-    Causes.store_page_fault).map(1 << _).sum.U
-
-  val (hs_delegable_interrupts, mideleg_always_hs) = {
-    val always = WireDefault(0.U.asTypeOf(new MIP()))
-    always.vssip := usingHypervisor.B
-    always.vstip := usingHypervisor.B
-    always.vseip := usingHypervisor.B
-
-    val deleg = WireDefault(always)
-    deleg.lip.foreach { _ := usingHypervisor.B }
-
-    (deleg.asUInt, always.asUInt)
-  }
-
-  val reg_debug = RegInit(false.B)
-  val reg_dpc = Reg(UInt(vaddrBitsExtended.W))
-  val reg_dscratch0 = Reg(UInt(xLen.W))
-  val reg_dscratch1 = (p(DebugModuleKey).map(_.nDscratch).getOrElse(1) > 1).option(Reg(UInt(xLen.W)))
-  val reg_singleStepped = Reg(Bool())
-
-  val reg_mcontext = (coreParams.mcontextWidth > 0).option(RegInit(0.U(coreParams.mcontextWidth.W)))
-  val reg_scontext = (coreParams.scontextWidth > 0).option(RegInit(0.U(coreParams.scontextWidth.W)))
-
-  val reg_tselect = Reg(UInt(log2Up(nBreakpoints).W))
-  val reg_bp = Reg(Vec(1 << log2Up(nBreakpoints), new BP))
-  val reg_pmp = Reg(Vec(nPMPs, new PMPReg(paddrBits, pmpGranularity)))
-
-  val reg_mie = Reg(UInt(xLen.W))
-  val (reg_mideleg, read_mideleg) = {
-    val reg = Reg(UInt(xLen.W))
-    (reg, Mux(usingSupervisor.B, reg & delegable_interrupts | mideleg_always_hs, 0.U))
-  }
-  val (reg_medeleg, read_medeleg) = {
-    val reg = Reg(UInt(xLen.W))
-    (reg, Mux(usingSupervisor.B, reg & delegable_exceptions, 0.U))
-  }
-  val reg_mip = Reg(new MIP)
-  val reg_mepc = Reg(UInt(vaddrBitsExtended.W))
-  val reg_mcause = RegInit(0.U(xLen.W))
-  val reg_mtval = Reg(UInt(vaddrBitsExtended.W))
-  val reg_mtval2 = Reg(UInt(((maxSVAddrBits + 1) min xLen).W))
-  val reg_mscratch = Reg(Bits(xLen.W))
-  val mtvecWidth = paddrBits min xLen
-  val reg_mtvec = mtvecInit match {
-    case Some(addr) => RegInit(addr.U(mtvecWidth.W))
-    case None => Reg(UInt(mtvecWidth.W))
-  }
-
-  val reset_mnstatus = WireDefault(0.U.asTypeOf(new MNStatus()))
-  reset_mnstatus.mpp := PRV.M.U
-  val reg_mnscratch = Reg(Bits(xLen.W))
-  val reg_mnepc = Reg(UInt(vaddrBitsExtended.W))
-  val reg_mncause = RegInit(0.U(xLen.W))
-  val reg_mnstatus = RegInit(reset_mnstatus)
-  val reg_rnmie = RegInit(true.B)
-  val nmie = reg_rnmie
-
-  val delegable_counters = ((BigInt(1) << (nPerfCounters + CSR.firstHPM)) - 1).U
-  val (reg_mcounteren, read_mcounteren) = {
-    val reg = Reg(UInt(32.W))
-    (reg, Mux(usingUser.B, reg & delegable_counters, 0.U))
-  }
-  val (reg_scounteren, read_scounteren) = {
-    val reg = Reg(UInt(32.W))
-    (reg, Mux(usingSupervisor.B, reg & delegable_counters, 0.U))
-  }
-
-  val (reg_hideleg, read_hideleg) = {
-    val reg = Reg(UInt(xLen.W))
-    (reg, Mux(usingHypervisor.B, reg & hs_delegable_interrupts, 0.U))
-  }
-  val (reg_hedeleg, read_hedeleg) = {
-    val reg = Reg(UInt(xLen.W))
-    (reg, Mux(usingHypervisor.B, reg & hs_delegable_exceptions, 0.U))
-  }
-  val hs_delegable_counters = delegable_counters
-  val (reg_hcounteren, read_hcounteren) = {
-    val reg = Reg(UInt(32.W))
-    (reg, Mux(usingHypervisor.B, reg & hs_delegable_counters, 0.U))
-  }
-  val reg_hstatus = RegInit(0.U.asTypeOf(new HStatus))
-  val reg_hgatp = Reg(new PTBR)
-  val reg_htval = Reg(reg_mtval2.cloneType)
-  val read_hvip = reg_mip.asUInt & hs_delegable_interrupts
-  val read_hie = reg_mie & hs_delegable_interrupts
-
-  val (reg_vstvec, read_vstvec) = {
-    val reg = Reg(UInt(vaddrBitsExtended.W))
-    (reg, formTVec(reg).sextTo(xLen))
-  }
-  val reg_vsstatus = Reg(new MStatus)
-  val reg_vsscratch = Reg(Bits(xLen.W))
-  val reg_vsepc = Reg(UInt(vaddrBitsExtended.W))
-  val reg_vscause = Reg(Bits(xLen.W))
-  val reg_vstval = Reg(UInt(vaddrBitsExtended.W))
-  val reg_vsatp = Reg(new PTBR)
-
-  val reg_sepc = Reg(UInt(vaddrBitsExtended.W))
-  val reg_scause = Reg(Bits(xLen.W))
-  val reg_stval = Reg(UInt(vaddrBitsExtended.W))
-  val reg_sscratch = Reg(Bits(xLen.W))
-  val reg_stvec = Reg(UInt((if (usingHypervisor) vaddrBitsExtended else vaddrBits).W))
-  val reg_satp = Reg(new PTBR)
-  val reg_wfi = withClock(io.ungated_clock) { RegInit(false.B) }
-
-  val reg_fflags = Reg(UInt(5.W))
-  val reg_frm = Reg(UInt(3.W))
-  val reg_vconfig = usingVector.option(Reg(new VConfig))
-  val reg_vstart = usingVector.option(Reg(UInt(maxVLMax.log2.W)))
-  val reg_vxsat = usingVector.option(Reg(Bool()))
-  val reg_vxrm = usingVector.option(Reg(UInt(io.vector.get.vxrm.getWidth.W)))
-
-  val reg_mcountinhibit = RegInit(0.U((CSR.firstHPM + nPerfCounters).W))
-  io.inhibit_cycle := reg_mcountinhibit(0)
-  val reg_instret = WideCounter(64, io.retire, inhibit = reg_mcountinhibit(2))
-  val reg_cycle = if (enableCommitLog) WideCounter(64, io.retire,     inhibit = reg_mcountinhibit(0))
-    else withClock(io.ungated_clock) { WideCounter(64, !io.csr_stall, inhibit = reg_mcountinhibit(0)) }
-  val reg_hpmevent = io.counters.map(c => RegInit(0.U(xLen.W)))
-    (io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e }
-  val reg_hpmcounter = io.counters.zipWithIndex.map { case (c, i) =>
-    WideCounter(CSR.hpmWidth, c.inc, reset = false, inhibit = reg_mcountinhibit(CSR.firstHPM+i)) }
-
-  val mip = WireDefault(reg_mip)
-  mip.lip := (io.interrupts.lip: Seq[Bool])
-  mip.mtip := io.interrupts.mtip
-  mip.msip := io.interrupts.msip
-  mip.meip := io.interrupts.meip
-  // seip is the OR of reg_mip.seip and the actual line from the PLIC
-  io.interrupts.seip.foreach { mip.seip := reg_mip.seip || _ }
-  // Simimlar sort of thing would apply if the PLIC had a VSEIP line:
-  //io.interrupts.vseip.foreach { mip.vseip := reg_mip.vseip || _ }
-  mip.rocc := io.rocc_interrupt
-  val read_mip = mip.asUInt & supported_interrupts
-  val read_hip = read_mip & hs_delegable_interrupts
-  val high_interrupts = (if (usingNMI) 0.U else io.interrupts.buserror.map(_ << CSR.busErrorIntCause).getOrElse(0.U))
-
-  val pending_interrupts = high_interrupts | (read_mip & reg_mie)
-  val d_interrupts = io.interrupts.debug << CSR.debugIntCause
-  val (nmi_interrupts, nmiFlag) = io.interrupts.nmi.map(nmi =>
-    (((nmi.rnmi && reg_rnmie) << CSR.rnmiIntCause) |
-    io.interrupts.buserror.map(_ << CSR.rnmiBEUCause).getOrElse(0.U),
-    !io.interrupts.debug && nmi.rnmi && reg_rnmie)).getOrElse(0.U, false.B)
-  val m_interrupts = Mux(nmie && (reg_mstatus.prv <= PRV.S.U || reg_mstatus.mie), ~(~pending_interrupts | read_mideleg), 0.U)
-  val s_interrupts = Mux(nmie && (reg_mstatus.v || reg_mstatus.prv < PRV.S.U || (reg_mstatus.prv === PRV.S.U && reg_mstatus.sie)), pending_interrupts & read_mideleg & ~read_hideleg, 0.U)
-  val vs_interrupts = Mux(nmie && (reg_mstatus.v && (reg_mstatus.prv < PRV.S.U || reg_mstatus.prv === PRV.S.U && reg_vsstatus.sie)), pending_interrupts & read_hideleg, 0.U)
-  val (anyInterrupt, whichInterrupt) = chooseInterrupt(Seq(vs_interrupts, s_interrupts, m_interrupts, nmi_interrupts, d_interrupts))
-  val interruptMSB = BigInt(1) << (xLen-1)
-  val interruptCause = interruptMSB.U + (nmiFlag << (xLen-2)) + whichInterrupt
-  io.interrupt := (anyInterrupt && !io.singleStep || reg_singleStepped) && !(reg_debug || io.status.cease)
-  io.interrupt_cause := interruptCause
-  io.bp := reg_bp take nBreakpoints
-  io.mcontext := reg_mcontext.getOrElse(0.U)
-  io.scontext := reg_scontext.getOrElse(0.U)
-  io.pmp := reg_pmp.map(PMP(_, paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits))
-
-  val isaMaskString =
-    (if (usingMulDiv) "M" else "") +
-    (if (usingAtomics) "A" else "") +
-    (if (fLen >= 32) "F" else "") +
-    (if (fLen >= 64) "D" else "") +
-    (if (usingVector) "V" else "") +
-    // The current spec does not define what sub-extensions constitute the 'B' misa bit
-    // (if (usingBitManip) "B" else "") +
-    (if (usingCompressed) "C" else "")
-  val isaString = (if (coreParams.useRVE) "E" else "I") +
-    isaMaskString +
-    (if (customIsaExt.isDefined) "X" else "") +
-    (if (usingSupervisor) "S" else "") +
-    (if (usingHypervisor) "H" else "") +
-    (if (usingUser) "U" else "")
-  val isaMax = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) | isaStringToMask(isaString)
-  val reg_misa = RegInit(isaMax.U)
-  val read_mstatus = io.status.asUInt.extract(xLen-1,0)
-  val read_mtvec = formTVec(reg_mtvec).padTo(xLen)
-  val read_stvec = formTVec(reg_stvec).sextTo(xLen)
-
-  val read_mapping = LinkedHashMap[Int,Bits](
-    CSRs.tselect -> reg_tselect,
-    CSRs.tdata1 -> reg_bp(reg_tselect).control.asUInt,
-    CSRs.tdata2 -> reg_bp(reg_tselect).address.sextTo(xLen),
-    CSRs.tdata3 -> reg_bp(reg_tselect).textra.asUInt,
-    CSRs.misa -> reg_misa,
-    CSRs.mstatus -> read_mstatus,
-    CSRs.mtvec -> read_mtvec,
-    CSRs.mip -> read_mip,
-    CSRs.mie -> reg_mie,
-    CSRs.mscratch -> reg_mscratch,
-    CSRs.mepc -> readEPC(reg_mepc).sextTo(xLen),
-    CSRs.mtval -> reg_mtval.sextTo(xLen),
-    CSRs.mcause -> reg_mcause,
-    CSRs.mhartid -> io.hartid)
-
-  val debug_csrs = if (!usingDebug) LinkedHashMap() else LinkedHashMap[Int,Bits](
-    CSRs.dcsr -> reg_dcsr.asUInt,
-    CSRs.dpc -> readEPC(reg_dpc).sextTo(xLen),
-    CSRs.dscratch0 -> reg_dscratch0.asUInt) ++
-    reg_dscratch1.map(r => CSRs.dscratch1 -> r)
-
-  val read_mnstatus = WireInit(0.U.asTypeOf(new MNStatus()))
-  read_mnstatus.mpp := reg_mnstatus.mpp
-  read_mnstatus.mpv := reg_mnstatus.mpv
-  read_mnstatus.mie := reg_rnmie
-  val nmi_csrs = if (!usingNMI) LinkedHashMap() else LinkedHashMap[Int,Bits](
-    CustomCSRs.mnscratch -> reg_mnscratch,
-    CustomCSRs.mnepc -> readEPC(reg_mnepc).sextTo(xLen),
-    CustomCSRs.mncause -> reg_mncause,
-    CustomCSRs.mnstatus -> read_mnstatus.asUInt)
-
-  val context_csrs = LinkedHashMap[Int,Bits]() ++
-    reg_mcontext.map(r => CSRs.mcontext -> r) ++
-    reg_scontext.map(r => CSRs.scontext -> r)
-
-  val read_fcsr = Cat(reg_frm, reg_fflags)
-  val fp_csrs = LinkedHashMap[Int,Bits]() ++
-    usingFPU.option(CSRs.fflags -> reg_fflags) ++
-    usingFPU.option(CSRs.frm -> reg_frm) ++
-    (usingFPU || usingVector).option(CSRs.fcsr -> read_fcsr)
-
-  val read_vcsr = Cat(reg_vxrm.getOrElse(0.U), reg_vxsat.getOrElse(0.U))
-  val vector_csrs = if (!usingVector) LinkedHashMap() else LinkedHashMap[Int,Bits](
-    CSRs.vxsat -> reg_vxsat.get,
-    CSRs.vxrm -> reg_vxrm.get,
-    CSRs.vcsr -> read_vcsr,
-    CSRs.vstart -> reg_vstart.get,
-    CSRs.vtype -> reg_vconfig.get.vtype.asUInt,
-    CSRs.vl -> reg_vconfig.get.vl,
-    CSRs.vlenb -> (vLen / 8).U)
-
-  read_mapping ++= debug_csrs
-  read_mapping ++= nmi_csrs
-  read_mapping ++= context_csrs
-  read_mapping ++= fp_csrs
-  read_mapping ++= vector_csrs
-
-  if (coreParams.haveBasicCounters) {
-    read_mapping += CSRs.mcountinhibit -> reg_mcountinhibit
-    read_mapping += CSRs.mcycle -> reg_cycle
-    read_mapping += CSRs.minstret -> reg_instret
-
-    for (((e, c), i) <- (reg_hpmevent.padTo(CSR.nHPM, 0.U)
-                         zip reg_hpmcounter.map(x => x: UInt).padTo(CSR.nHPM, 0.U)).zipWithIndex) {
-      read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN
-      read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN
-      read_mapping += (i + CSR.firstHPC) -> c // hpmcounterN
-      if (xLen == 32) {
-        read_mapping += (i + CSR.firstMHPCH) -> (c >> 32) // mhpmcounterNh
-        read_mapping += (i + CSR.firstHPCH) -> (c >> 32) // hpmcounterNh
-      }
-    }
-
-    if (usingUser) {
-      read_mapping += CSRs.mcounteren -> read_mcounteren
-    }
-    read_mapping += CSRs.cycle -> reg_cycle
-    read_mapping += CSRs.instret -> reg_instret
-
-    if (xLen == 32) {
-      read_mapping += CSRs.mcycleh -> (reg_cycle >> 32)
-      read_mapping += CSRs.minstreth -> (reg_instret >> 32)
-      read_mapping += CSRs.cycleh -> (reg_cycle >> 32)
-      read_mapping += CSRs.instreth -> (reg_instret >> 32)
-    }
-  }
-
-  val sie_mask = {
-    val sgeip_mask = WireInit(0.U.asTypeOf(new MIP))
-    sgeip_mask.sgeip := true.B
-    read_mideleg & ~(hs_delegable_interrupts | sgeip_mask.asUInt)
-  }
-  if (usingSupervisor) {
-    val read_sie = reg_mie & sie_mask
-    val read_sip = read_mip & sie_mask
-    val read_sstatus = WireDefault(0.U.asTypeOf(new MStatus))
-    read_sstatus.sd := io.status.sd
-    read_sstatus.uxl := io.status.uxl
-    read_sstatus.sd_rv32 := io.status.sd_rv32
-    read_sstatus.mxr := io.status.mxr
-    read_sstatus.sum := io.status.sum
-    read_sstatus.xs := io.status.xs
-    read_sstatus.fs := io.status.fs
-    read_sstatus.vs := io.status.vs
-    read_sstatus.spp := io.status.spp
-    read_sstatus.spie := io.status.spie
-    read_sstatus.sie := io.status.sie
-
-    read_mapping += CSRs.sstatus -> (read_sstatus.asUInt)(xLen-1,0)
-    read_mapping += CSRs.sip -> read_sip.asUInt
-    read_mapping += CSRs.sie -> read_sie.asUInt
-    read_mapping += CSRs.sscratch -> reg_sscratch
-    read_mapping += CSRs.scause -> reg_scause
-    read_mapping += CSRs.stval -> reg_stval.sextTo(xLen)
-    read_mapping += CSRs.satp -> reg_satp.asUInt
-    read_mapping += CSRs.sepc -> readEPC(reg_sepc).sextTo(xLen)
-    read_mapping += CSRs.stvec -> read_stvec
-    read_mapping += CSRs.scounteren -> read_scounteren
-    read_mapping += CSRs.mideleg -> read_mideleg
-    read_mapping += CSRs.medeleg -> read_medeleg
-  }
-
-  val pmpCfgPerCSR = xLen / new PMPConfig().getWidth
-  def pmpCfgIndex(i: Int) = (xLen / 32) * (i / pmpCfgPerCSR)
-  if (reg_pmp.nonEmpty) {
-    require(reg_pmp.size <= CSR.maxPMPs)
-    val read_pmp = reg_pmp.padTo(CSR.maxPMPs, 0.U.asTypeOf(new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)))
-    for (i <- 0 until read_pmp.size by pmpCfgPerCSR)
-      read_mapping += (CSRs.pmpcfg0 + pmpCfgIndex(i)) -> read_pmp.map(_.cfg).slice(i, i + pmpCfgPerCSR).asUInt
-    for ((pmp, i) <- read_pmp.zipWithIndex)
-      read_mapping += (CSRs.pmpaddr0 + i) -> pmp.readAddr
-  }
-
-  // implementation-defined CSRs
-  val reg_custom = customCSRs.map { csr =>
-    require(csr.mask >= 0 && csr.mask.bitLength <= xLen)
-    require(!read_mapping.contains(csr.id))
-    val reg = csr.init.map(init => RegInit(init.U(xLen.W))).getOrElse(Reg(UInt(xLen.W)))
-    read_mapping += csr.id -> reg
-    reg
-  }
-
-  if (usingHypervisor) {
-    read_mapping += CSRs.mtinst -> 0.U
-    read_mapping += CSRs.mtval2 -> reg_mtval2
-
-    val read_hstatus = io.hstatus.asUInt.extract(xLen-1,0)
-
-    read_mapping += CSRs.hstatus -> read_hstatus
-    read_mapping += CSRs.hedeleg -> read_hedeleg
-    read_mapping += CSRs.hideleg -> read_hideleg
-    read_mapping += CSRs.hcounteren-> read_hcounteren
-    read_mapping += CSRs.hgatp -> reg_hgatp.asUInt
-    read_mapping += CSRs.hip -> read_hip
-    read_mapping += CSRs.hie -> read_hie
-    read_mapping += CSRs.hvip -> read_hvip
-    read_mapping += CSRs.hgeie -> 0.U
-    read_mapping += CSRs.hgeip -> 0.U
-    read_mapping += CSRs.htval -> reg_htval
-    read_mapping += CSRs.htinst -> 0.U
-
-    val read_vsie = (read_hie & read_hideleg) >> 1
-    val read_vsip = (read_hip & read_hideleg) >> 1
-    val read_vsepc = readEPC(reg_vsepc).sextTo(xLen)
-    val read_vstval = reg_vstval.sextTo(xLen)
-    val read_vsstatus = io.gstatus.asUInt.extract(xLen-1,0)
-
-    read_mapping += CSRs.vsstatus -> read_vsstatus
-    read_mapping += CSRs.vsip -> read_vsip
-    read_mapping += CSRs.vsie -> read_vsie
-    read_mapping += CSRs.vsscratch -> reg_vsscratch
-    read_mapping += CSRs.vscause -> reg_vscause
-    read_mapping += CSRs.vstval -> read_vstval
-    read_mapping += CSRs.vsatp -> reg_vsatp.asUInt
-    read_mapping += CSRs.vsepc -> read_vsepc
-    read_mapping += CSRs.vstvec -> read_vstvec
-  }
-
-  // mimpid, marchid, and mvendorid are 0 unless overridden by customCSRs
-  Seq(CSRs.mimpid, CSRs.marchid, CSRs.mvendorid).foreach(id => read_mapping.getOrElseUpdate(id, 0.U))
-
-  val decoded_addr = {
-    val addr = Cat(io.status.v, io.rw.addr)
-    val pats = for (((k, _), i) <- read_mapping.zipWithIndex)
-      yield (BitPat(k.U), (0 until read_mapping.size).map(j => BitPat((i == j).B)))
-    val decoded = DecodeLogic(addr, Seq.fill(read_mapping.size)(X), pats)
-    val unvirtualized_mapping = (for (((k, _), v) <- read_mapping zip decoded) yield k -> v.asBool).toMap
-
-    for ((k, v) <- unvirtualized_mapping) yield k -> {
-      val alt = CSR.mode(k) match {
-        case PRV.S => unvirtualized_mapping.lift(k + (1 << CSR.modeLSB))
-        case PRV.H => unvirtualized_mapping.lift(k - (1 << CSR.modeLSB))
-        case _ => None
-      }
-      alt.map(Mux(reg_mstatus.v, _, v)).getOrElse(v)
-    }
-  }
-
-  val wdata = readModifyWriteCSR(io.rw.cmd, io.rw.rdata, io.rw.wdata)
-
-  val system_insn = io.rw.cmd === CSR.I
-  val hlsv = Seq(HLV_B, HLV_BU, HLV_H, HLV_HU, HLV_W, HLV_WU, HLV_D, HSV_B, HSV_H, HSV_W, HSV_D, HLVX_HU, HLVX_WU)
-  val decode_table = Seq(        ECALL->       List(Y,N,N,N,N,N,N,N,N),
-                                 EBREAK->      List(N,Y,N,N,N,N,N,N,N),
-                                 MRET->        List(N,N,Y,N,N,N,N,N,N),
-                                 CEASE->       List(N,N,N,Y,N,N,N,N,N),
-                                 WFI->         List(N,N,N,N,Y,N,N,N,N)) ++
-    usingDebug.option(           DRET->        List(N,N,Y,N,N,N,N,N,N)) ++
-    usingNMI.option(             MNRET->       List(N,N,Y,N,N,N,N,N,N)) ++
-    coreParams.haveCFlush.option(CFLUSH_D_L1-> List(N,N,N,N,N,N,N,N,N)) ++
-    usingSupervisor.option(      SRET->        List(N,N,Y,N,N,N,N,N,N)) ++
-    usingVM.option(              SFENCE_VMA->  List(N,N,N,N,N,Y,N,N,N)) ++
-    usingHypervisor.option(      HFENCE_VVMA-> List(N,N,N,N,N,N,Y,N,N)) ++
-    usingHypervisor.option(      HFENCE_GVMA-> List(N,N,N,N,N,N,N,Y,N)) ++
-    (if (usingHypervisor)        hlsv.map(_->  List(N,N,N,N,N,N,N,N,Y)) else Seq())
-  val insn_call :: insn_break :: insn_ret :: insn_cease :: insn_wfi :: _ :: _ :: _ :: _ :: Nil = {
-    val insn = ECALL.value.U | (io.rw.addr << 20)
-    DecodeLogic(insn, decode_table(0)._2.map(x=>X), decode_table).map(system_insn && _.asBool)
-  }
-
-  for (io_dec <- io.decode) {
-    val addr = io_dec.inst(31, 20)
-
-    def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => addr === k.U }.reduce(_||_)
-    def decodeFast(s: Seq[Int]): Bool = DecodeLogic(addr, s.map(_.U), (read_mapping -- s).keys.toList.map(_.U))
-
-    val _ :: is_break :: is_ret :: _ :: is_wfi :: is_sfence :: is_hfence_vvma :: is_hfence_gvma :: is_hlsv :: Nil =
-      DecodeLogic(io_dec.inst, decode_table(0)._2.map(x=>X), decode_table).map(_.asBool)
-    val is_counter = (addr.inRange(CSR.firstCtr.U, (CSR.firstCtr + CSR.nCtr).U) || addr.inRange(CSR.firstCtrH.U, (CSR.firstCtrH + CSR.nCtr).U))
-
-    val allow_wfi = (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !reg_mstatus.tw && (!reg_mstatus.v || !reg_hstatus.vtw)
-    val allow_sfence_vma = (!usingVM).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtvm, reg_mstatus.tvm)
-    val allow_hfence_vvma = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U)
-    val allow_hlsv = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U || reg_hstatus.hu)
-    val allow_sret = (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtsr, reg_mstatus.tsr)
-    val counter_addr = addr(log2Ceil(read_mcounteren.getWidth)-1, 0)
-    val allow_counter = (reg_mstatus.prv > PRV.S.U || read_mcounteren(counter_addr)) &&
-      (!usingSupervisor.B || reg_mstatus.prv >= PRV.S.U || read_scounteren(counter_addr)) &&
-      (!usingHypervisor.B || !reg_mstatus.v || read_hcounteren(counter_addr))
-    io_dec.fp_illegal := io.status.fs === 0.U || reg_mstatus.v && reg_vsstatus.fs === 0.U || !reg_misa('f'-'a')
-    io_dec.vector_illegal := io.status.vs === 0.U || reg_mstatus.v && reg_vsstatus.vs === 0.U || !reg_misa('v'-'a')
-    io_dec.fp_csr := decodeFast(fp_csrs.keys.toList)
-    io_dec.rocc_illegal := io.status.xs === 0.U || reg_mstatus.v && reg_vsstatus.xs === 0.U || !reg_misa('x'-'a')
-    val csr_addr_legal = reg_mstatus.prv >= CSR.mode(addr) ||
-      usingHypervisor.B && !reg_mstatus.v && reg_mstatus.prv === PRV.S.U && CSR.mode(addr) === PRV.H.U
-    val csr_exists = decodeAny(read_mapping)
-    io_dec.read_illegal := !csr_addr_legal ||
-      !csr_exists ||
-      ((addr === CSRs.satp.U || addr === CSRs.hgatp.U) && !allow_sfence_vma) ||
-      is_counter && !allow_counter ||
-      decodeFast(debug_csrs.keys.toList) && !reg_debug ||
-      decodeFast(vector_csrs.keys.toList) && io_dec.vector_illegal ||
-      io_dec.fp_csr && io_dec.fp_illegal
-    io_dec.write_illegal := addr(11,10).andR
-    io_dec.write_flush := {
-      val addr_m = addr | (PRV.M.U << CSR.modeLSB)
-      !(addr_m >= CSRs.mscratch.U && addr_m <= CSRs.mtval.U)
-    }
-    io_dec.system_illegal := !csr_addr_legal && !is_hlsv ||
-      is_wfi && !allow_wfi ||
-      is_ret && !allow_sret ||
-      is_ret && addr(10) && addr(7) && !reg_debug ||
-      (is_sfence || is_hfence_gvma) && !allow_sfence_vma ||
-      is_hfence_vvma && !allow_hfence_vvma ||
-      is_hlsv && !allow_hlsv
-
-    io_dec.virtual_access_illegal := reg_mstatus.v && csr_exists && (
-      CSR.mode(addr) === PRV.H.U ||
-      is_counter && read_mcounteren(counter_addr) && (!read_hcounteren(counter_addr) || !reg_mstatus.prv(0) && !read_scounteren(counter_addr)) ||
-      CSR.mode(addr) === PRV.S.U && !reg_mstatus.prv(0) ||
-      addr === CSRs.satp.U && reg_mstatus.prv(0) && reg_hstatus.vtvm)
-
-    io_dec.virtual_system_illegal := reg_mstatus.v && (
-      is_hfence_vvma ||
-      is_hfence_gvma ||
-      is_hlsv ||
-      is_wfi && (!reg_mstatus.prv(0) || !reg_mstatus.tw && reg_hstatus.vtw) ||
-      is_ret && CSR.mode(addr) === PRV.S.U && (!reg_mstatus.prv(0) || reg_hstatus.vtsr) ||
-      is_sfence && (!reg_mstatus.prv(0) || reg_hstatus.vtvm))
-  }
-
-  val cause =
-    Mux(insn_call, Causes.user_ecall.U + Mux(reg_mstatus.prv(0) && reg_mstatus.v, PRV.H.U, reg_mstatus.prv),
-    Mux[UInt](insn_break, Causes.breakpoint.U, io.cause))
-  val cause_lsbs = cause(log2Ceil(1 + CSR.busErrorIntCause)-1, 0)
-  val causeIsDebugInt = cause(xLen-1) && cause_lsbs === CSR.debugIntCause.U
-  val causeIsDebugTrigger = !cause(xLen-1) && cause_lsbs === CSR.debugTriggerCause.U
-  val causeIsDebugBreak = !cause(xLen-1) && insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv)
-  val trapToDebug = usingDebug.B && (reg_singleStepped || causeIsDebugInt || causeIsDebugTrigger || causeIsDebugBreak || reg_debug)
-  val debugEntry = p(DebugModuleKey).map(_.debugEntry).getOrElse(BigInt(0x800))
-  val debugException = p(DebugModuleKey).map(_.debugException).getOrElse(BigInt(0x808))
-  val debugTVec = Mux(reg_debug, Mux(insn_break, debugEntry.U, debugException.U), debugEntry.U)
-  val delegate = usingSupervisor.B && reg_mstatus.prv <= PRV.S.U && Mux(cause(xLen-1), read_mideleg(cause_lsbs), read_medeleg(cause_lsbs))
-  val delegateVS = reg_mstatus.v && delegate && Mux(cause(xLen-1), read_hideleg(cause_lsbs), read_hedeleg(cause_lsbs))
-  def mtvecBaseAlign = 2
-  def mtvecInterruptAlign = {
-    require(reg_mip.getWidth <= xLen)
-    log2Ceil(xLen)
-  }
-  val notDebugTVec = {
-    val base = Mux(delegate, Mux(delegateVS, read_vstvec, read_stvec), read_mtvec)
-    val interruptOffset = cause(mtvecInterruptAlign-1, 0) << mtvecBaseAlign
-    val interruptVec = Cat(base >> (mtvecInterruptAlign + mtvecBaseAlign), interruptOffset)
-    val doVector = base(0) && cause(cause.getWidth-1) && (cause_lsbs >> mtvecInterruptAlign) === 0.U
-    Mux(doVector, interruptVec, base >> mtvecBaseAlign << mtvecBaseAlign)
-  }
-
-  val causeIsRnmiInt = cause(xLen-1) && cause(xLen-2) && (cause_lsbs === CSR.rnmiIntCause.U || cause_lsbs === CSR.rnmiBEUCause.U)
-  val causeIsRnmiBEU = cause(xLen-1) && cause(xLen-2) && cause_lsbs === CSR.rnmiBEUCause.U
-  val causeIsNmi = causeIsRnmiInt
-  val nmiTVecInt = io.interrupts.nmi.map(nmi => nmi.rnmi_interrupt_vector).getOrElse(0.U)
-  val nmiTVecXcpt = io.interrupts.nmi.map(nmi => nmi.rnmi_exception_vector).getOrElse(0.U)
-  val trapToNmiInt = usingNMI.B && causeIsNmi
-  val trapToNmiXcpt = usingNMI.B && !nmie
-  val trapToNmi = trapToNmiInt || trapToNmiXcpt
-  val nmiTVec = (Mux(causeIsNmi, nmiTVecInt, nmiTVecXcpt)>>1)<<1
-
-  val tvec = Mux(trapToDebug, debugTVec, Mux(trapToNmi, nmiTVec, notDebugTVec))
-  io.evec := tvec
-  io.ptbr := reg_satp
-  io.hgatp := reg_hgatp
-  io.vsatp := reg_vsatp
-  io.eret := insn_call || insn_break || insn_ret
-  io.singleStep := reg_dcsr.step && !reg_debug
-  io.status := reg_mstatus
-  io.status.sd := io.status.fs.andR || io.status.xs.andR || io.status.vs.andR
-  io.status.debug := reg_debug
-  io.status.isa := reg_misa
-  io.status.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U
-  io.status.sxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U
-  io.status.dprv := Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpp, reg_mstatus.prv)
-  io.status.dv := reg_mstatus.v || Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpv, false.B)
-  io.status.sd_rv32 := (xLen == 32).B && io.status.sd
-  io.status.mpv := reg_mstatus.mpv
-  io.status.gva := reg_mstatus.gva
-  io.hstatus := reg_hstatus
-  io.hstatus.vsxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U
-  io.gstatus := reg_vsstatus
-  io.gstatus.sd := io.gstatus.fs.andR || io.gstatus.xs.andR || io.gstatus.vs.andR
-  io.gstatus.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U
-  io.gstatus.sd_rv32 := (xLen == 32).B && io.gstatus.sd
-
-  val exception = insn_call || insn_break || io.exception
-  assert(PopCount(insn_ret :: insn_call :: insn_break :: io.exception :: Nil) <= 1.U, "these conditions must be mutually exclusive")
-
-  when (insn_wfi && !io.singleStep && !reg_debug) { reg_wfi := true.B }
-  when (pending_interrupts.orR || io.interrupts.debug || exception) { reg_wfi := false.B }
-  io.interrupts.nmi.map(nmi => when (nmi.rnmi) { reg_wfi := false.B } )
-
-  when (io.retire(0) || exception) { reg_singleStepped := true.B }
-  when (!io.singleStep) { reg_singleStepped := false.B }
-  assert(!io.singleStep || io.retire <= 1.U)
-  assert(!reg_singleStepped || io.retire === 0.U)
-
-  val epc = formEPC(io.pc)
-  val tval = Mux(insn_break, epc, io.tval)
-
-  when (exception) {
-    when (trapToDebug) {
-      when (!reg_debug) {
-        reg_mstatus.v := false.B
-        reg_debug := true.B
-        reg_dpc := epc
-        reg_dcsr.cause := Mux(reg_singleStepped, 4.U, Mux(causeIsDebugInt, 3.U, Mux[UInt](causeIsDebugTrigger, 2.U, 1.U)))
-        reg_dcsr.prv := trimPrivilege(reg_mstatus.prv)
-        reg_dcsr.v := reg_mstatus.v
-        new_prv := PRV.M.U
-      }
-    }.elsewhen (trapToNmiInt) {
-      when (reg_rnmie) {
-        reg_mstatus.v := false.B
-        reg_mnstatus.mpv := reg_mstatus.v
-        reg_rnmie := false.B
-        reg_mnepc := epc
-        reg_mncause := (BigInt(1) << (xLen-1)).U | Mux(causeIsRnmiBEU, 3.U, 2.U)
-        reg_mnstatus.mpp := trimPrivilege(reg_mstatus.prv)
-        new_prv := PRV.M.U
-      }
-    }.elsewhen (delegateVS && nmie) {
-      reg_mstatus.v := true.B
-      reg_vsstatus.spp := reg_mstatus.prv
-      reg_vsepc := epc
-      reg_vscause := Mux(cause(xLen-1), Cat(cause(xLen-1, 2), 1.U(2.W)), cause)
-      reg_vstval := tval
-      reg_vsstatus.spie := reg_vsstatus.sie
-      reg_vsstatus.sie := false.B
-      new_prv := PRV.S.U
-    }.elsewhen (delegate && nmie) {
-      reg_mstatus.v := false.B
-      reg_hstatus.spvp := Mux(reg_mstatus.v, reg_mstatus.prv(0),reg_hstatus.spvp)
-      reg_hstatus.gva := io.gva
-      reg_hstatus.spv := reg_mstatus.v
-      reg_sepc := epc
-      reg_scause := cause
-      reg_stval := tval
-      reg_htval := io.htval
-      reg_mstatus.spie := reg_mstatus.sie
-      reg_mstatus.spp := reg_mstatus.prv
-      reg_mstatus.sie := false.B
-      new_prv := PRV.S.U
-    }.otherwise {
-      reg_mstatus.v := false.B
-      reg_mstatus.mpv := reg_mstatus.v
-      reg_mstatus.gva := io.gva
-      reg_mepc := epc
-      reg_mcause := cause
-      reg_mtval := tval
-      reg_mtval2 := io.htval
-      reg_mstatus.mpie := reg_mstatus.mie
-      reg_mstatus.mpp := trimPrivilege(reg_mstatus.prv)
-      reg_mstatus.mie := false.B
-      new_prv := PRV.M.U
-    }
-  }
-
-  for (i <- 0 until supported_interrupts.getWidth) {
-    val en = exception && (supported_interrupts & (BigInt(1) << i).U) =/= 0.U && cause === (BigInt(1) << (xLen - 1)).U + i.U
-    val delegable = (delegable_interrupts & (BigInt(1) << i).U) =/= 0.U
-    property.cover(en && !delegate, s"INTERRUPT_M_$i")
-    property.cover(en && delegable && delegate, s"INTERRUPT_S_$i")
-  }
-  for (i <- 0 until xLen) {
-    val supported_exceptions: BigInt = 0x8fe |
-      (if (usingCompressed && !coreParams.misaWritable) 0 else 1) |
-      (if (usingUser) 0x100 else 0) |
-      (if (usingSupervisor) 0x200 else 0) |
-      (if (usingVM) 0xb000 else 0)
-    if (((supported_exceptions >> i) & 1) != 0) {
-      val en = exception && cause === i.U
-      val delegable = (delegable_exceptions & (BigInt(1) << i).U) =/= 0.U
-      property.cover(en && !delegate, s"EXCEPTION_M_$i")
-      property.cover(en && delegable && delegate, s"EXCEPTION_S_$i")
-    }
-  }
-
-  when (insn_ret) {
-    val ret_prv = WireInit(UInt(), DontCare)
-    when (usingSupervisor.B && !io.rw.addr(9)) {
-      when (!reg_mstatus.v) {
-        reg_mstatus.sie := reg_mstatus.spie
-        reg_mstatus.spie := true.B
-        reg_mstatus.spp := PRV.U.U
-        ret_prv := reg_mstatus.spp
-        reg_mstatus.v := usingHypervisor.B && reg_hstatus.spv
-        io.evec := readEPC(reg_sepc)
-        reg_hstatus.spv := false.B
-      }.otherwise {
-        reg_vsstatus.sie := reg_vsstatus.spie
-        reg_vsstatus.spie := true.B
-        reg_vsstatus.spp := PRV.U.U
-        ret_prv := reg_vsstatus.spp
-        reg_mstatus.v := usingHypervisor.B
-        io.evec := readEPC(reg_vsepc)
-      }
-    }.elsewhen (usingDebug.B && io.rw.addr(10) && io.rw.addr(7)) {
-      ret_prv := reg_dcsr.prv
-      reg_mstatus.v := usingHypervisor.B && reg_dcsr.v && reg_dcsr.prv <= PRV.S.U
-      reg_debug := false.B
-      io.evec := readEPC(reg_dpc)
-    }.elsewhen (usingNMI.B && io.rw.addr(10) && !io.rw.addr(7)) {
-      ret_prv := reg_mnstatus.mpp
-      reg_mstatus.v := usingHypervisor.B && reg_mnstatus.mpv && reg_mnstatus.mpp <= PRV.S.U
-      reg_rnmie := true.B
-      io.evec := readEPC(reg_mnepc)
-    }.otherwise {
-      reg_mstatus.mie := reg_mstatus.mpie
-      reg_mstatus.mpie := true.B
-      reg_mstatus.mpp := legalizePrivilege(PRV.U.U)
-      reg_mstatus.mpv := false.B
-      ret_prv := reg_mstatus.mpp
-      reg_mstatus.v := usingHypervisor.B && reg_mstatus.mpv && reg_mstatus.mpp <= PRV.S.U
-      io.evec := readEPC(reg_mepc)
-    }
-
-    new_prv := ret_prv
-    when (usingUser.B && ret_prv <= PRV.S.U) {
-      reg_mstatus.mprv := false.B
-    }
-  }
-
-  io.time := reg_cycle
-  io.csr_stall := reg_wfi || io.status.cease
-  io.status.cease := RegEnable(true.B, false.B, insn_cease)
-  io.status.wfi := reg_wfi
-
-  for ((io, reg) <- io.customCSRs zip reg_custom) {
-    io.wen := false.B
-    io.wdata := wdata
-    io.value := reg
-  }
-
-  io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v)
-
-  // cover access to register
-  val coverable_counters = read_mapping.filterNot { case (k, _) =>
-    k >= CSR.firstHPC + nPerfCounters && k < CSR.firstHPC + CSR.nHPM
-  }
-  coverable_counters.foreach( {case (k, v) => {
-    when (!k.U(11,10).andR) {  // Cover points for RW CSR registers
-      property.cover(io.rw.cmd.isOneOf(CSR.W, CSR.S, CSR.C) && io.rw.addr===k.U, "CSR_access_"+k.toString, "Cover Accessing Core CSR field")
-    } .otherwise { // Cover points for RO CSR registers
-      property.cover(io.rw.cmd===CSR.R && io.rw.addr===k.U, "CSR_access_"+k.toString, "Cover Accessing Core CSR field")
-    }
-  }})
-
-  val set_vs_dirty = WireDefault(io.vector.map(_.set_vs_dirty).getOrElse(false.B))
-  io.vector.foreach { vio =>
-    when (set_vs_dirty) {
-      assert(reg_mstatus.vs > 0.U)
-      when (reg_mstatus.v) { reg_vsstatus.vs := 3.U }
-      reg_mstatus.vs := 3.U
-    }
-  }
-
-  val set_fs_dirty = WireDefault(io.set_fs_dirty.getOrElse(false.B))
-  if (coreParams.haveFSDirty) {
-    when (set_fs_dirty) {
-      assert(reg_mstatus.fs > 0.U)
-      when (reg_mstatus.v) { reg_vsstatus.fs := 3.U }
-      reg_mstatus.fs := 3.U
-    }
-  }
-
-  io.fcsr_rm := reg_frm
-  when (io.fcsr_flags.valid) {
-    reg_fflags := reg_fflags | io.fcsr_flags.bits
-    set_fs_dirty := true.B
-  }
-
-  io.vector.foreach { vio =>
-    when (vio.set_vxsat) {
-      reg_vxsat.get := true.B
-      set_vs_dirty := true.B
-    }
-  }
-
-  val csr_wen = io.rw.cmd.isOneOf(CSR.S, CSR.C, CSR.W)
-  io.csrw_counter := Mux(coreParams.haveBasicCounters.B && csr_wen && (io.rw.addr.inRange(CSRs.mcycle.U, (CSRs.mcycle + CSR.nCtr).U) || io.rw.addr.inRange(CSRs.mcycleh.U, (CSRs.mcycleh + CSR.nCtr).U)), UIntToOH(io.rw.addr(log2Ceil(CSR.nCtr+nPerfCounters)-1, 0)), 0.U)
-  when (csr_wen) {
-    val scause_mask = ((BigInt(1) << (xLen-1)) + 31).U /* only implement 5 LSBs and MSB */
-    val satp_valid_modes = 0 +: (minPgLevels to pgLevels).map(new PTBR().pgLevelsToMode(_))
-
-    when (decoded_addr(CSRs.mstatus)) {
-      val new_mstatus = wdata.asTypeOf(new MStatus())
-      reg_mstatus.mie := new_mstatus.mie
-      reg_mstatus.mpie := new_mstatus.mpie
-
-      if (usingUser) {
-        reg_mstatus.mprv := new_mstatus.mprv
-        reg_mstatus.mpp := legalizePrivilege(new_mstatus.mpp)
-        if (usingSupervisor) {
-          reg_mstatus.spp := new_mstatus.spp
-          reg_mstatus.spie := new_mstatus.spie
-          reg_mstatus.sie := new_mstatus.sie
-          reg_mstatus.tw := new_mstatus.tw
-          reg_mstatus.tsr := new_mstatus.tsr
-        }
-        if (usingVM) {
-          reg_mstatus.mxr := new_mstatus.mxr
-          reg_mstatus.sum := new_mstatus.sum
-          reg_mstatus.tvm := new_mstatus.tvm
-        }
-        if (usingHypervisor) {
-          reg_mstatus.mpv := new_mstatus.mpv
-          reg_mstatus.gva := new_mstatus.gva
-        }
-      }
-
-      if (usingSupervisor || usingFPU) reg_mstatus.fs := formFS(new_mstatus.fs)
-      reg_mstatus.vs := formVS(new_mstatus.vs)
-    }
-    when (decoded_addr(CSRs.misa)) {
-      val mask = isaStringToMask(isaMaskString).U(xLen.W)
-      val f = wdata('f' - 'a')
-      // suppress write if it would cause the next fetch to be misaligned
-      when (!usingCompressed.B || !io.pc(1) || wdata('c' - 'a')) {
-        if (coreParams.misaWritable)
-          reg_misa := ~(~wdata | (!f << ('d' - 'a'))) & mask | reg_misa & ~mask
-      }
-    }
-    when (decoded_addr(CSRs.mip)) {
-      // MIP should be modified based on the value in reg_mip, not the value
-      // in read_mip, since read_mip.seip is the OR of reg_mip.seip and
-      // io.interrupts.seip.  We don't want the value on the PLIC line to
-      // inadvertently be OR'd into read_mip.seip.
-      val new_mip = readModifyWriteCSR(io.rw.cmd, reg_mip.asUInt, io.rw.wdata).asTypeOf(new MIP)
-      if (usingSupervisor) {
-        reg_mip.ssip := new_mip.ssip
-        reg_mip.stip := new_mip.stip
-        reg_mip.seip := new_mip.seip
-      }
-      if (usingHypervisor) {
-        reg_mip.vssip := new_mip.vssip
-      }
-    }
-    when (decoded_addr(CSRs.mie))      { reg_mie := wdata & supported_interrupts }
-    when (decoded_addr(CSRs.mepc))     { reg_mepc := formEPC(wdata) }
-    when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata }
-    if (mtvecWritable)
-      when (decoded_addr(CSRs.mtvec))  { reg_mtvec := wdata }
-    when (decoded_addr(CSRs.mcause))   { reg_mcause := wdata & ((BigInt(1) << (xLen-1)) + (BigInt(1) << whichInterrupt.getWidth) - 1).U }
-    when (decoded_addr(CSRs.mtval))    { reg_mtval := wdata }
-
-    if (usingNMI) {
-      val new_mnstatus = wdata.asTypeOf(new MNStatus())
-      when (decoded_addr(CustomCSRs.mnscratch)) { reg_mnscratch := wdata }
-      when (decoded_addr(CustomCSRs.mnepc))     { reg_mnepc := formEPC(wdata) }
-      when (decoded_addr(CustomCSRs.mncause))   { reg_mncause := wdata & ((BigInt(1) << (xLen-1)) + BigInt(3)).U }
-      when (decoded_addr(CustomCSRs.mnstatus))  {
-        reg_mnstatus.mpp := legalizePrivilege(new_mnstatus.mpp)
-        reg_mnstatus.mpv := usingHypervisor.B && new_mnstatus.mpv
-        reg_rnmie := reg_rnmie | new_mnstatus.mie  // mnie bit settable but not clearable from software
-      }
-    }
-
-    for (((e, c), i) <- (reg_hpmevent zip reg_hpmcounter).zipWithIndex) {
-      writeCounter(i + CSR.firstMHPC, c, wdata)
-      when (decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) }
-    }
-    if (coreParams.haveBasicCounters) {
-      when (decoded_addr(CSRs.mcountinhibit)) { reg_mcountinhibit := wdata & ~2.U(xLen.W) }  // mcountinhibit bit [1] is tied zero
-      writeCounter(CSRs.mcycle, reg_cycle, wdata)
-      writeCounter(CSRs.minstret, reg_instret, wdata)
-    }
-
-    if (usingFPU) {
-      when (decoded_addr(CSRs.fflags)) { set_fs_dirty := true.B; reg_fflags := wdata }
-      when (decoded_addr(CSRs.frm))    { set_fs_dirty := true.B; reg_frm := wdata }
-      when (decoded_addr(CSRs.fcsr)) {
-        set_fs_dirty := true.B
-        reg_fflags := wdata
-        reg_frm := wdata >> reg_fflags.getWidth
-      }
-    }
-    if (usingDebug) {
-      when (decoded_addr(CSRs.dcsr)) {
-        val new_dcsr = wdata.asTypeOf(new DCSR())
-        reg_dcsr.step := new_dcsr.step
-        reg_dcsr.ebreakm := new_dcsr.ebreakm
-        if (usingSupervisor) reg_dcsr.ebreaks := new_dcsr.ebreaks
-        if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku
-        if (usingUser) reg_dcsr.prv := legalizePrivilege(new_dcsr.prv)
-        if (usingHypervisor) reg_dcsr.v := new_dcsr.v
-      }
-      when (decoded_addr(CSRs.dpc))      { reg_dpc := formEPC(wdata) }
-      when (decoded_addr(CSRs.dscratch0)) { reg_dscratch0 := wdata }
-      reg_dscratch1.foreach { r =>
-        when (decoded_addr(CSRs.dscratch1)) { r := wdata }
-      }
-    }
-    if (usingSupervisor) {
-      when (decoded_addr(CSRs.sstatus)) {
-        val new_sstatus = wdata.asTypeOf(new MStatus())
-        reg_mstatus.sie := new_sstatus.sie
-        reg_mstatus.spie := new_sstatus.spie
-        reg_mstatus.spp := new_sstatus.spp
-        reg_mstatus.fs := formFS(new_sstatus.fs)
-        reg_mstatus.vs := formVS(new_sstatus.vs)
-        if (usingVM) {
-          reg_mstatus.mxr := new_sstatus.mxr
-          reg_mstatus.sum := new_sstatus.sum
-        }
-      }
-      when (decoded_addr(CSRs.sip)) {
-        val new_sip = ((read_mip & ~read_mideleg) | (wdata & read_mideleg)).asTypeOf(new MIP())
-        reg_mip.ssip := new_sip.ssip
-      }
-      when (decoded_addr(CSRs.satp)) {
-        if (usingVM) {
-          val new_satp = wdata.asTypeOf(new PTBR())
-          when (new_satp.mode.isOneOf(satp_valid_modes.map(_.U))) {
-            reg_satp.mode := new_satp.mode & satp_valid_modes.reduce(_|_).U
-            reg_satp.ppn := new_satp.ppn(ppnBits-1,0)
-            if (asIdBits > 0) reg_satp.asid := new_satp.asid(asIdBits-1,0)
-          }
-        }
-      }
-      when (decoded_addr(CSRs.sie))      { reg_mie := (reg_mie & ~sie_mask) | (wdata & sie_mask) }
-      when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata }
-      when (decoded_addr(CSRs.sepc))     { reg_sepc := formEPC(wdata) }
-      when (decoded_addr(CSRs.stvec))    { reg_stvec := wdata }
-      when (decoded_addr(CSRs.scause))   { reg_scause := wdata & scause_mask }
-      when (decoded_addr(CSRs.stval))    { reg_stval := wdata }
-      when (decoded_addr(CSRs.mideleg))  { reg_mideleg := wdata }
-      when (decoded_addr(CSRs.medeleg))  { reg_medeleg := wdata }
-      when (decoded_addr(CSRs.scounteren)) { reg_scounteren := wdata }
-    }
-
-    if (usingHypervisor) {
-      when (decoded_addr(CSRs.hstatus)) {
-        val new_hstatus = wdata.asTypeOf(new HStatus())
-        reg_hstatus.gva := new_hstatus.gva
-        reg_hstatus.spv := new_hstatus.spv
-        reg_hstatus.spvp := new_hstatus.spvp
-        reg_hstatus.hu := new_hstatus.hu
-        reg_hstatus.vtvm := new_hstatus.vtvm
-        reg_hstatus.vtw := new_hstatus.vtw
-        reg_hstatus.vtsr := new_hstatus.vtsr
-        reg_hstatus.vsxl := new_hstatus.vsxl
-      }
-      when (decoded_addr(CSRs.hideleg))  { reg_hideleg := wdata }
-      when (decoded_addr(CSRs.hedeleg))  { reg_hedeleg := wdata }
-      when (decoded_addr(CSRs.hgatp)) {
-        val new_hgatp = wdata.asTypeOf(new PTBR())
-        val valid_modes = 0 +: (minPgLevels to pgLevels).map(new_hgatp.pgLevelsToMode(_))
-        when (new_hgatp.mode.isOneOf(valid_modes.map(_.U))) {
-          reg_hgatp.mode := new_hgatp.mode & valid_modes.reduce(_|_).U
-        }
-        reg_hgatp.ppn := Cat(new_hgatp.ppn(ppnBits-1,2), 0.U(2.W))
-        if (vmIdBits > 0) reg_hgatp.asid := new_hgatp.asid(vmIdBits-1,0)
-      }
-      when (decoded_addr(CSRs.hip)) {
-        val new_hip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP())
-        reg_mip.vssip := new_hip.vssip
-      }
-      when (decoded_addr(CSRs.hie)) { reg_mie := (reg_mie & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts) }
-      when (decoded_addr(CSRs.hvip)) {
-        val new_sip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP())
-        reg_mip.vssip := new_sip.vssip
-        reg_mip.vstip := new_sip.vstip
-        reg_mip.vseip := new_sip.vseip
-      }
-      when (decoded_addr(CSRs.hcounteren)) { reg_hcounteren := wdata }
-      when (decoded_addr(CSRs.htval))      { reg_htval := wdata }
-      when (decoded_addr(CSRs.mtval2))     { reg_mtval2 := wdata }
-
-      when (decoded_addr(CSRs.vsstatus)) {
-        val new_vsstatus = wdata.asTypeOf(new MStatus())
-        reg_vsstatus.sie := new_vsstatus.sie
-        reg_vsstatus.spie := new_vsstatus.spie
-        reg_vsstatus.spp := new_vsstatus.spp
-        reg_vsstatus.mxr := new_vsstatus.mxr
-        reg_vsstatus.sum := new_vsstatus.sum
-        reg_vsstatus.fs := formFS(new_vsstatus.fs)
-        reg_vsstatus.vs := formVS(new_vsstatus.vs)
-      }
-      when (decoded_addr(CSRs.vsip)) {
-        val new_vsip = ((read_hip & ~read_hideleg) | ((wdata << 1) & read_hideleg)).asTypeOf(new MIP())
-        reg_mip.vssip := new_vsip.vssip
-      }
-      when (decoded_addr(CSRs.vsatp)) {
-        val new_vsatp = wdata.asTypeOf(new PTBR())
-        val mode_ok = new_vsatp.mode.isOneOf(satp_valid_modes.map(_.U))
-        when (mode_ok) {
-          reg_vsatp.mode := new_vsatp.mode & satp_valid_modes.reduce(_|_).U
-        }
-        when (mode_ok || !reg_mstatus.v) {
-          reg_vsatp.ppn := new_vsatp.ppn(vpnBits.min(new_vsatp.ppn.getWidth)-1,0)
-          if (asIdBits > 0) reg_vsatp.asid := new_vsatp.asid(asIdBits-1,0)
-        }
-      }
-      when (decoded_addr(CSRs.vsie))      { reg_mie := (reg_mie & ~read_hideleg) | ((wdata << 1) & read_hideleg) }
-      when (decoded_addr(CSRs.vsscratch)) { reg_vsscratch := wdata }
-      when (decoded_addr(CSRs.vsepc))     { reg_vsepc := formEPC(wdata) }
-      when (decoded_addr(CSRs.vstvec))    { reg_vstvec := wdata }
-      when (decoded_addr(CSRs.vscause))   { reg_vscause := wdata & scause_mask }
-      when (decoded_addr(CSRs.vstval))    { reg_vstval := wdata }
-    }
-    if (usingUser) {
-      when (decoded_addr(CSRs.mcounteren)) { reg_mcounteren := wdata }
-    }
-    if (nBreakpoints > 0) {
-      when (decoded_addr(CSRs.tselect)) { reg_tselect := wdata }
-
-      for ((bp, i) <- reg_bp.zipWithIndex) {
-        when (i.U === reg_tselect && (!bp.control.dmode || reg_debug)) {
-          when (decoded_addr(CSRs.tdata2)) { bp.address := wdata }
-          when (decoded_addr(CSRs.tdata3)) {
-            if (coreParams.mcontextWidth > 0) {
-              bp.textra.mselect := wdata(bp.textra.mselectPos)
-              bp.textra.mvalue  := wdata >> bp.textra.mvaluePos
-            }
-            if (coreParams.scontextWidth > 0) {
-              bp.textra.sselect := wdata(bp.textra.sselectPos)
-              bp.textra.svalue  := wdata >> bp.textra.svaluePos
-            }
-          }
-          when (decoded_addr(CSRs.tdata1)) {
-            bp.control := wdata.asTypeOf(bp.control)
-
-            val prevChain = if (i == 0) false.B else reg_bp(i-1).control.chain
-            val prevDMode = if (i == 0) false.B else reg_bp(i-1).control.dmode
-            val nextChain = if (i >= nBreakpoints-1) true.B else reg_bp(i+1).control.chain
-            val nextDMode = if (i >= nBreakpoints-1) true.B else reg_bp(i+1).control.dmode
-            val newBPC = readModifyWriteCSR(io.rw.cmd, bp.control.asUInt, io.rw.wdata).asTypeOf(bp.control)
-            val dMode = newBPC.dmode && reg_debug && (prevDMode || !prevChain)
-            bp.control.dmode := dMode
-            when (dMode || (newBPC.action > 1.U)) { bp.control.action := newBPC.action }.otherwise { bp.control.action := 0.U }
-            bp.control.chain := newBPC.chain && !(prevChain || nextChain) && (dMode || !nextDMode)
-          }
-        }
-      }
-    }
-    reg_mcontext.foreach { r => when (decoded_addr(CSRs.mcontext)) { r := wdata }}
-    reg_scontext.foreach { r => when (decoded_addr(CSRs.scontext)) { r := wdata }}
-    if (reg_pmp.nonEmpty) for (((pmp, next), i) <- (reg_pmp zip (reg_pmp.tail :+ reg_pmp.last)).zipWithIndex) {
-      require(xLen % pmp.cfg.getWidth == 0)
-      when (decoded_addr(CSRs.pmpcfg0 + pmpCfgIndex(i)) && !pmp.cfgLocked) {
-        val newCfg = (wdata >> ((i * pmp.cfg.getWidth) % xLen)).asTypeOf(new PMPConfig())
-        pmp.cfg := newCfg
-        // disallow unreadable but writable PMPs
-        pmp.cfg.w := newCfg.w && newCfg.r
-        // can't select a=NA4 with coarse-grained PMPs
-        if (pmpGranularity.log2 > PMP.lgAlign)
-          pmp.cfg.a := Cat(newCfg.a(1), newCfg.a.orR)
-      }
-      when (decoded_addr(CSRs.pmpaddr0 + i) && !pmp.addrLocked(next)) {
-        pmp.addr := wdata
-      }
-    }
-    for ((io, csr, reg) <- (io.customCSRs, customCSRs, reg_custom).zipped) {
-      val mask = csr.mask.U(xLen.W)
-      when (decoded_addr(csr.id)) {
-        reg := (wdata & mask) | (reg & ~mask)
-        io.wen := true.B
-      }
-    }
-    if (usingVector) {
-      when (decoded_addr(CSRs.vstart)) { set_vs_dirty := true.B; reg_vstart.get := wdata }
-      when (decoded_addr(CSRs.vxrm))   { set_vs_dirty := true.B; reg_vxrm.get := wdata }
-      when (decoded_addr(CSRs.vxsat))  { set_vs_dirty := true.B; reg_vxsat.get := wdata }
-      when (decoded_addr(CSRs.vcsr))   {
-        set_vs_dirty := true.B
-        reg_vxsat.get := wdata
-        reg_vxrm.get := wdata >> 1
-      }
-    }
-  }
-
-  io.vector.map { vio =>
-    when (vio.set_vconfig.valid) {
-      // user of CSRFile is responsible for set_vs_dirty in this case
-      assert(vio.set_vconfig.bits.vl <= vio.set_vconfig.bits.vtype.vlMax)
-      reg_vconfig.get := vio.set_vconfig.bits
-    }
-    when (vio.set_vstart.valid) {
-      set_vs_dirty := true.B
-      reg_vstart.get := vio.set_vstart.bits
-    }
-    vio.vstart := reg_vstart.get
-    vio.vconfig := reg_vconfig.get
-    vio.vxrm := reg_vxrm.get
-
-    when (reset.asBool) {
-      reg_vconfig.get.vl := 0.U
-      reg_vconfig.get.vtype := 0.U.asTypeOf(new VType)
-      reg_vconfig.get.vtype.vill := true.B
-    }
-  }
-
-  when(reset.asBool) {
-    reg_satp.mode  := 0.U
-    reg_vsatp.mode := 0.U
-    reg_hgatp.mode := 0.U
-  }
-  if (!usingVM) {
-    reg_satp.mode := 0.U
-    reg_satp.ppn  := 0.U
-    reg_satp.asid := 0.U
-  }
-  if (!usingHypervisor) {
-    reg_vsatp.mode := 0.U
-    reg_vsatp.ppn  := 0.U
-    reg_vsatp.asid := 0.U
-    reg_hgatp.mode := 0.U
-    reg_hgatp.ppn  := 0.U
-    reg_hgatp.asid := 0.U
-  }
-  if (!(asIdBits > 0)) {
-    reg_satp.asid  := 0.U
-    reg_vsatp.asid := 0.U
-  }
-  if (!(vmIdBits > 0)) {
-    reg_hgatp.asid := 0.U
-  }
-  reg_vsstatus.xs := (if (usingRoCC) 3.U else 0.U)
-
-  if (nBreakpoints <= 1) reg_tselect := 0.U
-  for (bpc <- reg_bp map {_.control}) {
-    bpc.ttype := bpc.tType.U
-    bpc.maskmax := bpc.maskMax.U
-    bpc.reserved := 0.U
-    bpc.zero := 0.U
-    bpc.h := false.B
-    if (!usingSupervisor) bpc.s := false.B
-    if (!usingUser) bpc.u := false.B
-    if (!usingSupervisor && !usingUser) bpc.m := true.B
-    when (reset.asBool) {
-      bpc.action := 0.U
-      bpc.dmode := false.B
-      bpc.chain := false.B
-      bpc.r := false.B
-      bpc.w := false.B
-      bpc.x := false.B
-    }
-  }
-  for (bpx <- reg_bp map {_.textra}) {
-    if (coreParams.mcontextWidth == 0) bpx.mselect := false.B
-    if (coreParams.scontextWidth == 0) bpx.sselect := false.B
-  }
-  for (bp <- reg_bp drop nBreakpoints)
-    bp := 0.U.asTypeOf(new BP())
-  for (pmp <- reg_pmp) {
-    pmp.cfg.res := 0.U
-    when (reset.asBool) { pmp.reset() }
-  }
-
-  for (((t, insn), i) <- (io.trace zip io.inst).zipWithIndex) {
-    t.exception := io.retire >= i.U && exception
-    t.valid := io.retire > i.U || t.exception
-    t.insn := insn
-    t.iaddr := io.pc
-    t.priv := Cat(reg_debug, reg_mstatus.prv)
-    t.cause := cause
-    t.interrupt := cause(xLen-1)
-    t.tval := io.tval
-  }
-
-  def chooseInterrupt(masksIn: Seq[UInt]): (Bool, UInt) = {
-    val nonstandard = supported_interrupts.getWidth-1 to 12 by -1
-    // MEI, MSI, MTI,  SEI, SSI, STI, VSEI, VSSI, VSTI, UEI, USI, UTI
-    val standard = Seq(11, 3, 7, 9, 1, 5, 10, 2, 6, 8, 0, 4)
-    val priority = nonstandard ++ standard
-    val masks = masksIn.reverse
-    val any = masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => m(i))).reduce(_||_)
-    val which = PriorityMux(masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => (m(i), i.U))))
-    (any, which)
-  }
-
-  def readModifyWriteCSR(cmd: UInt, rdata: UInt, wdata: UInt) = {
-    (Mux(cmd(1), rdata, 0.U) | wdata) & ~Mux(cmd(1,0).andR, wdata, 0.U)
-  }
-
-  def legalizePrivilege(priv: UInt): UInt =
-    if (usingSupervisor) Mux(priv === PRV.H.U, PRV.U.U, priv)
-    else if (usingUser) Fill(2, priv(0))
-    else PRV.M.U
-
-  def trimPrivilege(priv: UInt): UInt =
-    if (usingSupervisor) priv
-    else legalizePrivilege(priv)
-
-  def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = {
-    if (xLen == 32) {
-      val hi = lo + CSRs.mcycleh - CSRs.mcycle
-      when (decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth-1, 32), wdata) }
-      when (decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth-33, 0), ctr(31, 0)) }
-    } else {
-      when (decoded_addr(lo)) { ctr := wdata(ctr.getWidth-1, 0) }
-    }
-  }
-  def formEPC(x: UInt) = ~(~x | (if (usingCompressed) 1.U else 3.U))
-  def readEPC(x: UInt) = ~(~x | Mux(reg_misa('c' - 'a'), 1.U, 3.U))
-  def formTVec(x: UInt) = x andNot Mux(x(0), ((((BigInt(1) << mtvecInterruptAlign) - 1) << mtvecBaseAlign) | 2).U, 2.U)
-  def isaStringToMask(s: String) = s.map(x => 1 << (x - 'A')).foldLeft(0)(_|_)
-  def formFS(fs: UInt) = if (coreParams.haveFSDirty) fs else Fill(2, fs.orR)
-  def formVS(vs: UInt) = if (usingVector) vs else 0.U
-}
diff --git a/diplomatic/src/rocket/CustomCSRs.scala b/diplomatic/src/rocket/CustomCSRs.scala
deleted file mode 100644
index f2424da45..000000000
--- a/diplomatic/src/rocket/CustomCSRs.scala
+++ /dev/null
@@ -1,50 +0,0 @@
-// See LICENSE.SiFive for license details.
-
-package org.chipsalliance.rockettile
-
-import chisel3._
-
-import org.chipsalliance.cde.config.Parameters
-
-case class CustomCSR(id: Int, mask: BigInt, init: Option[BigInt])
-
-object CustomCSR {
-  def constant(id: Int, value: BigInt): CustomCSR = CustomCSR(id, BigInt(0), Some(value))
-}
-
-class CustomCSRIO(implicit p: Parameters) extends CoreBundle {
-  val wen = Bool()
-  val wdata = UInt(xLen.W)
-  val value = UInt(xLen.W)
-}
-
-class CustomCSRs(implicit p: Parameters) extends CoreBundle {
-  // Not all cores have these CSRs, but those that do should follow the same
-  // numbering conventions.  So we list them here but default them to None.
-  protected def bpmCSRId = 0x7c0
-  protected def bpmCSR: Option[CustomCSR] = None
-
-  protected def chickenCSRId = 0x7c1
-  protected def chickenCSR: Option[CustomCSR] = None
-
-  // If you override this, you'll want to concatenate super.decls
-  def decls: Seq[CustomCSR] = bpmCSR.toSeq ++ chickenCSR
-
-  val csrs = Vec(decls.size, new CustomCSRIO)
-
-  def flushBTB = getOrElse(bpmCSR, _.wen, false.B)
-  def bpmStatic = getOrElse(bpmCSR, _.value(0), false.B)
-  def disableDCacheClockGate = getOrElse(chickenCSR, _.value(0), false.B)
-  def disableICacheClockGate = getOrElse(chickenCSR, _.value(1), false.B)
-  def disableCoreClockGate = getOrElse(chickenCSR, _.value(2), false.B)
-  def disableSpeculativeICacheRefill = getOrElse(chickenCSR, _.value(3), false.B)
-  def suppressCorruptOnGrantData = getOrElse(chickenCSR, _.value(9), false.B)
-
-  protected def getByIdOrElse[T](id: Int, f: CustomCSRIO => T, alt: T): T = {
-    val idx = decls.indexWhere(_.id == id)
-    if (idx < 0) alt else f(csrs(idx))
-  }
-
-  protected def getOrElse[T](csr: Option[CustomCSR], f: CustomCSRIO => T, alt: T): T =
-    csr.map(c => getByIdOrElse(c.id, f, alt)).getOrElse(alt)
-}
diff --git a/diplomatic/src/rocket/HellaCache.scala b/diplomatic/src/rocket/HellaCache.scala
deleted file mode 100644
index 5e634b29e..000000000
--- a/diplomatic/src/rocket/HellaCache.scala
+++ /dev/null
@@ -1,334 +0,0 @@
-// See LICENSE.SiFive for license details.
-// See LICENSE.Berkeley for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util.{isPow2,log2Ceil,log2Up,Decoupled,Valid}
-import chisel3.dontTouch
-import freechips.rocketchip.amba._
-import org.chipsalliance.cde.config.{Parameters, Field}
-import freechips.rocketchip.diplomacy._
-import org.chipsalliance.rockettile._
-import freechips.rocketchip.tilelink._
-import freechips.rocketchip.util._
-import scala.collection.mutable.ListBuffer
-
-case class DCacheParams(
-    nSets: Int = 64,
-    nWays: Int = 4,
-    rowBits: Int = 64,
-    subWordBits: Option[Int] = None,
-    replacementPolicy: String = "random",
-    nTLBSets: Int = 1,
-    nTLBWays: Int = 32,
-    nTLBBasePageSectors: Int = 4,
-    nTLBSuperpages: Int = 4,
-    tagECC: Option[String] = None,
-    dataECC: Option[String] = None,
-    dataECCBytes: Int = 1,
-    nMSHRs: Int = 1,
-    nSDQ: Int = 17,
-    nRPQ: Int = 16,
-    nMMIOs: Int = 1,
-    blockBytes: Int = 64,
-    separateUncachedResp: Boolean = false,
-    acquireBeforeRelease: Boolean = false,
-    pipelineWayMux: Boolean = false,
-    clockGate: Boolean = false,
-    scratch: Option[BigInt] = None) extends L1CacheParams {
-
-  def tagCode: Code = Code.fromString(tagECC)
-  def dataCode: Code = Code.fromString(dataECC)
-
-  def dataScratchpadBytes: Int = scratch.map(_ => nSets*blockBytes).getOrElse(0)
-
-  def replacement = new RandomReplacement(nWays)
-
-  def silentDrop: Boolean = !acquireBeforeRelease
-
-  require((!scratch.isDefined || nWays == 1),
-    "Scratchpad only allowed in direct-mapped cache.")
-  require((!scratch.isDefined || nMSHRs == 0),
-    "Scratchpad only allowed in blocking cache.")
-  if (scratch.isEmpty)
-    require(isPow2(nSets), s"nSets($nSets) must be pow2")
-}
-
-trait HasL1HellaCacheParameters extends HasL1CacheParameters with HasCoreParameters {
-  val cacheParams = tileParams.dcache.get
-  val cfg = cacheParams
-
-  def wordBits = coreDataBits
-  def wordBytes = coreDataBytes
-  def subWordBits = cacheParams.subWordBits.getOrElse(wordBits)
-  def subWordBytes = subWordBits / 8
-  def wordOffBits = log2Up(wordBytes)
-  def beatBytes = cacheBlockBytes / cacheDataBeats
-  def beatWords = beatBytes / wordBytes
-  def beatOffBits = log2Up(beatBytes)
-  def idxMSB = untagBits-1
-  def idxLSB = blockOffBits
-  def offsetmsb = idxLSB-1
-  def offsetlsb = wordOffBits
-  def rowWords = rowBits/wordBits
-  def doNarrowRead = coreDataBits * nWays % rowBits == 0
-  def eccBytes = cacheParams.dataECCBytes
-  val eccBits = cacheParams.dataECCBytes * 8
-  val encBits = cacheParams.dataCode.width(eccBits)
-  val encWordBits = encBits * (wordBits / eccBits)
-  def encDataBits = cacheParams.dataCode.width(coreDataBits) // NBDCache only
-  def encRowBits = encDataBits*rowWords
-  def lrscCycles = coreParams.lrscCycles // ISA requires 16-insn LRSC sequences to succeed
-  def lrscBackoff = 3 // disallow LRSC reacquisition briefly
-  def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant
-  def nIOMSHRs = cacheParams.nMMIOs
-  def maxUncachedInFlight = cacheParams.nMMIOs
-  def dataScratchpadSize = cacheParams.dataScratchpadBytes
-
-  require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)")
-  if (!usingDataScratchpad)
-    require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)")
-  // would need offset addr for puts if data width < xlen
-  require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)")
-}
-
-abstract class L1HellaCacheModule(implicit val p: Parameters) extends Module
-  with HasL1HellaCacheParameters
-
-abstract class L1HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p)
-  with HasL1HellaCacheParameters
-
-/** Bundle definitions for HellaCache interfaces */
-
-trait HasCoreMemOp extends HasL1HellaCacheParameters {
-  val addr = UInt(coreMaxAddrBits.W)
-  val idx  = (usingVM && untagBits > pgIdxBits).option(UInt(coreMaxAddrBits.W))
-  val tag  = UInt((coreParams.dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W)
-  val cmd  = UInt(M_SZ.W)
-  val size = UInt(log2Ceil(coreDataBytes.log2 + 1).W)
-  val signed = Bool()
-  val dprv = UInt(PRV.SZ.W)
-  val dv = Bool()
-}
-
-trait HasCoreData extends HasCoreParameters {
-  val data = UInt(coreDataBits.W)
-  val mask = UInt(coreDataBytes.W)
-}
-
-class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp {
-  val phys = Bool()
-  val no_alloc = Bool()
-  val no_xcpt = Bool()
-}
-
-class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData
-
-class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p)
-    with HasCoreMemOp
-    with HasCoreData {
-  val replay = Bool()
-  val has_data = Bool()
-  val data_word_bypass = UInt(coreDataBits.W)
-  val data_raw = UInt(coreDataBits.W)
-  val store_data = UInt(coreDataBits.W)
-}
-
-class AlignmentExceptions extends Bundle {
-  val ld = Bool()
-  val st = Bool()
-}
-
-class HellaCacheExceptions extends Bundle {
-  val ma = new AlignmentExceptions
-  val pf = new AlignmentExceptions
-  val gf = new AlignmentExceptions
-  val ae = new AlignmentExceptions
-}
-
-class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) with HasCoreData
-
-class HellaCachePerfEvents extends Bundle {
-  val acquire = Bool()
-  val release = Bool()
-  val grant = Bool()
-  val tlbMiss = Bool()
-  val blocked = Bool()
-  val canAcceptStoreThenLoad = Bool()
-  val canAcceptStoreThenRMW = Bool()
-  val canAcceptLoadThenLoad = Bool()
-  val storeBufferEmptyAfterLoad = Bool()
-  val storeBufferEmptyAfterStore = Bool()
-}
-
-// interface between D$ and processor/DTLB
-class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) {
-  val req = Decoupled(new HellaCacheReq)
-  val s1_kill = Output(Bool()) // kill previous cycle's req
-  val s1_data = Output(new HellaCacheWriteData()) // data for previous cycle's req
-  val s2_nack = Input(Bool()) // req from two cycles ago is rejected
-  val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint)
-  val s2_kill = Output(Bool()) // kill req from two cycles ago
-  val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO
-  val s2_paddr = Input(UInt(paddrBits.W)) // translated address
-
-  val resp = Flipped(Valid(new HellaCacheResp))
-  val replay_next = Input(Bool())
-  val s2_xcpt = Input(new HellaCacheExceptions)
-  val s2_gpa = Input(UInt(vaddrBitsExtended.W))
-  val s2_gpa_is_pte = Input(Bool())
-  val uncached_resp = tileParams.dcache.get.separateUncachedResp.option(Flipped(Decoupled(new HellaCacheResp)))
-  val ordered = Input(Bool())
-  val perf = Input(new HellaCachePerfEvents())
-
-  val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself?
-  val clock_enabled = Input(Bool()) // is D$ currently being clocked?
-}
-
-/** Base classes for Diplomatic TL2 HellaCaches */
-
-abstract class HellaCache(staticIdForMetadataUseOnly: Int)(implicit p: Parameters) extends LazyModule
-    with HasNonDiplomaticTileParameters {
-  protected val cfg = tileParams.dcache.get
-
-  protected def cacheClientParameters = cfg.scratch.map(x => Seq()).getOrElse(Seq(TLMasterParameters.v1(
-    name          = s"Core ${staticIdForMetadataUseOnly} DCache",
-    sourceId      = IdRange(0, 1 max cfg.nMSHRs),
-    supportsProbe = TransferSizes(cfg.blockBytes, cfg.blockBytes))))
-
-  protected def mmioClientParameters = Seq(TLMasterParameters.v1(
-    name          = s"Core ${staticIdForMetadataUseOnly} DCache MMIO",
-    sourceId      = IdRange(firstMMIO, firstMMIO + cfg.nMMIOs),
-    requestFifo   = true))
-
-  def firstMMIO = (cacheClientParameters.map(_.sourceId.end) :+ 0).max
-
-  val node = TLClientNode(Seq(TLMasterPortParameters.v1(
-    clients = cacheClientParameters ++ mmioClientParameters,
-    minLatency = 1,
-    requestFields = tileParams.core.useVM.option(Seq()).getOrElse(Seq(AMBAProtField())))))
-
-  val hartIdSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]())
-  val mmioAddressPrefixSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]())
-
-  val module: HellaCacheModule
-
-  def flushOnFenceI = cfg.scratch.isEmpty && !node.edges.out(0).manager.managers.forall(m => !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED || m.regionType <= RegionType.IDEMPOTENT)
-
-  def canSupportCFlushLine = !usingVM || cfg.blockBytes * cfg.nSets <= (1 << pgIdxBits)
-
-  require(!tileParams.core.haveCFlush || cfg.scratch.isEmpty, "CFLUSH_D_L1 instruction requires a D$")
-}
-
-class HellaCacheBundle(val outer: HellaCache)(implicit p: Parameters) extends CoreBundle()(p) {
-  val cpu = Flipped((new HellaCacheIO))
-  val ptw = new TLBPTWIO()
-  val errors = new DCacheErrors
-}
-
-class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer)
-    with HasL1HellaCacheParameters {
-  implicit val edge = outer.node.edges.out(0)
-  val (tl_out, _) = outer.node.out(0)
-  val io = IO(new HellaCacheBundle(outer))
-  val io_hartid = outer.hartIdSinkNodeOpt.map(_.bundle)
-  val io_mmio_address_prefix = outer.mmioAddressPrefixSinkNodeOpt.map(_.bundle)
-  dontTouch(io.cpu.resp) // Users like to monitor these fields even if the core ignores some signals
-  dontTouch(io.cpu.s1_data)
-
-  private val fifoManagers = edge.manager.managers.filter(TLFIFOFixer.allVolatile)
-  fifoManagers.foreach { m =>
-    require (m.fifoId == fifoManagers.head.fifoId,
-      s"IOMSHRs must be FIFO for all regions with effects, but HellaCache sees\n"+
-      s"${m.nodePath.map(_.name)}\nversus\n${fifoManagers.head.nodePath.map(_.name)}")
-  }
-}
-
-/** Support overriding which HellaCache is instantiated */
-
-case object BuildHellaCache extends Field[BaseTile => Parameters => HellaCache](HellaCacheFactory.apply)
-
-object HellaCacheFactory {
-  def apply(tile: BaseTile)(p: Parameters): HellaCache = {
-    if (tile.tileParams.dcache.get.nMSHRs == 0)
-      new DCache(tile.staticIdForMetadataUseOnly, tile.crossing)(p)
-    else
-      new NonBlockingDCache(tile.staticIdForMetadataUseOnly)(p)
-  }
-}
-
-/** Mix-ins for constructing tiles that have a HellaCache */
-
-trait HasHellaCache { this: BaseTile =>
-  val module: HasHellaCacheModule
-  implicit val p: Parameters
-  var nDCachePorts = 0
-  lazy val dcache: HellaCache = LazyModule(p(BuildHellaCache)(this)(p))
-
-  tlMasterXbar.node := dcache.node
-  dcache.hartIdSinkNodeOpt.map { _ := hartIdNexusNode }
-  dcache.mmioAddressPrefixSinkNodeOpt.map { _ := mmioAddressPrefixNexusNode }
-}
-
-trait HasHellaCacheModule {
-  val outer: HasHellaCache with HasTileParameters
-  implicit val p: Parameters
-  val dcachePorts = ListBuffer[HellaCacheIO]()
-  val dcacheArb = Module(new HellaCacheArbiter(outer.nDCachePorts)(outer.p))
-  outer.dcache.module.io.cpu <> dcacheArb.io.mem
-}
-
-/** Metadata array used for all HellaCaches */
-
-class L1Metadata(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
-  val coh = new ClientMetadata
-  val tag = UInt(tagBits.W)
-}
-
-object L1Metadata {
-  def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = {
-    val meta = Wire(new L1Metadata)
-    meta.tag := tag
-    meta.coh := coh
-    meta
-  }
-}
-
-class L1MetaReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) {
-  val idx    = UInt(idxBits.W)
-  val way_en = UInt(nWays.W)
-  val tag    = UInt(tagBits.W)
-}
-
-class L1MetaWriteReq(implicit p: Parameters) extends L1MetaReadReq()(p) {
-  val data = new L1Metadata
-}
-
-class L1MetadataArray[T <: L1Metadata](onReset: () => T)(implicit p: Parameters) extends L1HellaCacheModule()(p) {
-  val rstVal = onReset()
-  val io = IO(new Bundle {
-    val read = Flipped(Decoupled(new L1MetaReadReq))
-    val write = Flipped(Decoupled(new L1MetaWriteReq))
-    val resp = Output(Vec(nWays, rstVal.cloneType))
-  })
-
-  val rst_cnt = RegInit(0.U(log2Up(nSets+1).W))
-  val rst = rst_cnt < nSets.U
-  val waddr = Mux(rst, rst_cnt, io.write.bits.idx)
-  val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt
-  val wmask = Mux(rst || (nWays == 1).B, (-1).S, io.write.bits.way_en.asSInt).asBools
-  val rmask = Mux(rst || (nWays == 1).B, (-1).S, io.read.bits.way_en.asSInt).asBools
-  when (rst) { rst_cnt := rst_cnt+1.U }
-
-  val metabits = rstVal.getWidth
-  val tag_array = SyncReadMem(nSets, Vec(nWays, UInt(metabits.W)))
-  val wen = rst || io.write.valid
-  when (wen) {
-    tag_array.write(waddr, VecInit.fill(nWays)(wdata), wmask)
-  }
-  io.resp := tag_array.read(io.read.bits.idx, io.read.fire()).map(_.asTypeOf(chiselTypeOf(rstVal)))
-
-  io.read.ready := !wen // so really this could be a 6T RAM
-  io.write.ready := !rst
-}
diff --git a/diplomatic/src/rocket/PTW.scala b/diplomatic/src/rocket/PTW.scala
deleted file mode 100644
index 4100ec858..000000000
--- a/diplomatic/src/rocket/PTW.scala
+++ /dev/null
@@ -1,798 +0,0 @@
-// See LICENSE.Berkeley for license details.
-// See LICENSE.SiFive for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util.{Arbiter, Cat, Decoupled, Enum, Mux1H, OHToUInt, PopCount, PriorityEncoder, PriorityEncoderOH, RegEnable, UIntToOH, Valid, is, isPow2, log2Ceil, switch}
-import chisel3.withClock
-import chisel3.internal.sourceinfo.SourceInfo
-import org.chipsalliance.cde.config.Parameters
-import freechips.rocketchip.subsystem.CacheBlockBytes
-import org.chipsalliance.rockettile._
-import freechips.rocketchip.tilelink._
-import freechips.rocketchip.util._
-import freechips.rocketchip.util.property
-
-import scala.collection.mutable.ListBuffer
-
-/** PTE request from TLB to PTW
-  *
-  * TLB send a PTE request to PTW when L1TLB miss
-  */
-class PTWReq(implicit p: Parameters) extends CoreBundle()(p) {
-  val addr = UInt(vpnBits.W)
-  val need_gpa = Bool()
-  val vstage1 = Bool()
-  val stage2 = Bool()
-}
-
-/** PTE info from L2TLB to TLB
-  *
-  * containing: target PTE, exceptions, two-satge tanslation info
-  */
-class PTWResp(implicit p: Parameters) extends CoreBundle()(p) {
-  /** ptw access exception */
-  val ae_ptw = Bool()
-  /** final access exception */
-  val ae_final = Bool()
-  /** page fault */
-  val pf = Bool()
-  /** guest page fault */
-  val gf = Bool()
-  /** hypervisor read */
-  val hr = Bool()
-  /** hypervisor write */
-  val hw = Bool()
-  /** hypervisor execute */
-  val hx = Bool()
-  /** PTE to refill L1TLB
-    *
-    * source: L2TLB
-    */
-  val pte = new PTE
-  /** pte pglevel */
-  val level = UInt(log2Ceil(pgLevels).W)
-  /** fragmented_superpage support */
-  val fragmented_superpage = Bool()
-  /** homogeneous for both pma and pmp  */
-  val homogeneous = Bool()
-  val gpa = Valid(UInt(vaddrBits.W))
-  val gpa_is_pte = Bool()
-}
-
-/** IO between TLB and PTW
-  *
-  * PTW receives :
-  *   - PTE request
-  *   - CSRs info
-  *   - pmp results from PMP(in TLB)
-  */
-class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p)
-    with HasCoreParameters {
-  val req = Decoupled(Valid(new PTWReq))
-  val resp = Flipped(Valid(new PTWResp))
-  val ptbr = Input(new PTBR())
-  val hgatp = Input(new PTBR())
-  val vsatp = Input(new PTBR())
-  val status = Input(new MStatus())
-  val hstatus = Input(new HStatus())
-  val gstatus = Input(new MStatus())
-  val pmp = Input(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)))
-  val customCSRs = Input(coreParams.customCSRs)
-}
-/** PTW performance statistics */
-class PTWPerfEvents extends Bundle {
-  val l2miss = Bool()
-  val l2hit = Bool()
-  val pte_miss = Bool()
-  val pte_hit = Bool()
-}
-
-/** Datapath IO between PTW and Core
-  *
-  * PTW receives CSRs info, pmp checks, sfence instruction info
-  *
-  * PTW sends its performance statistics to core
-  */
-class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p)
-    with HasCoreParameters {
-  val ptbr = Input(new PTBR())
-  val hgatp = Input(new PTBR())
-  val vsatp = Input(new PTBR())
-  val sfence = Flipped(Valid(new SFenceReq))
-  val status = Input(new MStatus())
-  val hstatus = Input(new HStatus())
-  val gstatus = Input(new MStatus())
-  val pmp = Input(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)))
-  val perf = Output(new PTWPerfEvents())
-  val customCSRs = Input(coreParams.customCSRs)
-  /** enable clock generated by ptw */
-  val clock_enabled = Output(Bool())
-}
-/** PTE template for transmission
-  *
-  * contains useful methods to check PTE attributes
-  * @see RV-priv spec 4.3.1 for pgae table entry format
-  */
-class PTE(implicit p: Parameters) extends CoreBundle()(p) {
-  val reserved_for_future = UInt(10.W)
-  val ppn = UInt(44.W)
-  val reserved_for_software = Bits(2.W)
-  /** dirty bit */
-  val d = Bool()
-  /** access bit */
-  val a = Bool()
-  /** global mapping */
-  val g = Bool()
-  /** user mode accessible */
-  val u = Bool()
-  /** whether the page is executable */
-  val x = Bool()
-  /** whether the page is writable */
-  val w = Bool()
-  /** whether the page is readable */
-  val r = Bool()
-  /** valid bit */
-  val v = Bool()
-  /** return true if find a pointer to next level page table */
-  def table(dummy: Int = 0) = v && !r && !w && !x && !d && !a && !u && reserved_for_future === 0.U
-  /** return true if find a leaf PTE */
-  def leaf(dummy: Int = 0) = v && (r || (x && !w)) && a
-  /** user read */
-  def ur(dummy: Int = 0) = sr() && u
-  /** user write*/
-  def uw(dummy: Int = 0) = sw() && u
-  /** user execute */
-  def ux(dummy: Int = 0) = sx() && u
-  /** supervisor read */
-  def sr(dummy: Int = 0) = leaf() && r
-  /** supervisor write */
-  def sw(dummy: Int = 0) = leaf() && w && d
-  /** supervisor execute */
-  def sx(dummy: Int = 0) = leaf() && x
-  /** full permission: writable and executable in user mode */
-  def isFullPerm(dummy: Int = 0) = uw() && ux()
-}
-
-/** L2TLB PTE template
-  *
-  * contains tag bits
-  * @param nSets number of sets in L2TLB
-  * @see RV-priv spec 4.3.1 for page table entry format
-  */
-class L2TLBEntry(nSets: Int)(implicit p: Parameters) extends CoreBundle()(p)
-    with HasCoreParameters {
-  val idxBits = log2Ceil(nSets)
-  val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0)
-  val tag = UInt(tagBits.W)
-  val ppn = UInt(ppnBits.W)
-  /** dirty bit */
-  val d = Bool()
-  /** access bit */
-  val a = Bool()
-  /** user mode accessible */
-  val u = Bool()
-  /** whether the page is executable */
-  val x = Bool()
-  /** whether the page is writable */
-  val w = Bool()
-  /** whether the page is readable */
-  val r = Bool()
-
-}
-/** PTW contains L2TLB, and performs page table walk for high level TLB, and cache queries from L1 TLBs(I$, D$, RoCC)
-  *
-  * It performs hierarchy page table query to mem for the desired leaf PTE and cache them in l2tlb.
-  * Besides leaf PTEs, it also caches non-leaf PTEs in pte_cache to accerlerate the process.
-  *
-  * ==Structure==
-  *  - l2tlb : for leaf PTEs
-  *   - set-associative (configurable with [[CoreParams.nL2TLBEntries]]and [[CoreParams.nL2TLBWays]]))
-  *   - PLRU
-  *  - pte_cache: for non-leaf PTEs
-  *   - set-associative
-  *   - LRU
-  *  - s2_pte_cache: for non-leaf PTEs in 2-stage translation
-  *   - set-associative
-  *   - PLRU
-  *
-  * l2tlb Pipeline: 3 stage
-  * {{{
-  * stage 0 : read
-  * stage 1 : decode
-  * stage 2 : hit check
-  * }}}
-  * ==State Machine==
-  * s_ready: ready to reveive request from TLB
-  * s_req: request mem; pte_cache hit judge
-  * s_wait1: deal with l2tlb error
-  * s_wait2: final hit judge
-  * s_wait3: receive mem response
-  * s_fragment_superpage: for superpage PTE
-  *
-  * @note l2tlb hit happens in s_req or s_wait1
-  * @see RV-priv spec 4.3-4.6 for Virtual-Memory System
-  * @see RV-priv spec 8.5 for Two-Stage Address Translation
-  * @todo details in two-stage translation
-  */
-class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
-  val io = IO(new Bundle {
-    /** to n TLB */
-    val requestor = Flipped(Vec(n, new TLBPTWIO))
-    /** to HellaCache */
-    val mem = new HellaCacheIO
-    /** to Core
-      *
-      * contains CSRs info and performance statistics
-      */
-    val dpath = new DatapathPTWIO
-  })
-
-  val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(8)
-  val state = RegInit(s_ready)
-  val l2_refill_wire = Wire(Bool())
-  /** Arbiter to arbite request from n TLB */
-  val arb = Module(new Arbiter(Valid(new PTWReq), n))
-  // use TLB req as arbitor's input
-  arb.io.in <> io.requestor.map(_.req)
-  // receive req only when s_ready and not in refill
-  arb.io.out.ready := (state === s_ready) && !l2_refill_wire
-
-  val resp_valid = RegNext(VecInit(Seq.fill(io.requestor.size)(false.B)))
-
-  val clock_en = state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate
-  io.dpath.clock_enabled := usingVM.B && clock_en
-  val gated_clock =
-    if (!usingVM || !tileParams.dcache.get.clockGate) clock
-    else ClockGate(clock, clock_en, "ptw_clock_gate")
-  withClock (gated_clock) { // entering gated-clock domain
-
-  val invalidated = Reg(Bool())
-  /** current PTE level
-    * {{{
-    * 0 <= count <= pgLevel-1
-    * count = pgLevel - 1 : leaf PTE
-    * count < pgLevel - 1 : non-leaf PTE
-    * }}}
-    */
-  val count = Reg(UInt(log2Ceil(pgLevels).W))
-  val resp_ae_ptw = Reg(Bool())
-  val resp_ae_final = Reg(Bool())
-  val resp_pf = Reg(Bool())
-  val resp_gf = Reg(Bool())
-  val resp_hr = Reg(Bool())
-  val resp_hw = Reg(Bool())
-  val resp_hx = Reg(Bool())
-  val resp_fragmented_superpage = Reg(Bool())
-
-  /** tlb request */
-  val r_req = Reg(new PTWReq)
-  /** current selected way in arbitor */
-  val r_req_dest = Reg(Bits())
-  // to respond to L1TLB : l2_hit
-  // to construct mem.req.addr
-  val r_pte = Reg(new PTE)
-  val r_hgatp = Reg(new PTBR)
-  // 2-stage pageLevel
-  val aux_count = Reg(UInt(log2Ceil(pgLevels).W))
-  /** pte for 2-stage translation */
-  val aux_pte = Reg(new PTE)
-  val aux_ppn_hi = (pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth).option(Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W)))
-  val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only valid in resp_gf case
-  val stage2 = Reg(Bool())
-  val stage2_final = Reg(Bool())
-
-  val satp = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp, io.dpath.ptbr)
-  val r_hgatp_initial_count = pgLevels.U - minPgLevels.U - r_hgatp.additionalPgLevels
-  /** 2-stage translation both enable */
-  val do_both_stages = r_req.vstage1 && r_req.stage2
-  val max_count = count max aux_count
-  val vpn = Mux(r_req.vstage1 && stage2, aux_pte.ppn, r_req.addr)
-
-  val mem_resp_valid = RegNext(io.mem.resp.valid)
-  val mem_resp_data = RegNext(io.mem.resp.bits.data)
-  io.mem.uncached_resp.map { resp =>
-    assert(!(resp.valid && io.mem.resp.valid))
-    resp.ready := true.B
-    when (resp.valid) {
-      mem_resp_valid := true.B
-      mem_resp_data := resp.bits.data
-    }
-  }
-  // construct pte from mem.resp
-  val (pte, invalid_paddr) = {
-    val tmp = mem_resp_data.asTypeOf(new PTE())
-    val res = WireDefault(tmp)
-    res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth)-1, 0), tmp.ppn(ppnBits-1, 0))
-    when (tmp.r || tmp.w || tmp.x) {
-      // for superpage mappings, make sure PPN LSBs are zero
-      for (i <- 0 until pgLevels-1)
-        when (count <= i.U && tmp.ppn((pgLevels-1-i)*pgLevelBits-1, (pgLevels-2-i)*pgLevelBits) =/= 0.U) { res.v := false.B }
-    }
-    (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits) =/= 0.U, (tmp.ppn >> ppnBits) =/= 0.U))
-  }
-  // find non-leaf PTE, need traverse
-  val traverse = pte.table() && !invalid_paddr && count < (pgLevels-1).U
-  /** address send to mem for enquerry */
-  val pte_addr = if (!usingVM) 0.U else {
-    val vpn_idxs = (0 until pgLevels).map { i =>
-      val width = pgLevelBits + (if (i <= pgLevels - minPgLevels) hypervisorExtraAddrBits else 0)
-      (vpn >> (pgLevels - i - 1) * pgLevelBits)(width - 1, 0)
-    }
-    val mask     = Mux(stage2 && count === r_hgatp_initial_count, ((1 << (hypervisorExtraAddrBits + pgLevelBits)) - 1).U, ((1 << pgLevelBits) - 1).U)
-    val vpn_idx  = vpn_idxs(count) & mask
-    val raw_pte_addr = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8)
-    val size = if (usingHypervisor) vaddrBits else paddrBits
-    //use r_pte.ppn as page table base address
-    //use vpn slice as offset
-    raw_pte_addr.apply(size.min(raw_pte_addr.getWidth) - 1, 0)
-  }
-  /** pte_cache input addr */
-  val pte_cache_addr = if (!usingHypervisor) pte_addr else {
-    val vpn_idxs = (0 until pgLevels-1).map { i =>
-      val ext_aux_pte_ppn = aux_ppn_hi match {
-        case None     => aux_pte.ppn
-        case Some(hi) => Cat(hi, aux_pte.ppn)
-      }
-      (ext_aux_pte_ppn >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0)
-    }
-    val vpn_idx = vpn_idxs(count)
-    val raw_pte_cache_addr = Cat(r_pte.ppn, vpn_idx) << log2Ceil(xLen/8)
-    raw_pte_cache_addr(vaddrBits.min(raw_pte_cache_addr.getWidth)-1, 0)
-  }
-  /** stage2_pte_cache input addr */
-  val stage2_pte_cache_addr = if (!usingHypervisor) 0.U else {
-    val vpn_idxs = (0 until pgLevels - 1).map { i =>
-      (r_req.addr >> (pgLevels - i - 1) * pgLevelBits)(pgLevelBits - 1, 0)
-    }
-    val vpn_idx  = vpn_idxs(aux_count)
-    val raw_s2_pte_cache_addr = Cat(aux_pte.ppn, vpn_idx) << log2Ceil(xLen / 8)
-    raw_s2_pte_cache_addr(vaddrBits.min(raw_s2_pte_cache_addr.getWidth) - 1, 0)
-  }
-
-  def makeFragmentedSuperpagePPN(ppn: UInt): Seq[UInt] = {
-    (pgLevels-1 until 0 by -1).map(i => Cat(ppn >> (pgLevelBits*i), r_req.addr(((pgLevelBits*i) min vpnBits)-1, 0).padTo(pgLevelBits*i)))
-  }
-  /** PTECache caches non-leaf PTE
-    * @param s2 true: 2-stage address translation
-    */
-  def makePTECache(s2: Boolean): (Bool, UInt) = if (coreParams.nPTECacheEntries == 0) {
-    (false.B, 0.U)
-  } else {
-    val plru = new PseudoLRU(coreParams.nPTECacheEntries)
-    val valid = RegInit(0.U(coreParams.nPTECacheEntries.W))
-    val tags = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor) 1 + vaddrBits else paddrBits).W)))
-    // not include full pte, only ppn
-    val data = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor && s2) vpnBits else ppnBits).W)))
-    val can_hit =
-      if (s2) count === r_hgatp_initial_count && aux_count < (pgLevels-1).U && r_req.vstage1 && stage2 && !stage2_final
-      else count < (pgLevels-1).U && Mux(r_req.vstage1, stage2, !r_req.stage2)
-    val can_refill =
-      if (s2) do_both_stages && !stage2 && !stage2_final
-      else can_hit
-    val tag =
-      if (s2) Cat(true.B, stage2_pte_cache_addr.padTo(vaddrBits))
-      else Cat(r_req.vstage1, pte_cache_addr.padTo(if (usingHypervisor) vaddrBits else paddrBits))
-
-    val hits = tags.map(_ === tag).asUInt & valid
-    val hit = hits.orR && can_hit
-    // refill with mem response
-    when (mem_resp_valid && traverse && can_refill && !hits.orR && !invalidated) {
-      val r = Mux(valid.andR, plru.way, PriorityEncoder(~valid))
-      valid := valid | UIntToOH(r)
-      tags(r) := tag
-      data(r) := pte.ppn
-      plru.access(r)
-    }
-    // replace
-    when (hit && state === s_req) { plru.access(OHToUInt(hits)) }
-    when (io.dpath.sfence.valid && (!io.dpath.sfence.bits.rs1 || usingHypervisor.B && io.dpath.sfence.bits.hg)) { valid := 0.U }
-
-    val lcount = if (s2) aux_count else count
-    for (i <- 0 until pgLevels-1) {
-      ccover(hit && state === s_req && lcount === i.U, s"PTE_CACHE_HIT_L$i", s"PTE cache hit, level $i")
-    }
-
-    (hit, Mux1H(hits, data))
-  }
-  // generate pte_cache
-  val (pte_cache_hit, pte_cache_data) = makePTECache(false)
-  // generate pte_cache with 2-stage translation
-  val (stage2_pte_cache_hit, stage2_pte_cache_data) = makePTECache(true)
-  // pte_cache hit or 2-stage pte_cache hit
-  val pte_hit = RegNext(false.B)
-  io.dpath.perf.pte_miss := false.B
-  io.dpath.perf.pte_hit := pte_hit && (state === s_req) && !io.dpath.perf.l2hit
-  assert(!(io.dpath.perf.l2hit && (io.dpath.perf.pte_miss || io.dpath.perf.pte_hit)),
-    "PTE Cache Hit/Miss Performance Monitor Events are lower priority than L2TLB Hit event")
-  // l2_refill happens when find the leaf pte
-  val l2_refill = RegNext(false.B)
-  l2_refill_wire := l2_refill
-  io.dpath.perf.l2miss := false.B
-  io.dpath.perf.l2hit := false.B
-  // l2tlb
-  val (l2_hit, l2_error, l2_pte, l2_tlb_ram) = if (coreParams.nL2TLBEntries == 0) (false.B, false.B, WireDefault(0.U.asTypeOf(new PTE)), None) else {
-    val code = new ParityCode
-    require(isPow2(coreParams.nL2TLBEntries))
-    require(isPow2(coreParams.nL2TLBWays))
-    require(coreParams.nL2TLBEntries >= coreParams.nL2TLBWays)
-    val nL2TLBSets = coreParams.nL2TLBEntries / coreParams.nL2TLBWays
-    require(isPow2(nL2TLBSets))
-    val idxBits = log2Ceil(nL2TLBSets)
-
-    val l2_plru = new SetAssocLRU(nL2TLBSets, coreParams.nL2TLBWays, "plru")
-
-    val ram =  DescribedSRAM(
-      name = "l2_tlb_ram",
-      desc = "L2 TLB",
-      size = nL2TLBSets,
-      data = Vec(coreParams.nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets).getWidth).W))
-    )
-
-    val g = Reg(Vec(coreParams.nL2TLBWays, UInt(nL2TLBSets.W)))
-    val valid = RegInit(VecInit(Seq.fill(coreParams.nL2TLBWays)(0.U(nL2TLBSets.W))))
-    // use r_req to construct tag
-    val (r_tag, r_idx) = Split(Cat(r_req.vstage1, r_req.addr(maxSVAddrBits-pgIdxBits-1, 0)), idxBits)
-    /** the valid vec for the selected set(including n ways) */
-    val r_valid_vec = valid.map(_(r_idx)).asUInt
-    val r_valid_vec_q = Reg(UInt(coreParams.nL2TLBWays.W))
-    val r_l2_plru_way = Reg(UInt(log2Ceil(coreParams.nL2TLBWays max 1).W))
-    r_valid_vec_q := r_valid_vec
-    // replacement way
-    r_l2_plru_way := (if (coreParams.nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U)
-    // refill with r_pte(leaf pte)
-    when (l2_refill && !invalidated) {
-      val entry = Wire(new L2TLBEntry(nL2TLBSets))
-      entry.ppn := r_pte.ppn
-      entry.d := r_pte.d
-      entry.a := r_pte.a
-      entry.u := r_pte.u
-      entry.x := r_pte.x
-      entry.w := r_pte.w
-      entry.r := r_pte.r
-      entry.tag := r_tag
-      // if all the way are valid, use plru to select one way to be replaced,
-      // otherwise use PriorityEncoderOH to select one
-      val wmask = if (coreParams.nL2TLBWays > 1) Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, coreParams.nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) else 1.U(1.W)
-      ram.write(r_idx, VecInit(Seq.fill(coreParams.nL2TLBWays)(code.encode(entry.asUInt))), wmask.asBools)
-
-      val mask = UIntToOH(r_idx)
-      for (way <- 0 until coreParams.nL2TLBWays) {
-        when (wmask(way)) {
-          valid(way) := valid(way) | mask
-          g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask)
-        }
-      }
-    }
-    // sfence happens
-    when (io.dpath.sfence.valid) {
-      val hg = usingHypervisor.B && io.dpath.sfence.bits.hg
-      for (way <- 0 until coreParams.nL2TLBWays) {
-        valid(way) :=
-          Mux(!hg && io.dpath.sfence.bits.rs1, valid(way) & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits+pgIdxBits-1, pgIdxBits)),
-          Mux(!hg && io.dpath.sfence.bits.rs2, valid(way) & g(way),
-          0.U))
-      }
-    }
-
-    val s0_valid = !l2_refill && arb.io.out.fire
-    val s0_suitable = arb.io.out.bits.bits.vstage1 === arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.need_gpa
-    val s1_valid = RegNext(s0_valid && s0_suitable && arb.io.out.bits.valid)
-    val s2_valid = RegNext(s1_valid)
-    // read from tlb idx
-    val s1_rdata = ram.read(arb.io.out.bits.bits.addr(idxBits-1, 0), s0_valid)
-    val s2_rdata = s1_rdata.map(s1_rdway => code.decode(RegEnable(s1_rdway, s1_valid)))
-    val s2_valid_vec = RegEnable(r_valid_vec, s1_valid)
-    val s2_g_vec = RegEnable(VecInit(g.map(_(r_idx))), s1_valid)
-    val s2_error = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR
-    when (s2_valid && s2_error) { valid.foreach { _ := 0.U }}
-    // decode
-    val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets)))
-    val s2_hit_vec = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag))
-    val s2_hit = s2_valid && s2_hit_vec.orR
-    io.dpath.perf.l2miss := s2_valid && !(s2_hit_vec.orR)
-    io.dpath.perf.l2hit := s2_hit
-    when (s2_hit) {
-      l2_plru.access(r_idx, OHToUInt(s2_hit_vec))
-      assert((PopCount(s2_hit_vec) === 1.U) || s2_error, "L2 TLB multi-hit")
-    }
-
-    val s2_pte = Wire(new PTE)
-    val s2_hit_entry = Mux1H(s2_hit_vec, s2_entry_vec)
-    s2_pte.ppn := s2_hit_entry.ppn
-    s2_pte.d := s2_hit_entry.d
-    s2_pte.a := s2_hit_entry.a
-    s2_pte.g := Mux1H(s2_hit_vec, s2_g_vec)
-    s2_pte.u := s2_hit_entry.u
-    s2_pte.x := s2_hit_entry.x
-    s2_pte.w := s2_hit_entry.w
-    s2_pte.r := s2_hit_entry.r
-    s2_pte.v := true.B
-    s2_pte.reserved_for_future := 0.U
-    s2_pte.reserved_for_software := 0.U
-
-    for (way <- 0 until coreParams.nL2TLBWays) {
-      ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way")
-    }
-
-    (s2_hit, s2_error, s2_pte, Some(ram))
-  }
-
-  // if SFENCE occurs during walk, don't refill PTE cache or L2 TLB until next walk
-  invalidated := io.dpath.sfence.valid || (invalidated && state =/= s_ready)
-  // mem request
-  io.mem.req.valid := state === s_req || state === s_dummy1
-  io.mem.req.bits.phys := true.B
-  io.mem.req.bits.cmd  := M_XRD
-  io.mem.req.bits.size := log2Ceil(xLen/8).U
-  io.mem.req.bits.signed := false.B
-  io.mem.req.bits.addr := pte_addr
-  io.mem.req.bits.idx.foreach(_ := pte_addr)
-  io.mem.req.bits.dprv := PRV.S.U   // PTW accesses are S-mode by definition
-  io.mem.req.bits.dv := do_both_stages && !stage2
-  io.mem.s1_kill := l2_hit || state =/= s_wait1
-  io.mem.s2_kill := false.B
-
-  val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits)
-  require(!usingHypervisor || pageGranularityPMPs, s"hypervisor requires pmpGranularity >= ${1<<pgIdxBits}")
-
-  val pmaPgLevelHomogeneous = (0 until pgLevels) map { i =>
-    val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits))
-    if (pageGranularityPMPs && i == pgLevels - 1) {
-      require(TLBPageLookup.homogeneous(edge.manager.managers, pgSize), s"All memory regions must be $pgSize-byte aligned")
-      true.B
-    } else {
-      TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous
-    }
-  }
-  val pmaHomogeneous = pmaPgLevelHomogeneous(count)
-  val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp, paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits).apply(r_pte.ppn << pgIdxBits, count)
-  val homogeneous = pmaHomogeneous && pmpHomogeneous
-  // response to tlb
-  for (i <- 0 until io.requestor.size) {
-    io.requestor(i).resp.valid := resp_valid(i)
-    io.requestor(i).resp.bits.ae_ptw := resp_ae_ptw
-    io.requestor(i).resp.bits.ae_final := resp_ae_final
-    io.requestor(i).resp.bits.pf := resp_pf
-    io.requestor(i).resp.bits.gf := resp_gf
-    io.requestor(i).resp.bits.hr := resp_hr
-    io.requestor(i).resp.bits.hw := resp_hw
-    io.requestor(i).resp.bits.hx := resp_hx
-    io.requestor(i).resp.bits.pte := r_pte
-    io.requestor(i).resp.bits.level := max_count
-    io.requestor(i).resp.bits.homogeneous := homogeneous || pageGranularityPMPs.B
-    io.requestor(i).resp.bits.fragmented_superpage := resp_fragmented_superpage && pageGranularityPMPs.B
-    io.requestor(i).resp.bits.gpa.valid := r_req.need_gpa
-    io.requestor(i).resp.bits.gpa.bits :=
-      Cat(Mux(!stage2_final || !r_req.vstage1 || aux_count === (pgLevels - 1).U, aux_pte.ppn, makeFragmentedSuperpagePPN(aux_pte.ppn)(aux_count)), gpa_pgoff)
-    io.requestor(i).resp.bits.gpa_is_pte := !stage2_final
-    io.requestor(i).ptbr := io.dpath.ptbr
-    io.requestor(i).hgatp := io.dpath.hgatp
-    io.requestor(i).vsatp := io.dpath.vsatp
-    io.requestor(i).customCSRs := io.dpath.customCSRs
-    io.requestor(i).status := io.dpath.status
-    io.requestor(i).hstatus := io.dpath.hstatus
-    io.requestor(i).gstatus := io.dpath.gstatus
-    io.requestor(i).pmp := io.dpath.pmp
-  }
-
-  // control state machine
-  val next_state = WireDefault(state)
-  state := OptimizationBarrier(next_state)
-  val do_switch = WireDefault(false.B)
-
-  switch (state) {
-    is (s_ready) {
-      when (arb.io.out.fire) {
-        val satp_initial_count = pgLevels.U - minPgLevels.U - satp.additionalPgLevels
-        val vsatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.vsatp.additionalPgLevels
-        val hgatp_initial_count = pgLevels.U - minPgLevels.U - io.dpath.hgatp.additionalPgLevels
-        val aux_ppn             = Mux(arb.io.out.bits.bits.vstage1, io.dpath.vsatp.ppn, arb.io.out.bits.bits.addr)
-
-        r_req := arb.io.out.bits.bits
-        r_req_dest := arb.io.chosen
-        next_state := Mux(arb.io.out.bits.valid, s_req, s_ready)
-        stage2       := arb.io.out.bits.bits.stage2
-        stage2_final := arb.io.out.bits.bits.stage2 && !arb.io.out.bits.bits.vstage1
-        count       := Mux(arb.io.out.bits.bits.stage2, hgatp_initial_count, satp_initial_count)
-        aux_count   := Mux(arb.io.out.bits.bits.vstage1, vsatp_initial_count, 0.U)
-        aux_pte.ppn := aux_ppn
-        aux_ppn_hi.foreach { _ := aux_ppn >> aux_pte.ppn.getWidth }
-        aux_pte.reserved_for_future := 0.U
-        resp_ae_ptw := false.B
-        resp_ae_final := false.B
-        resp_pf := false.B
-        resp_gf := false.B
-        resp_hr := true.B
-        resp_hw := true.B
-        resp_hx := true.B
-        resp_fragmented_superpage := false.B
-        r_hgatp := io.dpath.hgatp
-
-        assert(!arb.io.out.bits.bits.need_gpa || arb.io.out.bits.bits.stage2)
-      }
-    }
-    is (s_req) {
-      when(stage2 && count === r_hgatp_initial_count) {
-        gpa_pgoff := Mux(aux_count === (pgLevels-1).U, r_req.addr << (xLen/8).log2, stage2_pte_cache_addr)
-      }
-      // pte_cache hit
-      when (stage2_pte_cache_hit) {
-        aux_count := aux_count + 1.U
-        aux_pte.ppn := stage2_pte_cache_data
-        aux_ppn_hi.foreach { _ := 0.U }
-        aux_pte.reserved_for_future := 0.U
-        pte_hit := true.B
-      }.elsewhen (pte_cache_hit) {
-        count := count + 1.U
-        pte_hit := true.B
-      }.otherwise {
-        next_state := Mux(io.mem.req.ready, s_wait1, s_req)
-      }
-    }
-    is (s_wait1) {
-      // This Mux is for the l2_error case; the l2_hit && !l2_error case is overriden below
-      next_state := Mux(l2_hit, s_req, s_wait2)
-    }
-    is (s_wait2) {
-      next_state := s_wait3
-      io.dpath.perf.pte_miss := count < (pgLevels-1).U
-      when (io.mem.s2_xcpt.ae.ld) {
-        resp_ae_ptw := true.B
-        next_state := s_ready
-        resp_valid(r_req_dest) := true.B
-      }
-    }
-    is (s_fragment_superpage) {
-      next_state := s_ready
-      resp_valid(r_req_dest) := true.B
-      when (!homogeneous) {
-        count := (pgLevels-1).U
-        resp_fragmented_superpage := true.B
-      }
-      when (do_both_stages) {
-        resp_fragmented_superpage := true.B
-      }
-    }
-  }
-
-  val merged_pte = {
-    val superpage_masks = (0 until pgLevels).map(i => ((BigInt(1) << pte.ppn.getWidth) - (BigInt(1) << (pgLevels-1-i)*pgLevelBits)).U)
-    val superpage_mask = superpage_masks(Mux(stage2_final, max_count, (pgLevels-1).U))
-    val stage1_ppns = (0 until pgLevels-1).map(i => Cat(pte.ppn(pte.ppn.getWidth-1, (pgLevels-i-1)*pgLevelBits), aux_pte.ppn((pgLevels-i-1)*pgLevelBits-1,0))) :+ pte.ppn
-    val stage1_ppn = stage1_ppns(count)
-    makePTE(stage1_ppn & superpage_mask, aux_pte)
-  }
-
-  r_pte := OptimizationBarrier(
-    // l2tlb hit->find a leaf PTE(l2_pte), respond to L1TLB
-    Mux(l2_hit && !l2_error, l2_pte,
-    // pte cache hit->find a non-leaf PTE(pte_cache),continue to request mem
-    Mux(state === s_req && !stage2_pte_cache_hit && pte_cache_hit, makePTE(pte_cache_data, l2_pte),
-    // 2-stage translation
-    Mux(do_switch, makeHypervisorRootPTE(r_hgatp, pte.ppn, r_pte),
-    // when mem respond, store mem.resp.pte
-    Mux(mem_resp_valid, Mux(!traverse && r_req.vstage1 && stage2, merged_pte, pte),
-    // fragment_superpage
-    Mux(state === s_fragment_superpage && !homogeneous && count =/= (pgLevels - 1).U, makePTE(makeFragmentedSuperpagePPN(r_pte.ppn)(count), r_pte),
-    // when tlb request come->request mem, use root address in satp(or vsatp,hgatp)
-    Mux(arb.io.out.fire, Mux(arb.io.out.bits.bits.stage2, makeHypervisorRootPTE(io.dpath.hgatp, io.dpath.vsatp.ppn, r_pte), makePTE(satp.ppn, r_pte)),
-    r_pte)))))))
-
-  when (l2_hit && !l2_error) {
-    assert(state === s_req || state === s_wait1)
-    next_state := s_ready
-    resp_valid(r_req_dest) := true.B
-    count := (pgLevels-1).U
-  }
-  when (mem_resp_valid) {
-    assert(state === s_wait3)
-    next_state := s_req
-    when (traverse) {
-      when (do_both_stages && !stage2) { do_switch := true.B }
-      count := count + 1.U
-    }.otherwise {
-      val gf = stage2 && !stage2_final && !pte.ur()
-      val ae = pte.v && invalid_paddr
-      val pf = pte.v && pte.reserved_for_future =/= 0.U
-      val success = pte.v && !ae && !pf && !gf
-
-      when (do_both_stages && !stage2_final && success) {
-        when (stage2) {
-          stage2 := false.B
-          count := aux_count
-        }.otherwise {
-          stage2_final := true.B
-          do_switch := true.B
-        }
-      }.otherwise {
-        // find a leaf pte, start l2 refill
-        l2_refill := success && count === (pgLevels-1).U && !r_req.need_gpa &&
-          (!r_req.vstage1 && !r_req.stage2 ||
-           do_both_stages && aux_count === (pgLevels-1).U && pte.isFullPerm())
-        count := max_count
-
-        when (pageGranularityPMPs.B && !(count === (pgLevels-1).U && (!do_both_stages || aux_count === (pgLevels-1).U))) {
-          next_state := s_fragment_superpage
-        }.otherwise {
-          next_state := s_ready
-          resp_valid(r_req_dest) := true.B
-        }
-
-        resp_ae_final := ae
-        resp_pf := pf && !stage2
-        resp_gf := gf || (pf && stage2)
-        resp_hr := !stage2 || (!pf && !gf && pte.ur())
-        resp_hw := !stage2 || (!pf && !gf && pte.uw())
-        resp_hx := !stage2 || (!pf && !gf && pte.ux())
-      }
-    }
-  }
-  when (io.mem.s2_nack) {
-    assert(state === s_wait2)
-    next_state := s_req
-  }
-
-  when (do_switch) {
-    aux_count := Mux(traverse, count + 1.U, count)
-    count := r_hgatp_initial_count
-    aux_pte := Mux(traverse, pte, {
-      val s1_ppns = (0 until pgLevels-1).map(i => Cat(pte.ppn(pte.ppn.getWidth-1, (pgLevels-i-1)*pgLevelBits), r_req.addr(((pgLevels-i-1)*pgLevelBits min vpnBits)-1,0).padTo((pgLevels-i-1)*pgLevelBits))) :+ pte.ppn
-      makePTE(s1_ppns(count), pte)
-    })
-    aux_ppn_hi.foreach { _ := 0.U }
-    stage2 := true.B
-  }
-
-  for (i <- 0 until pgLevels) {
-    val leaf = mem_resp_valid && !traverse && count === i.U
-    ccover(leaf && pte.v && !invalid_paddr && pte.reserved_for_future === 0.U, s"L$i", s"successful page-table access, level $i")
-    ccover(leaf && pte.v && invalid_paddr, s"L${i}_BAD_PPN_MSB", s"PPN too large, level $i")
-    ccover(leaf && pte.v && pte.reserved_for_future =/= 0.U, s"L${i}_BAD_RSV_MSB", s"reserved MSBs set, level $i")
-    ccover(leaf && !mem_resp_data(0), s"L${i}_INVALID_PTE", s"page not present, level $i")
-    if (i != pgLevels-1)
-      ccover(leaf && !pte.v && mem_resp_data(0), s"L${i}_BAD_PPN_LSB", s"PPN LSBs not zero, level $i")
-  }
-  ccover(mem_resp_valid && count === (pgLevels-1).U && pte.table(), s"TOO_DEEP", s"page table too deep")
-  ccover(io.mem.s2_nack, "NACK", "D$ nacked page-table access")
-  ccover(state === s_wait2 && io.mem.s2_xcpt.ae.ld, "AE", "access exception while walking page table")
-
-  } // leaving gated-clock domain
-
-  private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) =
-    if (usingVM) property.cover(cond, s"PTW_$label", "MemorySystem;;" + desc)
-
-  /** Relace PTE.ppn with ppn */
-  private def makePTE(ppn: UInt, default: PTE) = {
-    val pte = WireDefault(default)
-    pte.ppn := ppn
-    pte
-  }
-  /** use hgatp and vpn to construct a new ppn */
-  private def makeHypervisorRootPTE(hgatp: PTBR, vpn: UInt, default: PTE) = {
-    val count = pgLevels.U - minPgLevels.U - hgatp.additionalPgLevels
-    val idxs = (0 to pgLevels-minPgLevels).map(i => (vpn >> (pgLevels-i)*pgLevelBits))
-    val lsbs = WireDefault(UInt(maxHypervisorExtraAddrBits.W), idxs(count))
-    val pte = WireDefault(default)
-    pte.ppn := Cat(hgatp.ppn >> maxHypervisorExtraAddrBits, lsbs)
-    pte
-  }
-}
-
-/** Mix-ins for constructing tiles that might have a PTW */
-trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile =>
-  val module: CanHavePTWModule
-  var nPTWPorts = 1
-  nDCachePorts += usingPTW.toInt
-}
-
-trait CanHavePTWModule extends HasHellaCacheModule {
-  val outer: CanHavePTW
-  val ptwPorts = ListBuffer(outer.dcache.module.io.ptw)
-  val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p))
-  ptw.io.mem <> DontCare
-  if (outer.usingPTW) {
-    dcachePorts += ptw.io.mem
-  }
-}
diff --git a/diplomatic/src/rocket/TLB.scala b/diplomatic/src/rocket/TLB.scala
deleted file mode 100644
index c73a14536..000000000
--- a/diplomatic/src/rocket/TLB.scala
+++ /dev/null
@@ -1,746 +0,0 @@
-// See LICENSE.SiFive for license details.
-// See LICENSE.Berkeley for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util._
-
-import org.chipsalliance.cde.config.{Field, Parameters}
-import freechips.rocketchip.subsystem.CacheBlockBytes
-import freechips.rocketchip.diplomacy.RegionType
-import org.chipsalliance.rockettile.{CoreModule, CoreBundle}
-import freechips.rocketchip.tilelink._
-import freechips.rocketchip.util._
-import freechips.rocketchip.util.property
-import freechips.rocketchip.devices.debug.DebugModuleKey
-import chisel3.internal.sourceinfo.SourceInfo
-
-case object PgLevels extends Field[Int](2)
-case object ASIdBits extends Field[Int](0)
-case object VMIdBits extends Field[Int](0)
-
-/** =SFENCE=
-  * rs1 rs2
-  * {{{
-  *  0   0 -> flush All
-  *  0   1 -> flush by ASID
-  *  1   1 -> flush by ADDR
-  *  1   0 -> flush by ADDR and ASID
-  * }}}
-  * {{{
-  * If rs1=x0 and rs2=x0, the fence orders all reads and writes made to any level of the page tables, for all address spaces.
-  * If rs1=x0 and rs2!=x0, the fence orders all reads and writes made to any level of the page tables, but only for the address space identified by integer register rs2. Accesses to global mappings (see Section 4.3.1) are not ordered.
-  * If rs1!=x0 and rs2=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for all address spaces.
-  * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered.
-  * }}}
-  */
-class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) {
-  val rs1 = Bool()
-  val rs2 = Bool()
-  val addr = UInt(vaddrBits.W)
-  val asid = UInt((asIdBits max 1).W) // TODO zero-width
-  val hv = Bool()
-  val hg = Bool()
-}
-
-class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) {
-  /** request address from CPU. */
-  val vaddr = UInt(vaddrBitsExtended.W)
-  /** don't lookup TLB, bypass vaddr as paddr */
-  val passthrough = Bool()
-  /** granularity */
-  val size = UInt(log2Ceil(lgMaxSize + 1).W)
-  /** memory command. */
-  val cmd  = Bits(M_SZ.W)
-  val prv = UInt(PRV.SZ.W)
-  /** virtualization mode */
-  val v = Bool()
-
-}
-
-class TLBExceptions extends Bundle {
-  val ld = Bool()
-  val st = Bool()
-  val inst = Bool()
-}
-
-class TLBResp(implicit p: Parameters) extends CoreBundle()(p) {
-  // lookup responses
-  val miss = Bool()
-  /** physical address */
-  val paddr = UInt(paddrBits.W)
-  val gpa = UInt(vaddrBitsExtended.W)
-  val gpa_is_pte = Bool()
-  /** page fault exception */
-  val pf = new TLBExceptions
-  /** guest page fault exception */
-  val gf = new TLBExceptions
-  /** access exception */
-  val ae = new TLBExceptions
-  /** misaligned access exception */
-  val ma = new TLBExceptions
-  /** if this address is cacheable */
-  val cacheable = Bool()
-  /** if caches must allocate this address */
-  val must_alloc = Bool()
-  /** if this address is prefetchable for caches*/
-  val prefetchable = Bool()
-}
-
-class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) {
-  val ppn = UInt(ppnBits.W)
-  /** pte.u user */
-  val u = Bool()
-  /** pte.g global */
-  val g = Bool()
-  /** access exception.
-    * D$ -> PTW -> TLB AE
-    * Alignment failed.
-    */
-  val ae_ptw = Bool()
-  val ae_final = Bool()
-  /** page fault */
-  val pf = Bool()
-  /** guest page fault */
-  val gf = Bool()
-  /** supervisor write */
-  val sw = Bool()
-  /** supervisor execute */
-  val sx = Bool()
-  /** supervisor read */
-  val sr = Bool()
-  /** hypervisor write */
-  val hw = Bool()
-  /** hypervisor excute */
-  val hx = Bool()
-  /** hypervisor read */
-  val hr = Bool()
-  /** prot_w */
-  val pw = Bool()
-  /** prot_x */
-  val px = Bool()
-  /** prot_r */
-  val pr = Bool()
-
-  /** PutPartial */
-  val ppp = Bool()
-  /** AMO logical */
-  val pal = Bool()
-  /** AMO arithmetic */
-  val paa = Bool()
-  /** get/put effects */
-  val eff = Bool()
-  /** cacheable */
-  val c = Bool()
-  /** fragmented_superpage support */
-  val fragmented_superpage = Bool()
-}
-
-/** basic cell for TLB data */
-class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean)(implicit p: Parameters) extends CoreBundle()(p) {
-  require(nSectors == 1 || !superpage)
-  require(!superpageOnly || superpage)
-
-  val level = UInt(log2Ceil(pgLevels).W)
-  /** use vpn as tag */
-  val tag_vpn = UInt(vpnBits.W)
-  /** tag in vitualization mode */
-  val tag_v = Bool()
-  /** entry data */
-  val data = Vec(nSectors, UInt(new TLBEntryData().getWidth.W))
-  /** valid bit */
-  val valid = Vec(nSectors, Bool())
-  /** returns all entry data in this entry */
-  def entry_data = data.map(_.asTypeOf(new TLBEntryData))
-  /** returns the index of sector */
-  private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2-1, 0)
-  /** returns the entry data matched with this vpn*/
-  def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData))
-  /** returns whether a sector hits */
-  def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual)
-  /** returns whether tag matches vpn */
-  def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> nSectors.log2) === 0.U) && (tag_v === virtual)
-  /** returns hit signal */
-  def hit(vpn: UInt, virtual: Bool): Bool = {
-    if (superpage && usingVM) {
-      var tagMatch = valid.head && (tag_v === virtual)
-      for (j <- 0 until pgLevels) {
-        val base = (pgLevels - 1 - j) * pgLevelBits
-        val n = pgLevelBits + (if (j == 0) hypervisorExtraAddrBits else 0)
-        val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B
-        tagMatch = tagMatch && (ignore || (tag_vpn ^ vpn)(base + n - 1, base) === 0.U)
-      }
-      tagMatch
-    } else {
-      val idx = sectorIdx(vpn)
-      valid(idx) && sectorTagMatch(vpn, virtual)
-    }
-  }
-  /** returns the ppn of the input TLBEntryData */
-  def ppn(vpn: UInt, data: TLBEntryData) = {
-    val supervisorVPNBits = pgLevels * pgLevelBits
-    if (superpage && usingVM) {
-      var res = data.ppn >> pgLevelBits*(pgLevels - 1)
-      for (j <- 1 until pgLevels) {
-        val ignore = level < j.U || (superpageOnly && j == pgLevels - 1).B
-        res = Cat(res, (Mux(ignore, vpn, 0.U) | data.ppn)(supervisorVPNBits - j*pgLevelBits - 1, supervisorVPNBits - (j + 1)*pgLevelBits))
-      }
-      res
-    } else {
-      data.ppn
-    }
-  }
-  /** does the refill
-    *
-    * find the target entry with vpn tag
-    * and replace the target entry with the input entry data
-    */
-  def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = {
-    this.tag_vpn := vpn
-    this.tag_v := virtual
-    this.level := level.extract(log2Ceil(pgLevels - superpageOnly.toInt)-1, 0)
-
-    val idx = sectorIdx(vpn)
-    valid(idx) := true.B
-    data(idx) := entry.asUInt
-  }
-
-  def invalidate(): Unit = { valid.foreach(_ := false.B) }
-  def invalidate(virtual: Bool): Unit = {
-    for ((v, e) <- valid zip entry_data)
-      when (tag_v === virtual) { v := false.B }
-  }
-  def invalidateVPN(vpn: UInt, virtual: Bool): Unit = {
-    if (superpage) {
-      when (hit(vpn, virtual)) { invalidate() }
-    } else {
-      when (sectorTagMatch(vpn, virtual)) {
-        for (((v, e), i) <- (valid zip entry_data).zipWithIndex)
-          when (tag_v === virtual && i.U === sectorIdx(vpn)) { v := false.B }
-      }
-    }
-    // For fragmented superpage mappings, we assume the worst (largest)
-    // case, and zap entries whose most-significant VPNs match
-    when (((tag_vpn ^ vpn) >> (pgLevelBits * (pgLevels - 1))) === 0.U) {
-      for ((v, e) <- valid zip entry_data)
-        when (tag_v === virtual && e.fragmented_superpage) { v := false.B }
-    }
-  }
-  def invalidateNonGlobal(virtual: Bool): Unit = {
-    for ((v, e) <- valid zip entry_data)
-      when (tag_v === virtual && !e.g) { v := false.B }
-  }
-}
-
-/** TLB config
-  *
-  * @param nSets the number of sets of PTE, follow [[ICacheParams.nSets]]
-  * @param nWays the total number of wayss of PTE, follow [[ICacheParams.nWays]]
-  * @param nSectors the number of ways in a single PTE TLBEntry
-  * @param nSuperpageEntries the number of SuperpageEntries
-  */
-case class TLBConfig(
-    nSets: Int,
-    nWays: Int,
-    nSectors: Int = 4,
-    nSuperpageEntries: Int = 4)
-
-/** =Overview=
-  * [[TLB]] is a TLB template which contains PMA logic and PMP checker.
-  *
-  * TLB caches PTE and accelerates the address translation process.
-  * When tlb miss happens, ask PTW(L2TLB) for Page Table Walk.
-  * Perform PMP and PMA check during the translation and throw exception if there were any.
-  *
-  *  ==Cache Structure==
-  *  - Sectored Entry (PTE)
-  *   - set-associative or direct-mapped
-  *    - nsets = [[TLBConfig.nSets]]
-  *    - nways = [[TLBConfig.nWays]] / [[TLBConfig.nSectors]]
-  *    - PTEEntry( sectors = [[TLBConfig.nSectors]] )
-  *   - LRU(if set-associative)
-  *
-  *  - Superpage Entry(superpage PTE)
-  *   - fully associative
-  *    - nsets = [[TLBConfig.nSuperpageEntries]]
-  *    - PTEEntry(sectors = 1)
-  *   - PseudoLRU
-  *
-  *  - Special Entry(PTE across PMP)
-  *   - nsets = 1
-  *   - PTEEntry(sectors = 1)
-  *
-  * ==Address structure==
-  * {{{
-  * |vaddr                                                 |
-  * |ppn/vpn                                   | pgIndex   |
-  * |                                          |           |
-  * |           |nSets             |nSector    |           |}}}
-  *
-  * ==State Machine==
-  * {{{
-  * s_ready: ready to accept request from CPU.
-  * s_request: when L1TLB(this) miss, send request to PTW(L2TLB), .
-  * s_wait: wait for PTW to refill L1TLB.
-  * s_wait_invalidate: L1TLB is waiting for respond from PTW, but L1TLB will invalidate respond from PTW.}}}
-  *
-  * ==PMP==
-  * pmp check
-  *  - special_entry: always check
-  *  - other entry: check on refill
-  *
-  * ==Note==
-  * PMA consume diplomacy parameter generate physical memory address checking logic
-  *
-  * Boom use Rocket ITLB, and its own DTLB.
-  *
-  * Accelerators:{{{
-  *   sha3: DTLB
-  *   gemmini: DTLB
-  *   hwacha: DTLB*2+ITLB}}}
-  * @param instruction true for ITLB, false for DTLB
-  * @param lgMaxSize @todo seems granularity
-  * @param cfg [[TLBConfig]]
-  * @param edge collect SoC metadata.
-  */
-class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) {
-  val io = IO(new Bundle {
-    /** request from Core */
-    val req = Flipped(Decoupled(new TLBReq(lgMaxSize)))
-    /** response to Core */
-    val resp = Output(new TLBResp())
-    /** SFence Input */
-    val sfence = Flipped(Valid(new SFenceReq))
-    /** IO to PTW */
-    val ptw = new TLBPTWIO
-    /** suppress a TLB refill, one cycle after a miss */
-    val kill = Input(Bool())
-  })
-
-  val pageGranularityPMPs = pmpGranularity >= (1 << pgIdxBits)
-  val vpn = io.req.bits.vaddr(vaddrBits-1, pgIdxBits)
-  /** index for sectored_Entry */
-  val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
-  /** TLB Entry */
-  val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false))))
-  /** Superpage Entry */
-  val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true)))
-  /** Special Entry
-    *
-    * If PMP granularity is less than page size, thus need additional "special" entry manage PMP.
-    */
-  val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false)))
-  def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries
-  def all_entries = ordinary_entries ++ special_entry
-  def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry
-
-  val s_ready :: s_request :: s_wait :: s_wait_invalidate :: Nil = Enum(4)
-  val state = RegInit(s_ready)
-  // use vpn as refill_tag
-  val r_refill_tag = Reg(UInt(vpnBits.W))
-  val r_superpage_repl_addr = Reg(UInt(log2Ceil(superpage_entries.size).W))
-  val r_sectored_repl_addr = Reg(UInt(log2Ceil(sectored_entries.head.size).W))
-  val r_sectored_hit = Reg(Valid(UInt(log2Ceil(sectored_entries.head.size).W)))
-  val r_superpage_hit = Reg(Valid(UInt(log2Ceil(superpage_entries.size).W)))
-  val r_vstage1_en = Reg(Bool())
-  val r_stage2_en = Reg(Bool())
-  val r_need_gpa = Reg(Bool())
-  val r_gpa_valid = Reg(Bool())
-  val r_gpa = Reg(UInt(vaddrBits.W))
-  val r_gpa_vpn = Reg(UInt(vpnBits.W))
-  val r_gpa_is_pte = Reg(Bool())
-
-  /** privilege mode */
-  val priv = io.req.bits.prv
-  val priv_v = usingHypervisor.B && io.req.bits.v
-  val priv_s = priv(0)
-  // user mode and supervisor mode
-  val priv_uses_vm = priv <= PRV.S.U
-  val satp = Mux(priv_v, io.ptw.vsatp, io.ptw.ptbr)
-  val stage1_en = usingVM.B && satp.mode(satp.mode.getWidth-1)
-  /** VS-stage translation enable */
-  val vstage1_en = usingHypervisor.B && priv_v && io.ptw.vsatp.mode(io.ptw.vsatp.mode.getWidth-1)
-  /** G-stage translation enable */
-  val stage2_en  = usingHypervisor.B && priv_v && io.ptw.hgatp.mode(io.ptw.hgatp.mode.getWidth-1)
-  /** Enable Virtual Memory when:
-    *  1. statically configured
-    *  1. satp highest bits enabled
-    *   i. RV32:
-    *     - 0 -> Bare
-    *     - 1 -> SV32
-    *   i. RV64:
-    *     - 0000 -> Bare
-    *     - 1000 -> SV39
-    *     - 1001 -> SV48
-    *     - 1010 -> SV57
-    *     - 1011 -> SV64
-    *  1. In virtualization mode, vsatp highest bits enabled
-    *  1. priv mode in U and S.
-    *  1. in H & M mode, disable VM.
-    *  1. no passthrough(micro-arch defined.)
-    *
-    * @see RV-priv spec 4.1.11 Supervisor Address Translation and Protection (satp) Register
-    * @see RV-priv spec 8.2.18 Virtual Supervisor Address Translation and Protection Register (vsatp)
-    */
-  val vm_enabled = (stage1_en || stage2_en) && priv_uses_vm && !io.req.bits.passthrough
-
-  // flush guest entries on vsatp.MODE Bare <-> SvXX transitions
-  val v_entries_use_stage1 = RegInit(false.B)
-  val vsatp_mode_mismatch  = priv_v && (vstage1_en =/= v_entries_use_stage1) && !io.req.bits.passthrough
-
-  // share a single physical memory attribute checker (unshare if critical path)
-  val refill_ppn = io.ptw.resp.bits.pte.ppn(ppnBits-1, 0)
-  /** refill signal */
-  val do_refill = usingVM.B && io.ptw.resp.valid
-  /** sfence invalidate refill */
-  val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid
-  // PMP
-  val mpu_ppn = Mux(do_refill, refill_ppn,
-                Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits))
-  val mpu_physaddr = Cat(mpu_ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
-  val mpu_priv = Mux[UInt](usingVM.B && (do_refill || io.req.bits.passthrough /* PTW */), PRV.S.U, Cat(io.ptw.status.debug, priv))
-  val pmp = Module(new PMPChecker(lgMaxSize, paddrBits, pmpGranularity, nPMPs, pgIdxBits, pgLevels, pgLevelBits))
-  pmp.io.addr := mpu_physaddr
-  pmp.io.size := io.req.bits.size
-  pmp.io.pmp := (io.ptw.pmp: Seq[PMP])
-  pmp.io.prv := mpu_priv
-  // PMA
-  // check exist a slave can consume this address.
-  val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_)
-  // check utility to help check SoC property.
-  def fastCheck(member: TLManagerParameters => Boolean) =
-    legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B)
-  // todo: using DataScratchpad doesn't support cacheable.
-  val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B
-  val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous
-  // In M mode, if access DM address(debug module program buffer)
-  val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B)
-  val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r
-  val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w
-  val prot_pp = fastCheck(_.supportsPutPartial)
-  val prot_al = fastCheck(_.supportsLogical)
-  val prot_aa = fastCheck(_.supportsArithmetic)
-  val prot_x = fastCheck(_.executable) && !deny_access_to_debug && pmp.io.x
-  val prot_eff = fastCheck(Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains _.regionType)
-
-  // hit check
-  val sector_hits = sectored_entries(memIdx).map(_.sectorHit(vpn, priv_v))
-  val superpage_hits = superpage_entries.map(_.hit(vpn, priv_v))
-  val hitsVec = all_entries.map(vm_enabled && _.hit(vpn, priv_v))
-  val real_hits = hitsVec.asUInt
-  val hits = Cat(!vm_enabled, real_hits)
-
-  // use ptw response to refill
-  // permission bit arrays
-  when (do_refill) {
-    val pte = io.ptw.resp.bits.pte
-    val refill_v = r_vstage1_en || r_stage2_en
-    val newEntry = Wire(new TLBEntryData)
-    newEntry.ppn := pte.ppn
-    newEntry.c := cacheable
-    newEntry.u := pte.u
-    newEntry.g := pte.g && pte.v
-    newEntry.ae_ptw := io.ptw.resp.bits.ae_ptw
-    newEntry.ae_final := io.ptw.resp.bits.ae_final
-    newEntry.pf := io.ptw.resp.bits.pf
-    newEntry.gf := io.ptw.resp.bits.gf
-    newEntry.hr := io.ptw.resp.bits.hr
-    newEntry.hw := io.ptw.resp.bits.hw
-    newEntry.hx := io.ptw.resp.bits.hx
-    newEntry.sr := pte.sr()
-    newEntry.sw := pte.sw()
-    newEntry.sx := pte.sx()
-    newEntry.pr := prot_r
-    newEntry.pw := prot_w
-    newEntry.px := prot_x
-    newEntry.ppp := prot_pp
-    newEntry.pal := prot_al
-    newEntry.paa := prot_aa
-    newEntry.eff := prot_eff
-    newEntry.fragmented_superpage := io.ptw.resp.bits.fragmented_superpage
-    // refill special_entry
-    when (special_entry.nonEmpty.B && !io.ptw.resp.bits.homogeneous) {
-      special_entry.foreach(_.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry))
-    }.elsewhen (io.ptw.resp.bits.level < (pgLevels-1).U) {
-      val waddr = Mux(r_superpage_hit.valid && usingHypervisor.B, r_superpage_hit.bits, r_superpage_repl_addr)
-      for ((e, i) <- superpage_entries.zipWithIndex) when (r_superpage_repl_addr === i.U) {
-        e.insert(r_refill_tag, refill_v, io.ptw.resp.bits.level, newEntry)
-        when (invalidate_refill) { e.invalidate() }
-      }
-    // refill sectored_hit
-    }.otherwise {
-      val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2)
-      val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr)
-      for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when (waddr === i.U) {
-        when (!r_sectored_hit.valid) { e.invalidate() }
-        e.insert(r_refill_tag, refill_v, 0.U, newEntry)
-        when (invalidate_refill) { e.invalidate() }
-      }
-    }
-
-    r_gpa_valid := io.ptw.resp.bits.gpa.valid
-    r_gpa := io.ptw.resp.bits.gpa.bits
-    r_gpa_is_pte := io.ptw.resp.bits.gpa_is_pte
-  }
-
-  // get all entries data.
-  val entries = all_entries.map(_.getData(vpn))
-  val normal_entries = entries.take(ordinary_entries.size)
-  // parallel query PPN from [[all_entries]], if VM not enabled return VPN instead
-  val ppn = Mux1H(hitsVec :+ !vm_enabled, (all_entries zip entries).map{ case (entry, data) => entry.ppn(vpn, data) } :+ vpn(ppnBits-1, 0))
-
-  val nPhysicalEntries = 1 + special_entry.size
-  // generally PTW misaligned load exception.
-  val ptw_ae_array = Cat(false.B, entries.map(_.ae_ptw).asUInt)
-  val final_ae_array = Cat(false.B, entries.map(_.ae_final).asUInt)
-  val ptw_pf_array = Cat(false.B, entries.map(_.pf).asUInt)
-  val ptw_gf_array = Cat(false.B, entries.map(_.gf).asUInt)
-  val sum = Mux(priv_v, io.ptw.gstatus.sum, io.ptw.status.sum)
-  // if in hypervisor/machine mode, cannot read/write user entries.
-  // if in superviosr/user mode, "If the SUM bit in the sstatus register is set, supervisor mode software may also access pages with U=1.(from spec)"
-  val priv_rw_ok = Mux(!priv_s || sum, entries.map(_.u).asUInt, 0.U) | Mux(priv_s, ~entries.map(_.u).asUInt, 0.U)
-  // if in hypervisor/machine mode, other than user pages, all pages are executable.
-  // if in superviosr/user mode, only user page can execute.
-  val priv_x_ok = Mux(priv_s, ~entries.map(_.u).asUInt, entries.map(_.u).asUInt)
-  val stage1_bypass = Fill(entries.size, usingHypervisor.B && !stage1_en)
-  val mxr = io.ptw.status.mxr | Mux(priv_v, io.ptw.gstatus.mxr, false.B)
-  // "The vsstatus field MXR, which makes execute-only pages readable, only overrides VS-stage page protection.(from spec)"
-  val r_array = Cat(true.B, (priv_rw_ok & (entries.map(_.sr).asUInt | Mux(mxr, entries.map(_.sx).asUInt, 0.U))) | stage1_bypass)
-  val w_array = Cat(true.B, (priv_rw_ok & entries.map(_.sw).asUInt) | stage1_bypass)
-  val x_array = Cat(true.B, (priv_x_ok & entries.map(_.sx).asUInt) | stage1_bypass)
-  val stage2_bypass = Fill(entries.size, !stage2_en)
-  val hr_array = Cat(true.B, entries.map(_.hr).asUInt | Mux(io.ptw.status.mxr, entries.map(_.hx).asUInt, 0.U) | stage2_bypass)
-  val hw_array = Cat(true.B, entries.map(_.hw).asUInt | stage2_bypass)
-  val hx_array = Cat(true.B, entries.map(_.hx).asUInt | stage2_bypass)
-  // These array is for each TLB entries.
-  // user mode can read: PMA OK, TLB OK, AE OK
-  val pr_array = Cat(Fill(nPhysicalEntries, prot_r), normal_entries.map(_.pr).asUInt) & ~(ptw_ae_array | final_ae_array)
-  // user mode can write: PMA OK, TLB OK, AE OK
-  val pw_array = Cat(Fill(nPhysicalEntries, prot_w), normal_entries.map(_.pw).asUInt) & ~(ptw_ae_array | final_ae_array)
-  // user mode can write: PMA OK, TLB OK, AE OK
-  val px_array = Cat(Fill(nPhysicalEntries, prot_x), normal_entries.map(_.px).asUInt) & ~(ptw_ae_array | final_ae_array)
-  // put effect
-  val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt)
-  // cacheable
-  val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt)
-  // put partial
-  val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt)
-  // atomic arithmetic
-  val paa_array = Cat(Fill(nPhysicalEntries, prot_aa), normal_entries.map(_.paa).asUInt)
-  // atomic logic
-  val pal_array = Cat(Fill(nPhysicalEntries, prot_al), normal_entries.map(_.pal).asUInt)
-  val ppp_array_if_cached = ppp_array | c_array
-  val paa_array_if_cached = paa_array | (if(usingAtomicsInCache) c_array else 0.U)
-  val pal_array_if_cached = pal_array | (if(usingAtomicsInCache) c_array else 0.U)
-  val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt)
-
-  // vaddr misaligned: vaddr[1:0]=b00
-  val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR
-  def badVA(guestPA: Boolean): Bool = {
-    val additionalPgLevels = (if (guestPA) io.ptw.hgatp else satp).additionalPgLevels
-    val extraBits = if (guestPA) hypervisorExtraAddrBits else 0
-    val signed = !guestPA
-    val nPgLevelChoices = pgLevels - minPgLevels + 1
-    val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits
-    (for (i <- 0 until nPgLevelChoices) yield {
-      val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.toInt))).U
-      val maskedVAddr = io.req.bits.vaddr & mask
-      additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask)
-    }).orR
-  }
-  val bad_gpa =
-    if (!usingHypervisor) false.B
-    else vm_enabled && !stage1_en && badVA(true)
-  val bad_va =
-    if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B
-    else vm_enabled && stage1_en && badVA(false)
-
-  val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(M_XLR, M_XSC)
-  val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd)
-  val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd)
-  val cmd_put_partial = io.req.bits.cmd === M_PWR
-  val cmd_read = isRead(io.req.bits.cmd)
-  val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX
-  val cmd_write = isWrite(io.req.bits.cmd)
-  val cmd_write_perms = cmd_write ||
-    io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions
-
-  val lrscAllowed = Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array)
-  val ae_array =
-    Mux(misaligned, eff_array, 0.U) |
-    Mux(cmd_lrsc, ~lrscAllowed, 0.U)
-
-  // access exception needs SoC information from PMA
-  val ae_ld_array = Mux(cmd_read, ae_array | ~pr_array, 0.U)
-  val ae_st_array =
-    Mux(cmd_write_perms, ae_array | ~pw_array, 0.U) |
-    Mux(cmd_put_partial, ~ppp_array_if_cached, 0.U) |
-    Mux(cmd_amo_logical, ~pal_array_if_cached, 0.U) |
-    Mux(cmd_amo_arithmetic, ~paa_array_if_cached, 0.U)
-  val must_alloc_array =
-    Mux(cmd_put_partial, ~ppp_array, 0.U) |
-    Mux(cmd_amo_logical, ~paa_array, 0.U) |
-    Mux(cmd_amo_arithmetic, ~pal_array, 0.U) |
-    Mux(cmd_lrsc, ~0.U(pal_array.getWidth.W), 0.U)
-  val pf_ld_array = Mux(cmd_read, ((~Mux(cmd_readx, x_array, r_array) & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U)
-  val pf_st_array = Mux(cmd_write_perms, ((~w_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array, 0.U)
-  val pf_inst_array = ((~x_array & ~ptw_ae_array) | ptw_pf_array) & ~ptw_gf_array
-  val gf_ld_array = Mux(priv_v && cmd_read, ~Mux(cmd_readx, hx_array, hr_array) & ~ptw_ae_array, 0.U)
-  val gf_st_array = Mux(priv_v && cmd_write_perms, ~hw_array & ~ptw_ae_array, 0.U)
-  val gf_inst_array = Mux(priv_v, ~hx_array & ~ptw_ae_array, 0.U)
-
-  val gpa_hits = {
-    val need_gpa_mask = if (instruction) gf_inst_array else gf_ld_array | gf_st_array
-    val hit_mask = Fill(ordinary_entries.size, r_gpa_valid && r_gpa_vpn === vpn) | Fill(all_entries.size, !vstage1_en)
-    hit_mask | ~need_gpa_mask(all_entries.size-1, 0)
-  }
-
-  val tlb_hit_if_not_gpa_miss = real_hits.orR
-  val tlb_hit = (real_hits & gpa_hits).orR
-  // leads to s_request
-  val tlb_miss = vm_enabled && !vsatp_mode_mismatch && !bad_va && !tlb_hit
-
-  val sectored_plru = new SetAssocLRU(cfg.nSets, sectored_entries.head.size, "plru")
-  val superpage_plru = new PseudoLRU(superpage_entries.size)
-  when (io.req.valid && vm_enabled) {
-    // replace
-    when (sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) }
-    when (superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) }
-  }
-
-  // Superpages create the possibility that two entries in the TLB may match.
-  // This corresponds to a software bug, but we can't return complete garbage;
-  // we must return either the old translation or the new translation.  This
-  // isn't compatible with the Mux1H approach.  So, flush the TLB and report
-  // a miss on duplicate entries.
-  val multipleHits = PopCountAtLeast(real_hits, 2)
-
-  // only pull up req.ready when this is s_ready state.
-  io.req.ready := state === s_ready
-  // page fault
-  io.resp.pf.ld := (bad_va && cmd_read) || (pf_ld_array & hits).orR
-  io.resp.pf.st := (bad_va && cmd_write_perms) || (pf_st_array & hits).orR
-  io.resp.pf.inst := bad_va || (pf_inst_array & hits).orR
-  // guest page fault
-  io.resp.gf.ld := (bad_gpa && cmd_read) || (gf_ld_array & hits).orR
-  io.resp.gf.st := (bad_gpa && cmd_write_perms) || (gf_st_array & hits).orR
-  io.resp.gf.inst := bad_gpa || (gf_inst_array & hits).orR
-  // access exception
-  io.resp.ae.ld := (ae_ld_array & hits).orR
-  io.resp.ae.st := (ae_st_array & hits).orR
-  io.resp.ae.inst := (~px_array & hits).orR
-  // misaligned
-  io.resp.ma.ld := misaligned && cmd_read
-  io.resp.ma.st := misaligned && cmd_write
-  io.resp.ma.inst := false.B // this is up to the pipeline to figure out
-  io.resp.cacheable := (c_array & hits).orR
-  io.resp.must_alloc := (must_alloc_array & hits).orR
-  io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint).B
-  io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits
-  io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits-1, 0))
-  io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte
-  io.resp.gpa := {
-    val page = Mux(!vstage1_en, Cat(bad_gpa, vpn), r_gpa >> pgIdxBits)
-    val offset = Mux(io.resp.gpa_is_pte, r_gpa(pgIdxBits-1, 0), io.req.bits.vaddr(pgIdxBits-1, 0))
-    Cat(page, offset)
-  }
-
-  io.ptw.req.valid := state === s_request
-  io.ptw.req.bits.valid := !io.kill
-  io.ptw.req.bits.bits.addr := r_refill_tag
-  io.ptw.req.bits.bits.vstage1 := r_vstage1_en
-  io.ptw.req.bits.bits.stage2 := r_stage2_en
-  io.ptw.req.bits.bits.need_gpa := r_need_gpa
-
-  if (usingVM) {
-    when(io.ptw.req.fire && io.ptw.req.bits.valid) {
-      r_gpa_valid := false.B
-      r_gpa_vpn   := r_refill_tag
-    }
-
-    val sfence = io.sfence.valid
-    // this is [[s_ready]]
-    // handle miss/hit at the first cycle.
-    // if miss, request PTW(L2TLB).
-    when (io.req.fire && tlb_miss) {
-      state := s_request
-      r_refill_tag := vpn
-      r_need_gpa := tlb_hit_if_not_gpa_miss
-      r_vstage1_en := vstage1_en
-      r_stage2_en := stage2_en
-      r_superpage_repl_addr := replacementEntry(superpage_entries, superpage_plru.way)
-      r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx))
-      r_sectored_hit.valid := sector_hits.orR
-      r_sectored_hit.bits := OHToUInt(sector_hits)
-      r_superpage_hit.valid := superpage_hits.orR
-      r_superpage_hit.bits := OHToUInt(superpage_hits)
-    }
-    // Handle SFENCE.VMA when send request to PTW.
-    // SFENCE.VMA    io.ptw.req.ready     kill
-    //       ?                 ?            1
-    //       0                 0            0
-    //       0                 1            0 -> s_wait
-    //       1                 0            0 -> s_wait_invalidate
-    //       1                 0            0 -> s_ready
-    when (state === s_request) {
-      // SFENCE.VMA will kill TLB entries based on rs1 and rs2. It will take 1 cycle.
-      when (sfence) { state := s_ready }
-      // here should be io.ptw.req.fire, but assert(io.ptw.req.ready === true.B)
-      // fire -> s_wait
-      when (io.ptw.req.ready) { state := Mux(sfence, s_wait_invalidate, s_wait) }
-      // If CPU kills request(frontend.s2_redirect)
-      when (io.kill) { state := s_ready }
-    }
-    // sfence in refill will results in invalidate
-    when (state === s_wait && sfence) {
-      state := s_wait_invalidate
-    }
-    // after CPU acquire response, go back to s_ready.
-    when (io.ptw.resp.valid) {
-      state := s_ready
-    }
-
-    // SFENCE processing logic.
-    when (sfence) {
-      assert(!io.sfence.bits.rs1 || (io.sfence.bits.addr >> pgIdxBits) === vpn)
-      for (e <- all_real_entries) {
-        val hv = usingHypervisor.B && io.sfence.bits.hv
-        val hg = usingHypervisor.B && io.sfence.bits.hg
-        when (!hg && io.sfence.bits.rs1) { e.invalidateVPN(vpn, hv) }
-        .elsewhen (!hg && io.sfence.bits.rs2) { e.invalidateNonGlobal(hv) }
-        .otherwise { e.invalidate(hv || hg) }
-      }
-    }
-    when(io.req.fire && vsatp_mode_mismatch) {
-      all_real_entries.foreach(_.invalidate(true.B))
-      v_entries_use_stage1 := vstage1_en
-    }
-    when (multipleHits || reset.asBool) {
-      all_real_entries.foreach(_.invalidate())
-    }
-
-    ccover(io.ptw.req.fire, "MISS", "TLB miss")
-    ccover(io.ptw.req.valid && !io.ptw.req.ready, "PTW_STALL", "TLB miss, but PTW busy")
-    ccover(state === s_wait_invalidate, "SFENCE_DURING_REFILL", "flush TLB during TLB refill")
-    ccover(sfence && !io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_ALL", "flush TLB")
-    ccover(sfence && !io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_ASID", "flush TLB ASID")
-    ccover(sfence && io.sfence.bits.rs1 && !io.sfence.bits.rs2, "SFENCE_LINE", "flush TLB line")
-    ccover(sfence && io.sfence.bits.rs1 && io.sfence.bits.rs2, "SFENCE_LINE_ASID", "flush TLB line/ASID")
-    ccover(multipleHits, "MULTIPLE_HITS", "Two matching translations in TLB")
-  }
-
-  def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) =
-    property.cover(cond, s"${if (instruction) "I" else "D"}TLB_$label", "MemorySystem;;" + desc)
-  /** Decides which entry to be replaced
-    *
-    * If there is a invalid entry, replace it with priorityencoder;
-    * if not, replace the alt entry
-    *
-    * @return mask for TLBEntry replacement
-    */
-  def replacementEntry(set: Seq[TLBEntry], alt: UInt) = {
-    val valids = set.map(_.valid.orR).asUInt
-    Mux(valids.andR, alt, PriorityEncoder(~valids))
-  }
-}
diff --git a/diplomatic/src/rocket/TLBPermissions.scala b/diplomatic/src/rocket/TLBPermissions.scala
deleted file mode 100644
index 26c7c055e..000000000
--- a/diplomatic/src/rocket/TLBPermissions.scala
+++ /dev/null
@@ -1,114 +0,0 @@
-// See LICENSE.SiFive for license details.
-
-package org.chipsalliance.rocket
-
-import chisel3._
-import chisel3.util.isPow2
-
-import freechips.rocketchip.diplomacy._
-import freechips.rocketchip.tilelink._
-
-case class TLBPermissions(
-  homogeneous: Bool, // if false, the below are undefined
-  r: Bool, // readable
-  w: Bool, // writeable
-  x: Bool, // executable
-  c: Bool, // cacheable
-  a: Bool, // arithmetic ops
-  l: Bool) // logical ops
-
-object TLBPageLookup
-{
-  private case class TLBFixedPermissions(
-    e: Boolean, // get-/put-effects
-    r: Boolean, // readable
-    w: Boolean, // writeable
-    x: Boolean, // executable
-    c: Boolean, // cacheable
-    a: Boolean, // arithmetic ops
-    l: Boolean) { // logical ops
-    val useful = r || w || x || c || a || l
-  }
-
-  private def groupRegions(managers: Seq[TLManagerParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = {
-    val permissions = managers.map { m =>
-      (m.address, TLBFixedPermissions(
-        e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains m.regionType,
-        r = m.supportsGet     || m.supportsAcquireB, // if cached, never uses Get
-        w = m.supportsPutFull || m.supportsAcquireT, // if cached, never uses Put
-        x = m.executable,
-        c = m.supportsAcquireB,
-        a = m.supportsArithmetic,
-        l = m.supportsLogical))
-    }
-
-    permissions
-      .filter(_._2.useful) // get rid of no-permission devices
-      .groupBy(_._2) // group by permission type
-      .mapValues(seq =>
-        AddressSet.unify(seq.flatMap(_._1))) // coalesce same-permission regions
-      .toMap
-  }
-
-  // Unmapped memory is considered to be inhomogeneous
-  def apply(managers: Seq[TLManagerParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = {
-    require (isPow2(xLen) && xLen >= 8)
-    require (isPow2(cacheBlockBytes) && cacheBlockBytes >= xLen/8)
-    require (isPow2(pageSize) && pageSize >= cacheBlockBytes)
-
-    val xferSizes = TransferSizes(cacheBlockBytes, cacheBlockBytes)
-    val allSizes = TransferSizes(1, cacheBlockBytes)
-    val amoSizes = TransferSizes(4, xLen/8)
-
-    val permissions = managers.foreach { m =>
-      require (!m.supportsGet        || m.supportsGet       .contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsGet} Get, but must support ${allSizes}")
-      require (!m.supportsPutFull    || m.supportsPutFull   .contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}")
-      require (!m.supportsPutPartial || m.supportsPutPartial.contains(allSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutPartial} PutPartial, but must support ${allSizes}")
-      require (!m.supportsAcquireB   || m.supportsAcquireB  .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}")
-      require (!m.supportsAcquireT   || m.supportsAcquireT  .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}")
-      require (!m.supportsLogical    || m.supportsLogical   .contains(amoSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}")
-      require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes),  s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}")
-      require (!(m.supportsAcquireB && m.supportsPutFull && !m.supportsAcquireT), s"Memory region '${m.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)")
-    }
-
-    val grouped = groupRegions(managers)
-      .mapValues(_.filter(_.alignment >= pageSize)) // discard any region that's not big enough
-
-    def lowCostProperty(prop: TLBFixedPermissions => Boolean): UInt => Bool = {
-      val (yesm, nom) = grouped.partition { case (k, eq) => prop(k) }
-      val (yes, no) = (yesm.values.flatten.toList, nom.values.flatten.toList)
-      // Find the minimal bits needed to distinguish between yes and no
-      val decisionMask = AddressDecoder(Seq(yes, no))
-      def simplify(x: Seq[AddressSet]) = AddressSet.unify(x.map(_.widen(~decisionMask)).distinct)
-      val (yesf, nof) = (simplify(yes), simplify(no))
-      if (yesf.size < no.size) {
-        (x: UInt) => yesf.map(_.contains(x)).foldLeft(false.B)(_ || _)
-      } else {
-        (x: UInt) => !nof.map(_.contains(x)).foldLeft(false.B)(_ || _)
-      }
-    }
-
-    // Derive simplified property circuits (don't care when !homo)
-    val rfn = lowCostProperty(_.r)
-    val wfn = lowCostProperty(_.w)
-    val xfn = lowCostProperty(_.x)
-    val cfn = lowCostProperty(_.c)
-    val afn = lowCostProperty(_.a)
-    val lfn = lowCostProperty(_.l)
-
-    val homo = AddressSet.unify(grouped.values.flatten.toList)
-    (x: UInt) => TLBPermissions(
-      homogeneous = homo.map(_.contains(x)).foldLeft(false.B)(_ || _),
-      r = rfn(x),
-      w = wfn(x),
-      x = xfn(x),
-      c = cfn(x),
-      a = afn(x),
-      l = lfn(x))
-  }
-
-  // Are all pageSize intervals of mapped regions homogeneous?
-  def homogeneous(managers: Seq[TLManagerParameters], pageSize: BigInt): Boolean = {
-    groupRegions(managers).values.forall(_.forall(_.alignment >= pageSize))
-  }
-}
diff --git a/rocket/src/HellaCache.scala b/rocket/src/HellaCache.scala
index a243ddc4b..cd3604e3e 100644
--- a/rocket/src/HellaCache.scala
+++ b/rocket/src/HellaCache.scala
@@ -84,7 +84,7 @@ class HellaCacheWriteData(coreDataBits: Int, coreDataBytes: Int) extends Bundle
   val mask = UInt(coreDataBytes.W)
 }
 
-class HellaCacheResp(coreDataBits: Int, coreDataBytes: Int) extends Bundle {
+class HellaCacheResp(coreDataBits: Int, coreDataBytes: Int, dcacheReqTagBits: Int, dcacheArbPorts: Int) extends Bundle {
   val replay = Bool()
   val has_data = Bool()
   val data_word_bypass = UInt(coreDataBits.W)
@@ -92,6 +92,8 @@ class HellaCacheResp(coreDataBits: Int, coreDataBytes: Int) extends Bundle {
   val store_data = UInt(coreDataBits.W)
   val data = UInt(coreDataBits.W)
   val mask = UInt(coreDataBytes.W)
+  val tag  = UInt((dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W)
+  val size = UInt(log2Ceil(coreDataBytes.log2 + 1).W)
 }
 
 class AlignmentExceptions extends Bundle {
@@ -161,12 +163,12 @@ class HellaCacheIO(
   val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO
   val s2_paddr = Input(UInt(paddrBits.W)) // translated address
 
-  val resp = Flipped(Valid(new HellaCacheResp(coreDataBits, coreDataBytes)))
+  val resp = Flipped(Valid(new HellaCacheResp(coreDataBits, coreDataBytes, dcacheReqTagBits, dcacheArbPorts)))
   val replay_next = Input(Bool())
   val s2_xcpt = Input(new HellaCacheExceptions)
   val s2_gpa = Input(UInt(vaddrBitsExtended.W))
   val s2_gpa_is_pte = Input(Bool())
-  val uncached_resp = Option.when(separateUncachedResp)(Flipped(Decoupled(new HellaCacheResp(coreDataBits, coreDataBytes))))
+  val uncached_resp = Option.when(separateUncachedResp)(Flipped(Decoupled(new HellaCacheResp(coreDataBits, coreDataBytes, dcacheReqTagBits, dcacheArbPorts))))
   val ordered = Input(Bool())
   val perf = Input(new HellaCachePerfEvents())
 

From 8ee51692ee3adec883fec1c1f011c62c25d27247 Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Tue, 6 Jun 2023 16:09:32 +0800
Subject: [PATCH 31/32] Update .gitignore

---
 .gitignore | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 898f111c9..d8fceba30 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,10 @@
 out/
 .*.sw[a-p]
-.bsp
-.idea
\ No newline at end of file
+.bsp/
+.idea/
+.bloop/
+.metals/
+.vscode/
+.scala-build/
+out/
+venv/
\ No newline at end of file

From f9bc252ad8f9d55a8368218a6b5190cf00b4d12f Mon Sep 17 00:00:00 2001
From: Takehana <cowpowermax@pm.me>
Date: Mon, 12 Jun 2023 17:06:59 +0800
Subject: [PATCH 32/32] Add CoreMonitorBundle

---
 rocket/src/util/CoreMonitorBundle.scala | 28 +++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 rocket/src/util/CoreMonitorBundle.scala

diff --git a/rocket/src/util/CoreMonitorBundle.scala b/rocket/src/util/CoreMonitorBundle.scala
new file mode 100644
index 000000000..37ebdf51a
--- /dev/null
+++ b/rocket/src/util/CoreMonitorBundle.scala
@@ -0,0 +1,28 @@
+// See LICENSE.Berkeley for license details.
+// See LICENSE.SiFive for license details.
+
+package org.chipsalliance.rocket.util
+
+import chisel3._
+
+// Exposes a handful of internal core signals to verification
+// monitors that sample instruction commits.
+class CoreMonitorBundle(val xLen: Int, val fLen: Int) extends Bundle {
+  val excpt = Bool()
+  val priv_mode = UInt(3.W)
+  val hartid = UInt(xLen.W)
+  val timer = UInt(32.W)
+  val valid = Bool()
+  val pc = UInt(xLen.W)
+  val wrdst = UInt(5.W)
+  val wrdata = UInt(math.max(xLen, fLen).W) // sized for the wider of xLen and fLen
+  val wrenx = Bool() // integer write enable (per the deprecation note below)
+  val wrenf = Bool() // floating-point write enable (per the deprecation note below)
+  @deprecated("replace wren with wrenx or wrenf to specify integer or floating point","Rocket Chip 2020.05")
+  def wren: Bool = wrenx || wrenf // legacy combined view: set when either enable is set
+  val rd0src = UInt(5.W)
+  val rd0val = UInt(xLen.W)
+  val rd1src = UInt(5.W)
+  val rd1val = UInt(xLen.W)
+  val inst = UInt(32.W)
+}