diff --git a/.gitignore b/.gitignore index d04d200c6..ba9fc93f9 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ out/ .bloop/ .metals/ .vscode/ -.scala-build/ +.scala-build/ \ No newline at end of file diff --git a/diplomatic/src/rocket/BaseTile.scala b/diplomatic/src/rocket/BaseTile.scala index b7aea6e74..ef6f8c582 100644 --- a/diplomatic/src/rocket/BaseTile.scala +++ b/diplomatic/src/rocket/BaseTile.scala @@ -13,6 +13,7 @@ import freechips.rocketchip.interrupts._ import freechips.rocketchip.tilelink._ import freechips.rocketchip.util._ import freechips.rocketchip.prci.{ClockSinkParameters} +import freechips.rocketchip.rocket.{PgLevels, ASIdBits, VMIdBits, TracedInstruction, TraceAux} case object TileVisibilityNodeKey extends Field[TLEphemeralNode] case object TileKey extends Field[TileParams] diff --git a/diplomatic/src/rocket/CSR.scala b/diplomatic/src/rocket/CSR.scala deleted file mode 100644 index 51da1e5b9..000000000 --- a/diplomatic/src/rocket/CSR.scala +++ /dev/null @@ -1,1515 +0,0 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. 
- -package org.chipsalliance.rocket - -import chisel3._ -import chisel3.util.{BitPat, Cat, Fill, Mux1H, PopCount, PriorityMux, RegEnable, UIntToOH, Valid, log2Ceil, log2Up} -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.devices.debug.DebugModuleKey -import org.chipsalliance.rockettile._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property - -import scala.collection.mutable.LinkedHashMap -import Instructions._ -import Instructions64._ -import CustomInstructions._ - -class MStatus extends Bundle { - // not truly part of mstatus, but convenient - val debug = Bool() - val cease = Bool() - val wfi = Bool() - val isa = UInt(32.W) - - val dprv = UInt(PRV.SZ.W) // effective prv for data accesses - val dv = Bool() // effective v for data accesses - val prv = UInt(PRV.SZ.W) - val v = Bool() - - val sd = Bool() - val zero2 = UInt(23.W) - val mpv = Bool() - val gva = Bool() - val mbe = Bool() - val sbe = Bool() - val sxl = UInt(2.W) - val uxl = UInt(2.W) - val sd_rv32 = Bool() - val zero1 = UInt(8.W) - val tsr = Bool() - val tw = Bool() - val tvm = Bool() - val mxr = Bool() - val sum = Bool() - val mprv = Bool() - val xs = UInt(2.W) - val fs = UInt(2.W) - val mpp = UInt(2.W) - val vs = UInt(2.W) - val spp = UInt(1.W) - val mpie = Bool() - val ube = Bool() - val spie = Bool() - val upie = Bool() - val mie = Bool() - val hie = Bool() - val sie = Bool() - val uie = Bool() -} - -class MNStatus extends Bundle { - val mpp = UInt(2.W) - val zero3 = UInt(3.W) - val mpv = Bool() - val zero2 = UInt(3.W) - val mie = Bool() - val zero1 = UInt(3.W) -} - -class HStatus extends Bundle { - val zero6 = UInt(30.W) - val vsxl = UInt(2.W) - val zero5 = UInt(9.W) - val vtsr = Bool() - val vtw = Bool() - val vtvm = Bool() - val zero3 = UInt(2.W) - val vgein = UInt(6.W) - val zero2 = UInt(2.W) - val hu = Bool() - val spvp = Bool() - val spv = Bool() - val gva = Bool() - val vsbe = Bool() - val zero1 = UInt(5.W) -} - -class DCSR extends Bundle { 
- val xdebugver = UInt(2.W) - val zero4 = UInt(2.W) - val zero3 = UInt(12.W) - val ebreakm = Bool() - val ebreakh = Bool() - val ebreaks = Bool() - val ebreaku = Bool() - val zero2 = Bool() - val stopcycle = Bool() - val stoptime = Bool() - val cause = UInt(3.W) - val v = Bool() - val zero1 = UInt(2.W) - val step = Bool() - val prv = UInt(PRV.SZ.W) -} - -class MIP(implicit p: Parameters) extends CoreBundle()(p) - with HasCoreParameters { - val lip = Vec(coreParams.nLocalInterrupts, Bool()) - val zero1 = Bool() - val debug = Bool() // keep in sync with CSR.debugIntCause - val rocc = Bool() - val sgeip = Bool() - val meip = Bool() - val vseip = Bool() - val seip = Bool() - val ueip = Bool() - val mtip = Bool() - val vstip = Bool() - val stip = Bool() - val utip = Bool() - val msip = Bool() - val vssip = Bool() - val ssip = Bool() - val usip = Bool() -} - -class PTBR(implicit p: Parameters) extends CoreBundle()(p) { - def additionalPgLevels = mode.extract(log2Ceil(pgLevels-minPgLevels+1)-1, 0) - def pgLevelsToMode(i: Int) = (xLen, i) match { - case (32, 2) => 1 - case (64, x) if x >= 3 && x <= 6 => x + 5 - } - val (modeBits, maxASIdBits) = xLen match { - case 32 => (1, 9) - case 64 => (4, 16) - } - require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen) - - val mode = UInt(modeBits.W) - val asid = UInt(maxASIdBits.W) - val ppn = UInt((maxPAddrBits - pgIdxBits).W) -} - - -class PerfCounterIO(implicit p: Parameters) extends CoreBundle - with HasCoreParameters { - val eventSel = Output(UInt(xLen.W)) - val inc = Input(UInt(log2Ceil(1+retireWidth).W)) -} - -class TracedInstruction(implicit p: Parameters) extends CoreBundle { - val valid = Bool() - val iaddr = UInt(coreMaxAddrBits.W) - val insn = UInt(iLen.W) - val priv = UInt(3.W) - val exception = Bool() - val interrupt = Bool() - val cause = UInt(xLen.W) - val tval = UInt((coreMaxAddrBits max iLen).W) - val wdata = Option.when(traceHasWdata)(UInt((vLen max xLen).W)) -} - -class TraceAux extends Bundle { - val 
enable = Bool() - val stall = Bool() -} - -class CSRDecodeIO(implicit p: Parameters) extends CoreBundle { - val inst = Input(UInt(iLen.W)) - - def csr_addr = (inst >> 20)(CSR.ADDRSZ-1, 0) - - val fp_illegal = Output(Bool()) - val vector_illegal = Output(Bool()) - val fp_csr = Output(Bool()) - val rocc_illegal = Output(Bool()) - val read_illegal = Output(Bool()) - val write_illegal = Output(Bool()) - val write_flush = Output(Bool()) - val system_illegal = Output(Bool()) - val virtual_access_illegal = Output(Bool()) - val virtual_system_illegal = Output(Bool()) -} - -class CSRFileIO(implicit p: Parameters) extends CoreBundle - with HasCoreParameters { - val ungated_clock = Input(Clock()) - val interrupts = Input(new CoreInterrupts()) - val hartid = Input(UInt(hartIdLen.W)) - val rw = new Bundle { - val addr = Input(UInt(CSR.ADDRSZ.W)) - val cmd = Input(Bits(CSR.SZ.W)) - val rdata = Output(Bits(xLen.W)) - val wdata = Input(Bits(xLen.W)) - } - - val decode = Vec(decodeWidth, new CSRDecodeIO) - - val csr_stall = Output(Bool()) - val eret = Output(Bool()) - val singleStep = Output(Bool()) - - val status = Output(new MStatus()) - val hstatus = Output(new HStatus()) - val gstatus = Output(new MStatus()) - val ptbr = Output(new PTBR()) - val hgatp = Output(new PTBR()) - val vsatp = Output(new PTBR()) - val evec = Output(UInt(vaddrBitsExtended.W)) - val exception = Input(Bool()) - val retire = Input(UInt(log2Up(1+retireWidth).W)) - val cause = Input(UInt(xLen.W)) - val pc = Input(UInt(vaddrBitsExtended.W)) - val tval = Input(UInt(vaddrBitsExtended.W)) - val htval = Input(UInt(((maxSVAddrBits + 1) min xLen).W)) - val gva = Input(Bool()) - val time = Output(UInt(xLen.W)) - val fcsr_rm = Output(Bits(FPConstants.RM_SZ.W)) - val fcsr_flags = Flipped(Valid(Bits(FPConstants.FLAGS_SZ.W))) - val set_fs_dirty = coreParams.haveFSDirty.option(Input(Bool())) - val rocc_interrupt = Input(Bool()) - val interrupt = Output(Bool()) - val interrupt_cause = Output(UInt(xLen.W)) - val bp = 
Output(Vec(nBreakpoints, new BP)) - val pmp = Output(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits))) - val counters = Vec(nPerfCounters, new PerfCounterIO) - val csrw_counter = Output(UInt(CSR.nCtr.W)) - val inhibit_cycle = Output(Bool()) - val inst = Input(Vec(retireWidth, UInt(iLen.W))) - val trace = Output(Vec(retireWidth, new TracedInstruction)) - val mcontext = Output(UInt(coreParams.mcontextWidth.W)) - val scontext = Output(UInt(coreParams.scontextWidth.W)) - - val vector = usingVector.option(new Bundle { - val vconfig = Output(new VConfig()) - val vstart = Output(UInt(maxVLMax.log2.W)) - val vxrm = Output(UInt(2.W)) - val set_vs_dirty = Input(Bool()) - val set_vconfig = Flipped(Valid(new VConfig)) - val set_vstart = Flipped(Valid(vstart)) - val set_vxsat = Input(Bool()) - }) -} - -class VConfig(implicit p: Parameters) extends CoreBundle { - val vl = UInt((maxVLMax.log2 + 1).W) - val vtype = new VType -} - -object VType { - def fromUInt(that: UInt, ignore_vill: Boolean = false)(implicit p: Parameters): VType = { - val res = 0.U.asTypeOf(new VType) - val in = that.asTypeOf(res) - val vill = (in.max_vsew.U < in.vsew) || !in.lmul_ok || in.reserved =/= 0.U || in.vill - when (!vill || ignore_vill.B) { - res := in - res.vsew := in.vsew(log2Ceil(1 + in.max_vsew) - 1, 0) - } - res.reserved := 0.U - res.vill := vill - res - } - - def computeVL(avl: UInt, vtype: UInt, currentVL: UInt, useCurrentVL: Bool, useMax: Bool, useZero: Bool)(implicit p: Parameters): UInt = - VType.fromUInt(vtype, true).vl(avl, currentVL, useCurrentVL, useMax, useZero) -} - -class VType(implicit p: Parameters) extends CoreBundle { - val vill = Bool() - val reserved = UInt((xLen - 9).W) - val vma = Bool() - val vta = Bool() - val vsew = UInt(3.W) - val vlmul_sign = Bool() - val vlmul_mag = UInt(2.W) - - def vlmul_signed: SInt = Cat(vlmul_sign, vlmul_mag).asSInt - - @deprecated("use vlmul_sign, vlmul_mag, or vlmul_signed", "RVV 0.9") - def vlmul: UInt = vlmul_mag 
- - def max_vsew = log2Ceil(eLen/8) - def max_vlmul = (1 << vlmul_mag.getWidth) - 1 - - def lmul_ok: Bool = Mux(this.vlmul_sign, this.vlmul_mag =/= 0.U && ~this.vlmul_mag < max_vsew.U - this.vsew, true.B) - - def minVLMax: Int = ((maxVLMax / eLen) >> ((1 << vlmul_mag.getWidth) - 1)) max 1 - - def vlMax: UInt = (maxVLMax.U >> (this.vsew +& Cat(this.vlmul_sign, ~this.vlmul_mag))).andNot((minVLMax-1).U) - - def vl(avl: UInt, currentVL: UInt, useCurrentVL: Bool, useMax: Bool, useZero: Bool): UInt = { - val atLeastMaxVLMax = useMax || Mux(useCurrentVL, currentVL >= maxVLMax.U, avl >= maxVLMax.U) - val avl_lsbs = Mux(useCurrentVL, currentVL, avl)(maxVLMax.log2 - 1, 0) - - val atLeastVLMax = atLeastMaxVLMax || (avl_lsbs & (-maxVLMax.S >> (this.vsew +& Cat(this.vlmul_sign, ~this.vlmul_mag))).asUInt.andNot((minVLMax-1).U)).orR - val isZero = vill || useZero - Mux(!isZero && atLeastVLMax, vlMax, 0.U) | Mux(!isZero && !atLeastVLMax, avl_lsbs, 0.U) - } -} - -class CSRFile( - perfEventSets: EventSets = new EventSets(Seq()), - customCSRs: Seq[CustomCSR] = Nil)(implicit p: Parameters) - extends CoreModule()(p) - with HasCoreParameters { - val io = IO(new CSRFileIO { - val customCSRs = Output(Vec(CSRFile.this.customCSRs.size, new CustomCSRIO)) - }) - - val reset_mstatus = WireDefault(0.U.asTypeOf(new MStatus())) - reset_mstatus.mpp := PRV.M.U - reset_mstatus.prv := PRV.M.U - reset_mstatus.xs := (if (usingRoCC) 3.U else 0.U) - val reg_mstatus = RegInit(reset_mstatus) - - val new_prv = WireDefault(reg_mstatus.prv) - reg_mstatus.prv := legalizePrivilege(new_prv) - - val reset_dcsr = WireDefault(0.U.asTypeOf(new DCSR())) - reset_dcsr.xdebugver := 1.U - reset_dcsr.prv := PRV.M.U - val reg_dcsr = RegInit(reset_dcsr) - - val (supported_interrupts, delegable_interrupts) = { - val sup = Wire(new MIP) - sup.usip := false.B - sup.ssip := usingSupervisor.B - sup.vssip := usingHypervisor.B - sup.msip := true.B - sup.utip := false.B - sup.stip := usingSupervisor.B - sup.vstip := 
usingHypervisor.B - sup.mtip := true.B - sup.ueip := false.B - sup.seip := usingSupervisor.B - sup.vseip := usingHypervisor.B - sup.meip := true.B - sup.sgeip := false.B - sup.rocc := usingRoCC.B - sup.debug := false.B - sup.zero1 := false.B - sup.lip foreach { _ := true.B } - val supported_high_interrupts = if (io.interrupts.buserror.nonEmpty && !usingNMI) (BigInt(1) << CSR.busErrorIntCause).U else 0.U - - val del = WireDefault(sup) - del.msip := false.B - del.mtip := false.B - del.meip := false.B - - (sup.asUInt | supported_high_interrupts, del.asUInt) - } - val delegable_exceptions = Seq( - Causes.misaligned_fetch, - Causes.fetch_page_fault, - Causes.breakpoint, - Causes.load_page_fault, - Causes.store_page_fault, - Causes.misaligned_load, - Causes.misaligned_store, - Causes.illegal_instruction, - Causes.user_ecall, - Causes.virtual_supervisor_ecall, - Causes.fetch_guest_page_fault, - Causes.load_guest_page_fault, - Causes.virtual_instruction, - Causes.store_guest_page_fault).map(1 << _).sum.U - - val hs_delegable_exceptions = Seq( - Causes.misaligned_fetch, - Causes.fetch_access, - Causes.illegal_instruction, - Causes.breakpoint, - Causes.misaligned_load, - Causes.load_access, - Causes.misaligned_store, - Causes.store_access, - Causes.user_ecall, - Causes.fetch_page_fault, - Causes.load_page_fault, - Causes.store_page_fault).map(1 << _).sum.U - - val (hs_delegable_interrupts, mideleg_always_hs) = { - val always = WireDefault(0.U.asTypeOf(new MIP())) - always.vssip := usingHypervisor.B - always.vstip := usingHypervisor.B - always.vseip := usingHypervisor.B - - val deleg = WireDefault(always) - deleg.lip.foreach { _ := usingHypervisor.B } - - (deleg.asUInt, always.asUInt) - } - - val reg_debug = RegInit(false.B) - val reg_dpc = Reg(UInt(vaddrBitsExtended.W)) - val reg_dscratch0 = Reg(UInt(xLen.W)) - val reg_dscratch1 = (p(DebugModuleKey).map(_.nDscratch).getOrElse(1) > 1).option(Reg(UInt(xLen.W))) - val reg_singleStepped = Reg(Bool()) - - val reg_mcontext = 
(coreParams.mcontextWidth > 0).option(RegInit(0.U(coreParams.mcontextWidth.W))) - val reg_scontext = (coreParams.scontextWidth > 0).option(RegInit(0.U(coreParams.scontextWidth.W))) - - val reg_tselect = Reg(UInt(log2Up(nBreakpoints).W)) - val reg_bp = Reg(Vec(1 << log2Up(nBreakpoints), new BP)) - val reg_pmp = Reg(Vec(nPMPs, new PMPReg(paddrBits, pmpGranularity))) - - val reg_mie = Reg(UInt(xLen.W)) - val (reg_mideleg, read_mideleg) = { - val reg = Reg(UInt(xLen.W)) - (reg, Mux(usingSupervisor.B, reg & delegable_interrupts | mideleg_always_hs, 0.U)) - } - val (reg_medeleg, read_medeleg) = { - val reg = Reg(UInt(xLen.W)) - (reg, Mux(usingSupervisor.B, reg & delegable_exceptions, 0.U)) - } - val reg_mip = Reg(new MIP) - val reg_mepc = Reg(UInt(vaddrBitsExtended.W)) - val reg_mcause = RegInit(0.U(xLen.W)) - val reg_mtval = Reg(UInt(vaddrBitsExtended.W)) - val reg_mtval2 = Reg(UInt(((maxSVAddrBits + 1) min xLen).W)) - val reg_mscratch = Reg(Bits(xLen.W)) - val mtvecWidth = paddrBits min xLen - val reg_mtvec = mtvecInit match { - case Some(addr) => RegInit(addr.U(mtvecWidth.W)) - case None => Reg(UInt(mtvecWidth.W)) - } - - val reset_mnstatus = WireDefault(0.U.asTypeOf(new MNStatus())) - reset_mnstatus.mpp := PRV.M.U - val reg_mnscratch = Reg(Bits(xLen.W)) - val reg_mnepc = Reg(UInt(vaddrBitsExtended.W)) - val reg_mncause = RegInit(0.U(xLen.W)) - val reg_mnstatus = RegInit(reset_mnstatus) - val reg_rnmie = RegInit(true.B) - val nmie = reg_rnmie - - val delegable_counters = ((BigInt(1) << (nPerfCounters + CSR.firstHPM)) - 1).U - val (reg_mcounteren, read_mcounteren) = { - val reg = Reg(UInt(32.W)) - (reg, Mux(usingUser.B, reg & delegable_counters, 0.U)) - } - val (reg_scounteren, read_scounteren) = { - val reg = Reg(UInt(32.W)) - (reg, Mux(usingSupervisor.B, reg & delegable_counters, 0.U)) - } - - val (reg_hideleg, read_hideleg) = { - val reg = Reg(UInt(xLen.W)) - (reg, Mux(usingHypervisor.B, reg & hs_delegable_interrupts, 0.U)) - } - val (reg_hedeleg, read_hedeleg) = { 
- val reg = Reg(UInt(xLen.W)) - (reg, Mux(usingHypervisor.B, reg & hs_delegable_exceptions, 0.U)) - } - val hs_delegable_counters = delegable_counters - val (reg_hcounteren, read_hcounteren) = { - val reg = Reg(UInt(32.W)) - (reg, Mux(usingHypervisor.B, reg & hs_delegable_counters, 0.U)) - } - val reg_hstatus = RegInit(0.U.asTypeOf(new HStatus)) - val reg_hgatp = Reg(new PTBR) - val reg_htval = Reg(reg_mtval2.cloneType) - val read_hvip = reg_mip.asUInt & hs_delegable_interrupts - val read_hie = reg_mie & hs_delegable_interrupts - - val (reg_vstvec, read_vstvec) = { - val reg = Reg(UInt(vaddrBitsExtended.W)) - (reg, formTVec(reg).sextTo(xLen)) - } - val reg_vsstatus = Reg(new MStatus) - val reg_vsscratch = Reg(Bits(xLen.W)) - val reg_vsepc = Reg(UInt(vaddrBitsExtended.W)) - val reg_vscause = Reg(Bits(xLen.W)) - val reg_vstval = Reg(UInt(vaddrBitsExtended.W)) - val reg_vsatp = Reg(new PTBR) - - val reg_sepc = Reg(UInt(vaddrBitsExtended.W)) - val reg_scause = Reg(Bits(xLen.W)) - val reg_stval = Reg(UInt(vaddrBitsExtended.W)) - val reg_sscratch = Reg(Bits(xLen.W)) - val reg_stvec = Reg(UInt((if (usingHypervisor) vaddrBitsExtended else vaddrBits).W)) - val reg_satp = Reg(new PTBR) - val reg_wfi = withClock(io.ungated_clock) { RegInit(false.B) } - - val reg_fflags = Reg(UInt(5.W)) - val reg_frm = Reg(UInt(3.W)) - val reg_vconfig = usingVector.option(Reg(new VConfig)) - val reg_vstart = usingVector.option(Reg(UInt(maxVLMax.log2.W))) - val reg_vxsat = usingVector.option(Reg(Bool())) - val reg_vxrm = usingVector.option(Reg(UInt(io.vector.get.vxrm.getWidth.W))) - - val reg_mcountinhibit = RegInit(0.U((CSR.firstHPM + nPerfCounters).W)) - io.inhibit_cycle := reg_mcountinhibit(0) - val reg_instret = WideCounter(64, io.retire, inhibit = reg_mcountinhibit(2)) - val reg_cycle = if (enableCommitLog) WideCounter(64, io.retire, inhibit = reg_mcountinhibit(0)) - else withClock(io.ungated_clock) { WideCounter(64, !io.csr_stall, inhibit = reg_mcountinhibit(0)) } - val reg_hpmevent = 
io.counters.map(c => RegInit(0.U(xLen.W))) - (io.counters zip reg_hpmevent) foreach { case (c, e) => c.eventSel := e } - val reg_hpmcounter = io.counters.zipWithIndex.map { case (c, i) => - WideCounter(CSR.hpmWidth, c.inc, reset = false, inhibit = reg_mcountinhibit(CSR.firstHPM+i)) } - - val mip = WireDefault(reg_mip) - mip.lip := (io.interrupts.lip: Seq[Bool]) - mip.mtip := io.interrupts.mtip - mip.msip := io.interrupts.msip - mip.meip := io.interrupts.meip - // seip is the OR of reg_mip.seip and the actual line from the PLIC - io.interrupts.seip.foreach { mip.seip := reg_mip.seip || _ } - // Simimlar sort of thing would apply if the PLIC had a VSEIP line: - //io.interrupts.vseip.foreach { mip.vseip := reg_mip.vseip || _ } - mip.rocc := io.rocc_interrupt - val read_mip = mip.asUInt & supported_interrupts - val read_hip = read_mip & hs_delegable_interrupts - val high_interrupts = (if (usingNMI) 0.U else io.interrupts.buserror.map(_ << CSR.busErrorIntCause).getOrElse(0.U)) - - val pending_interrupts = high_interrupts | (read_mip & reg_mie) - val d_interrupts = io.interrupts.debug << CSR.debugIntCause - val (nmi_interrupts, nmiFlag) = io.interrupts.nmi.map(nmi => - (((nmi.rnmi && reg_rnmie) << CSR.rnmiIntCause) | - io.interrupts.buserror.map(_ << CSR.rnmiBEUCause).getOrElse(0.U), - !io.interrupts.debug && nmi.rnmi && reg_rnmie)).getOrElse(0.U, false.B) - val m_interrupts = Mux(nmie && (reg_mstatus.prv <= PRV.S.U || reg_mstatus.mie), ~(~pending_interrupts | read_mideleg), 0.U) - val s_interrupts = Mux(nmie && (reg_mstatus.v || reg_mstatus.prv < PRV.S.U || (reg_mstatus.prv === PRV.S.U && reg_mstatus.sie)), pending_interrupts & read_mideleg & ~read_hideleg, 0.U) - val vs_interrupts = Mux(nmie && (reg_mstatus.v && (reg_mstatus.prv < PRV.S.U || reg_mstatus.prv === PRV.S.U && reg_vsstatus.sie)), pending_interrupts & read_hideleg, 0.U) - val (anyInterrupt, whichInterrupt) = chooseInterrupt(Seq(vs_interrupts, s_interrupts, m_interrupts, nmi_interrupts, d_interrupts)) - val 
interruptMSB = BigInt(1) << (xLen-1) - val interruptCause = interruptMSB.U + (nmiFlag << (xLen-2)) + whichInterrupt - io.interrupt := (anyInterrupt && !io.singleStep || reg_singleStepped) && !(reg_debug || io.status.cease) - io.interrupt_cause := interruptCause - io.bp := reg_bp take nBreakpoints - io.mcontext := reg_mcontext.getOrElse(0.U) - io.scontext := reg_scontext.getOrElse(0.U) - io.pmp := reg_pmp.map(PMP(_, paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits)) - - val isaMaskString = - (if (usingMulDiv) "M" else "") + - (if (usingAtomics) "A" else "") + - (if (fLen >= 32) "F" else "") + - (if (fLen >= 64) "D" else "") + - (if (usingVector) "V" else "") + - // The current spec does not define what sub-extensions constitute the 'B' misa bit - // (if (usingBitManip) "B" else "") + - (if (usingCompressed) "C" else "") - val isaString = (if (coreParams.useRVE) "E" else "I") + - isaMaskString + - (if (customIsaExt.isDefined) "X" else "") + - (if (usingSupervisor) "S" else "") + - (if (usingHypervisor) "H" else "") + - (if (usingUser) "U" else "") - val isaMax = (BigInt(log2Ceil(xLen) - 4) << (xLen-2)) | isaStringToMask(isaString) - val reg_misa = RegInit(isaMax.U) - val read_mstatus = io.status.asUInt.extract(xLen-1,0) - val read_mtvec = formTVec(reg_mtvec).padTo(xLen) - val read_stvec = formTVec(reg_stvec).sextTo(xLen) - - val read_mapping = LinkedHashMap[Int,Bits]( - CSRs.tselect -> reg_tselect, - CSRs.tdata1 -> reg_bp(reg_tselect).control.asUInt, - CSRs.tdata2 -> reg_bp(reg_tselect).address.sextTo(xLen), - CSRs.tdata3 -> reg_bp(reg_tselect).textra.asUInt, - CSRs.misa -> reg_misa, - CSRs.mstatus -> read_mstatus, - CSRs.mtvec -> read_mtvec, - CSRs.mip -> read_mip, - CSRs.mie -> reg_mie, - CSRs.mscratch -> reg_mscratch, - CSRs.mepc -> readEPC(reg_mepc).sextTo(xLen), - CSRs.mtval -> reg_mtval.sextTo(xLen), - CSRs.mcause -> reg_mcause, - CSRs.mhartid -> io.hartid) - - val debug_csrs = if (!usingDebug) LinkedHashMap() else LinkedHashMap[Int,Bits]( - 
CSRs.dcsr -> reg_dcsr.asUInt, - CSRs.dpc -> readEPC(reg_dpc).sextTo(xLen), - CSRs.dscratch0 -> reg_dscratch0.asUInt) ++ - reg_dscratch1.map(r => CSRs.dscratch1 -> r) - - val read_mnstatus = WireInit(0.U.asTypeOf(new MNStatus())) - read_mnstatus.mpp := reg_mnstatus.mpp - read_mnstatus.mpv := reg_mnstatus.mpv - read_mnstatus.mie := reg_rnmie - val nmi_csrs = if (!usingNMI) LinkedHashMap() else LinkedHashMap[Int,Bits]( - CustomCSRs.mnscratch -> reg_mnscratch, - CustomCSRs.mnepc -> readEPC(reg_mnepc).sextTo(xLen), - CustomCSRs.mncause -> reg_mncause, - CustomCSRs.mnstatus -> read_mnstatus.asUInt) - - val context_csrs = LinkedHashMap[Int,Bits]() ++ - reg_mcontext.map(r => CSRs.mcontext -> r) ++ - reg_scontext.map(r => CSRs.scontext -> r) - - val read_fcsr = Cat(reg_frm, reg_fflags) - val fp_csrs = LinkedHashMap[Int,Bits]() ++ - usingFPU.option(CSRs.fflags -> reg_fflags) ++ - usingFPU.option(CSRs.frm -> reg_frm) ++ - (usingFPU || usingVector).option(CSRs.fcsr -> read_fcsr) - - val read_vcsr = Cat(reg_vxrm.getOrElse(0.U), reg_vxsat.getOrElse(0.U)) - val vector_csrs = if (!usingVector) LinkedHashMap() else LinkedHashMap[Int,Bits]( - CSRs.vxsat -> reg_vxsat.get, - CSRs.vxrm -> reg_vxrm.get, - CSRs.vcsr -> read_vcsr, - CSRs.vstart -> reg_vstart.get, - CSRs.vtype -> reg_vconfig.get.vtype.asUInt, - CSRs.vl -> reg_vconfig.get.vl, - CSRs.vlenb -> (vLen / 8).U) - - read_mapping ++= debug_csrs - read_mapping ++= nmi_csrs - read_mapping ++= context_csrs - read_mapping ++= fp_csrs - read_mapping ++= vector_csrs - - if (coreParams.haveBasicCounters) { - read_mapping += CSRs.mcountinhibit -> reg_mcountinhibit - read_mapping += CSRs.mcycle -> reg_cycle - read_mapping += CSRs.minstret -> reg_instret - - for (((e, c), i) <- (reg_hpmevent.padTo(CSR.nHPM, 0.U) - zip reg_hpmcounter.map(x => x: UInt).padTo(CSR.nHPM, 0.U)).zipWithIndex) { - read_mapping += (i + CSR.firstHPE) -> e // mhpmeventN - read_mapping += (i + CSR.firstMHPC) -> c // mhpmcounterN - read_mapping += (i + CSR.firstHPC) -> c 
// hpmcounterN - if (xLen == 32) { - read_mapping += (i + CSR.firstMHPCH) -> (c >> 32) // mhpmcounterNh - read_mapping += (i + CSR.firstHPCH) -> (c >> 32) // hpmcounterNh - } - } - - if (usingUser) { - read_mapping += CSRs.mcounteren -> read_mcounteren - } - read_mapping += CSRs.cycle -> reg_cycle - read_mapping += CSRs.instret -> reg_instret - - if (xLen == 32) { - read_mapping += CSRs.mcycleh -> (reg_cycle >> 32) - read_mapping += CSRs.minstreth -> (reg_instret >> 32) - read_mapping += CSRs.cycleh -> (reg_cycle >> 32) - read_mapping += CSRs.instreth -> (reg_instret >> 32) - } - } - - val sie_mask = { - val sgeip_mask = WireInit(0.U.asTypeOf(new MIP)) - sgeip_mask.sgeip := true.B - read_mideleg & ~(hs_delegable_interrupts | sgeip_mask.asUInt) - } - if (usingSupervisor) { - val read_sie = reg_mie & sie_mask - val read_sip = read_mip & sie_mask - val read_sstatus = WireDefault(0.U.asTypeOf(new MStatus)) - read_sstatus.sd := io.status.sd - read_sstatus.uxl := io.status.uxl - read_sstatus.sd_rv32 := io.status.sd_rv32 - read_sstatus.mxr := io.status.mxr - read_sstatus.sum := io.status.sum - read_sstatus.xs := io.status.xs - read_sstatus.fs := io.status.fs - read_sstatus.vs := io.status.vs - read_sstatus.spp := io.status.spp - read_sstatus.spie := io.status.spie - read_sstatus.sie := io.status.sie - - read_mapping += CSRs.sstatus -> (read_sstatus.asUInt)(xLen-1,0) - read_mapping += CSRs.sip -> read_sip.asUInt - read_mapping += CSRs.sie -> read_sie.asUInt - read_mapping += CSRs.sscratch -> reg_sscratch - read_mapping += CSRs.scause -> reg_scause - read_mapping += CSRs.stval -> reg_stval.sextTo(xLen) - read_mapping += CSRs.satp -> reg_satp.asUInt - read_mapping += CSRs.sepc -> readEPC(reg_sepc).sextTo(xLen) - read_mapping += CSRs.stvec -> read_stvec - read_mapping += CSRs.scounteren -> read_scounteren - read_mapping += CSRs.mideleg -> read_mideleg - read_mapping += CSRs.medeleg -> read_medeleg - } - - val pmpCfgPerCSR = xLen / new PMPConfig().getWidth - def pmpCfgIndex(i: 
Int) = (xLen / 32) * (i / pmpCfgPerCSR) - if (reg_pmp.nonEmpty) { - require(reg_pmp.size <= CSR.maxPMPs) - val read_pmp = reg_pmp.padTo(CSR.maxPMPs, 0.U.asTypeOf(new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits))) - for (i <- 0 until read_pmp.size by pmpCfgPerCSR) - read_mapping += (CSRs.pmpcfg0 + pmpCfgIndex(i)) -> read_pmp.map(_.cfg).slice(i, i + pmpCfgPerCSR).asUInt - for ((pmp, i) <- read_pmp.zipWithIndex) - read_mapping += (CSRs.pmpaddr0 + i) -> pmp.readAddr - } - - // implementation-defined CSRs - val reg_custom = customCSRs.map { csr => - require(csr.mask >= 0 && csr.mask.bitLength <= xLen) - require(!read_mapping.contains(csr.id)) - val reg = csr.init.map(init => RegInit(init.U(xLen.W))).getOrElse(Reg(UInt(xLen.W))) - read_mapping += csr.id -> reg - reg - } - - if (usingHypervisor) { - read_mapping += CSRs.mtinst -> 0.U - read_mapping += CSRs.mtval2 -> reg_mtval2 - - val read_hstatus = io.hstatus.asUInt.extract(xLen-1,0) - - read_mapping += CSRs.hstatus -> read_hstatus - read_mapping += CSRs.hedeleg -> read_hedeleg - read_mapping += CSRs.hideleg -> read_hideleg - read_mapping += CSRs.hcounteren-> read_hcounteren - read_mapping += CSRs.hgatp -> reg_hgatp.asUInt - read_mapping += CSRs.hip -> read_hip - read_mapping += CSRs.hie -> read_hie - read_mapping += CSRs.hvip -> read_hvip - read_mapping += CSRs.hgeie -> 0.U - read_mapping += CSRs.hgeip -> 0.U - read_mapping += CSRs.htval -> reg_htval - read_mapping += CSRs.htinst -> 0.U - - val read_vsie = (read_hie & read_hideleg) >> 1 - val read_vsip = (read_hip & read_hideleg) >> 1 - val read_vsepc = readEPC(reg_vsepc).sextTo(xLen) - val read_vstval = reg_vstval.sextTo(xLen) - val read_vsstatus = io.gstatus.asUInt.extract(xLen-1,0) - - read_mapping += CSRs.vsstatus -> read_vsstatus - read_mapping += CSRs.vsip -> read_vsip - read_mapping += CSRs.vsie -> read_vsie - read_mapping += CSRs.vsscratch -> reg_vsscratch - read_mapping += CSRs.vscause -> reg_vscause - read_mapping += CSRs.vstval -> 
read_vstval - read_mapping += CSRs.vsatp -> reg_vsatp.asUInt - read_mapping += CSRs.vsepc -> read_vsepc - read_mapping += CSRs.vstvec -> read_vstvec - } - - // mimpid, marchid, and mvendorid are 0 unless overridden by customCSRs - Seq(CSRs.mimpid, CSRs.marchid, CSRs.mvendorid).foreach(id => read_mapping.getOrElseUpdate(id, 0.U)) - - val decoded_addr = { - val addr = Cat(io.status.v, io.rw.addr) - val pats = for (((k, _), i) <- read_mapping.zipWithIndex) - yield (BitPat(k.U), (0 until read_mapping.size).map(j => BitPat((i == j).B))) - val decoded = DecodeLogic(addr, Seq.fill(read_mapping.size)(X), pats) - val unvirtualized_mapping = (for (((k, _), v) <- read_mapping zip decoded) yield k -> v.asBool).toMap - - for ((k, v) <- unvirtualized_mapping) yield k -> { - val alt = CSR.mode(k) match { - case PRV.S => unvirtualized_mapping.lift(k + (1 << CSR.modeLSB)) - case PRV.H => unvirtualized_mapping.lift(k - (1 << CSR.modeLSB)) - case _ => None - } - alt.map(Mux(reg_mstatus.v, _, v)).getOrElse(v) - } - } - - val wdata = readModifyWriteCSR(io.rw.cmd, io.rw.rdata, io.rw.wdata) - - val system_insn = io.rw.cmd === CSR.I - val hlsv = Seq(HLV_B, HLV_BU, HLV_H, HLV_HU, HLV_W, HLV_WU, HLV_D, HSV_B, HSV_H, HSV_W, HSV_D, HLVX_HU, HLVX_WU) - val decode_table = Seq( ECALL-> List(Y,N,N,N,N,N,N,N,N), - EBREAK-> List(N,Y,N,N,N,N,N,N,N), - MRET-> List(N,N,Y,N,N,N,N,N,N), - CEASE-> List(N,N,N,Y,N,N,N,N,N), - WFI-> List(N,N,N,N,Y,N,N,N,N)) ++ - usingDebug.option( DRET-> List(N,N,Y,N,N,N,N,N,N)) ++ - usingNMI.option( MNRET-> List(N,N,Y,N,N,N,N,N,N)) ++ - coreParams.haveCFlush.option(CFLUSH_D_L1-> List(N,N,N,N,N,N,N,N,N)) ++ - usingSupervisor.option( SRET-> List(N,N,Y,N,N,N,N,N,N)) ++ - usingVM.option( SFENCE_VMA-> List(N,N,N,N,N,Y,N,N,N)) ++ - usingHypervisor.option( HFENCE_VVMA-> List(N,N,N,N,N,N,Y,N,N)) ++ - usingHypervisor.option( HFENCE_GVMA-> List(N,N,N,N,N,N,N,Y,N)) ++ - (if (usingHypervisor) hlsv.map(_-> List(N,N,N,N,N,N,N,N,Y)) else Seq()) - val insn_call :: insn_break :: insn_ret 
:: insn_cease :: insn_wfi :: _ :: _ :: _ :: _ :: Nil = { - val insn = ECALL.value.U | (io.rw.addr << 20) - DecodeLogic(insn, decode_table(0)._2.map(x=>X), decode_table).map(system_insn && _.asBool) - } - - for (io_dec <- io.decode) { - val addr = io_dec.inst(31, 20) - - def decodeAny(m: LinkedHashMap[Int,Bits]): Bool = m.map { case(k: Int, _: Bits) => addr === k.U }.reduce(_||_) - def decodeFast(s: Seq[Int]): Bool = DecodeLogic(addr, s.map(_.U), (read_mapping -- s).keys.toList.map(_.U)) - - val _ :: is_break :: is_ret :: _ :: is_wfi :: is_sfence :: is_hfence_vvma :: is_hfence_gvma :: is_hlsv :: Nil = - DecodeLogic(io_dec.inst, decode_table(0)._2.map(x=>X), decode_table).map(_.asBool) - val is_counter = (addr.inRange(CSR.firstCtr.U, (CSR.firstCtr + CSR.nCtr).U) || addr.inRange(CSR.firstCtrH.U, (CSR.firstCtrH + CSR.nCtr).U)) - - val allow_wfi = (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !reg_mstatus.tw && (!reg_mstatus.v || !reg_hstatus.vtw) - val allow_sfence_vma = (!usingVM).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtvm, reg_mstatus.tvm) - val allow_hfence_vvma = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U) - val allow_hlsv = (!usingHypervisor).B || !reg_mstatus.v && (reg_mstatus.prv >= PRV.S.U || reg_hstatus.hu) - val allow_sret = (!usingSupervisor).B || reg_mstatus.prv > PRV.S.U || !Mux(reg_mstatus.v, reg_hstatus.vtsr, reg_mstatus.tsr) - val counter_addr = addr(log2Ceil(read_mcounteren.getWidth)-1, 0) - val allow_counter = (reg_mstatus.prv > PRV.S.U || read_mcounteren(counter_addr)) && - (!usingSupervisor.B || reg_mstatus.prv >= PRV.S.U || read_scounteren(counter_addr)) && - (!usingHypervisor.B || !reg_mstatus.v || read_hcounteren(counter_addr)) - io_dec.fp_illegal := io.status.fs === 0.U || reg_mstatus.v && reg_vsstatus.fs === 0.U || !reg_misa('f'-'a') - io_dec.vector_illegal := io.status.vs === 0.U || reg_mstatus.v && reg_vsstatus.vs === 0.U || !reg_misa('v'-'a') - io_dec.fp_csr := 
decodeFast(fp_csrs.keys.toList) - io_dec.rocc_illegal := io.status.xs === 0.U || reg_mstatus.v && reg_vsstatus.xs === 0.U || !reg_misa('x'-'a') - val csr_addr_legal = reg_mstatus.prv >= CSR.mode(addr) || - usingHypervisor.B && !reg_mstatus.v && reg_mstatus.prv === PRV.S.U && CSR.mode(addr) === PRV.H.U - val csr_exists = decodeAny(read_mapping) - io_dec.read_illegal := !csr_addr_legal || - !csr_exists || - ((addr === CSRs.satp.U || addr === CSRs.hgatp.U) && !allow_sfence_vma) || - is_counter && !allow_counter || - decodeFast(debug_csrs.keys.toList) && !reg_debug || - decodeFast(vector_csrs.keys.toList) && io_dec.vector_illegal || - io_dec.fp_csr && io_dec.fp_illegal - io_dec.write_illegal := addr(11,10).andR - io_dec.write_flush := { - val addr_m = addr | (PRV.M.U << CSR.modeLSB) - !(addr_m >= CSRs.mscratch.U && addr_m <= CSRs.mtval.U) - } - io_dec.system_illegal := !csr_addr_legal && !is_hlsv || - is_wfi && !allow_wfi || - is_ret && !allow_sret || - is_ret && addr(10) && addr(7) && !reg_debug || - (is_sfence || is_hfence_gvma) && !allow_sfence_vma || - is_hfence_vvma && !allow_hfence_vvma || - is_hlsv && !allow_hlsv - - io_dec.virtual_access_illegal := reg_mstatus.v && csr_exists && ( - CSR.mode(addr) === PRV.H.U || - is_counter && read_mcounteren(counter_addr) && (!read_hcounteren(counter_addr) || !reg_mstatus.prv(0) && !read_scounteren(counter_addr)) || - CSR.mode(addr) === PRV.S.U && !reg_mstatus.prv(0) || - addr === CSRs.satp.U && reg_mstatus.prv(0) && reg_hstatus.vtvm) - - io_dec.virtual_system_illegal := reg_mstatus.v && ( - is_hfence_vvma || - is_hfence_gvma || - is_hlsv || - is_wfi && (!reg_mstatus.prv(0) || !reg_mstatus.tw && reg_hstatus.vtw) || - is_ret && CSR.mode(addr) === PRV.S.U && (!reg_mstatus.prv(0) || reg_hstatus.vtsr) || - is_sfence && (!reg_mstatus.prv(0) || reg_hstatus.vtvm)) - } - - val cause = - Mux(insn_call, Causes.user_ecall.U + Mux(reg_mstatus.prv(0) && reg_mstatus.v, PRV.H.U, reg_mstatus.prv), - Mux[UInt](insn_break, Causes.breakpoint.U, 
io.cause)) - val cause_lsbs = cause(log2Ceil(1 + CSR.busErrorIntCause)-1, 0) - val causeIsDebugInt = cause(xLen-1) && cause_lsbs === CSR.debugIntCause.U - val causeIsDebugTrigger = !cause(xLen-1) && cause_lsbs === CSR.debugTriggerCause.U - val causeIsDebugBreak = !cause(xLen-1) && insn_break && Cat(reg_dcsr.ebreakm, reg_dcsr.ebreakh, reg_dcsr.ebreaks, reg_dcsr.ebreaku)(reg_mstatus.prv) - val trapToDebug = usingDebug.B && (reg_singleStepped || causeIsDebugInt || causeIsDebugTrigger || causeIsDebugBreak || reg_debug) - val debugEntry = p(DebugModuleKey).map(_.debugEntry).getOrElse(BigInt(0x800)) - val debugException = p(DebugModuleKey).map(_.debugException).getOrElse(BigInt(0x808)) - val debugTVec = Mux(reg_debug, Mux(insn_break, debugEntry.U, debugException.U), debugEntry.U) - val delegate = usingSupervisor.B && reg_mstatus.prv <= PRV.S.U && Mux(cause(xLen-1), read_mideleg(cause_lsbs), read_medeleg(cause_lsbs)) - val delegateVS = reg_mstatus.v && delegate && Mux(cause(xLen-1), read_hideleg(cause_lsbs), read_hedeleg(cause_lsbs)) - def mtvecBaseAlign = 2 - def mtvecInterruptAlign = { - require(reg_mip.getWidth <= xLen) - log2Ceil(xLen) - } - val notDebugTVec = { - val base = Mux(delegate, Mux(delegateVS, read_vstvec, read_stvec), read_mtvec) - val interruptOffset = cause(mtvecInterruptAlign-1, 0) << mtvecBaseAlign - val interruptVec = Cat(base >> (mtvecInterruptAlign + mtvecBaseAlign), interruptOffset) - val doVector = base(0) && cause(cause.getWidth-1) && (cause_lsbs >> mtvecInterruptAlign) === 0.U - Mux(doVector, interruptVec, base >> mtvecBaseAlign << mtvecBaseAlign) - } - - val causeIsRnmiInt = cause(xLen-1) && cause(xLen-2) && (cause_lsbs === CSR.rnmiIntCause.U || cause_lsbs === CSR.rnmiBEUCause.U) - val causeIsRnmiBEU = cause(xLen-1) && cause(xLen-2) && cause_lsbs === CSR.rnmiBEUCause.U - val causeIsNmi = causeIsRnmiInt - val nmiTVecInt = io.interrupts.nmi.map(nmi => nmi.rnmi_interrupt_vector).getOrElse(0.U) - val nmiTVecXcpt = io.interrupts.nmi.map(nmi => 
nmi.rnmi_exception_vector).getOrElse(0.U) - val trapToNmiInt = usingNMI.B && causeIsNmi - val trapToNmiXcpt = usingNMI.B && !nmie - val trapToNmi = trapToNmiInt || trapToNmiXcpt - val nmiTVec = (Mux(causeIsNmi, nmiTVecInt, nmiTVecXcpt)>>1)<<1 - - val tvec = Mux(trapToDebug, debugTVec, Mux(trapToNmi, nmiTVec, notDebugTVec)) - io.evec := tvec - io.ptbr := reg_satp - io.hgatp := reg_hgatp - io.vsatp := reg_vsatp - io.eret := insn_call || insn_break || insn_ret - io.singleStep := reg_dcsr.step && !reg_debug - io.status := reg_mstatus - io.status.sd := io.status.fs.andR || io.status.xs.andR || io.status.vs.andR - io.status.debug := reg_debug - io.status.isa := reg_misa - io.status.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U - io.status.sxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U - io.status.dprv := Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpp, reg_mstatus.prv) - io.status.dv := reg_mstatus.v || Mux(reg_mstatus.mprv && !reg_debug, reg_mstatus.mpv, false.B) - io.status.sd_rv32 := (xLen == 32).B && io.status.sd - io.status.mpv := reg_mstatus.mpv - io.status.gva := reg_mstatus.gva - io.hstatus := reg_hstatus - io.hstatus.vsxl := (if (usingSupervisor) log2Ceil(xLen) - 4 else 0).U - io.gstatus := reg_vsstatus - io.gstatus.sd := io.gstatus.fs.andR || io.gstatus.xs.andR || io.gstatus.vs.andR - io.gstatus.uxl := (if (usingUser) log2Ceil(xLen) - 4 else 0).U - io.gstatus.sd_rv32 := (xLen == 32).B && io.gstatus.sd - - val exception = insn_call || insn_break || io.exception - assert(PopCount(insn_ret :: insn_call :: insn_break :: io.exception :: Nil) <= 1.U, "these conditions must be mutually exclusive") - - when (insn_wfi && !io.singleStep && !reg_debug) { reg_wfi := true.B } - when (pending_interrupts.orR || io.interrupts.debug || exception) { reg_wfi := false.B } - io.interrupts.nmi.map(nmi => when (nmi.rnmi) { reg_wfi := false.B } ) - - when (io.retire(0) || exception) { reg_singleStepped := true.B } - when (!io.singleStep) { reg_singleStepped := 
false.B } - assert(!io.singleStep || io.retire <= 1.U) - assert(!reg_singleStepped || io.retire === 0.U) - - val epc = formEPC(io.pc) - val tval = Mux(insn_break, epc, io.tval) - - when (exception) { - when (trapToDebug) { - when (!reg_debug) { - reg_mstatus.v := false.B - reg_debug := true.B - reg_dpc := epc - reg_dcsr.cause := Mux(reg_singleStepped, 4.U, Mux(causeIsDebugInt, 3.U, Mux[UInt](causeIsDebugTrigger, 2.U, 1.U))) - reg_dcsr.prv := trimPrivilege(reg_mstatus.prv) - reg_dcsr.v := reg_mstatus.v - new_prv := PRV.M.U - } - }.elsewhen (trapToNmiInt) { - when (reg_rnmie) { - reg_mstatus.v := false.B - reg_mnstatus.mpv := reg_mstatus.v - reg_rnmie := false.B - reg_mnepc := epc - reg_mncause := (BigInt(1) << (xLen-1)).U | Mux(causeIsRnmiBEU, 3.U, 2.U) - reg_mnstatus.mpp := trimPrivilege(reg_mstatus.prv) - new_prv := PRV.M.U - } - }.elsewhen (delegateVS && nmie) { - reg_mstatus.v := true.B - reg_vsstatus.spp := reg_mstatus.prv - reg_vsepc := epc - reg_vscause := Mux(cause(xLen-1), Cat(cause(xLen-1, 2), 1.U(2.W)), cause) - reg_vstval := tval - reg_vsstatus.spie := reg_vsstatus.sie - reg_vsstatus.sie := false.B - new_prv := PRV.S.U - }.elsewhen (delegate && nmie) { - reg_mstatus.v := false.B - reg_hstatus.spvp := Mux(reg_mstatus.v, reg_mstatus.prv(0),reg_hstatus.spvp) - reg_hstatus.gva := io.gva - reg_hstatus.spv := reg_mstatus.v - reg_sepc := epc - reg_scause := cause - reg_stval := tval - reg_htval := io.htval - reg_mstatus.spie := reg_mstatus.sie - reg_mstatus.spp := reg_mstatus.prv - reg_mstatus.sie := false.B - new_prv := PRV.S.U - }.otherwise { - reg_mstatus.v := false.B - reg_mstatus.mpv := reg_mstatus.v - reg_mstatus.gva := io.gva - reg_mepc := epc - reg_mcause := cause - reg_mtval := tval - reg_mtval2 := io.htval - reg_mstatus.mpie := reg_mstatus.mie - reg_mstatus.mpp := trimPrivilege(reg_mstatus.prv) - reg_mstatus.mie := false.B - new_prv := PRV.M.U - } - } - - for (i <- 0 until supported_interrupts.getWidth) { - val en = exception && (supported_interrupts 
& (BigInt(1) << i).U) =/= 0.U && cause === (BigInt(1) << (xLen - 1)).U + i.U - val delegable = (delegable_interrupts & (BigInt(1) << i).U) =/= 0.U - property.cover(en && !delegate, s"INTERRUPT_M_$i") - property.cover(en && delegable && delegate, s"INTERRUPT_S_$i") - } - for (i <- 0 until xLen) { - val supported_exceptions: BigInt = 0x8fe | - (if (usingCompressed && !coreParams.misaWritable) 0 else 1) | - (if (usingUser) 0x100 else 0) | - (if (usingSupervisor) 0x200 else 0) | - (if (usingVM) 0xb000 else 0) - if (((supported_exceptions >> i) & 1) != 0) { - val en = exception && cause === i.U - val delegable = (delegable_exceptions & (BigInt(1) << i).U) =/= 0.U - property.cover(en && !delegate, s"EXCEPTION_M_$i") - property.cover(en && delegable && delegate, s"EXCEPTION_S_$i") - } - } - - when (insn_ret) { - val ret_prv = WireInit(UInt(), DontCare) - when (usingSupervisor.B && !io.rw.addr(9)) { - when (!reg_mstatus.v) { - reg_mstatus.sie := reg_mstatus.spie - reg_mstatus.spie := true.B - reg_mstatus.spp := PRV.U.U - ret_prv := reg_mstatus.spp - reg_mstatus.v := usingHypervisor.B && reg_hstatus.spv - io.evec := readEPC(reg_sepc) - reg_hstatus.spv := false.B - }.otherwise { - reg_vsstatus.sie := reg_vsstatus.spie - reg_vsstatus.spie := true.B - reg_vsstatus.spp := PRV.U.U - ret_prv := reg_vsstatus.spp - reg_mstatus.v := usingHypervisor.B - io.evec := readEPC(reg_vsepc) - } - }.elsewhen (usingDebug.B && io.rw.addr(10) && io.rw.addr(7)) { - ret_prv := reg_dcsr.prv - reg_mstatus.v := usingHypervisor.B && reg_dcsr.v && reg_dcsr.prv <= PRV.S.U - reg_debug := false.B - io.evec := readEPC(reg_dpc) - }.elsewhen (usingNMI.B && io.rw.addr(10) && !io.rw.addr(7)) { - ret_prv := reg_mnstatus.mpp - reg_mstatus.v := usingHypervisor.B && reg_mnstatus.mpv && reg_mnstatus.mpp <= PRV.S.U - reg_rnmie := true.B - io.evec := readEPC(reg_mnepc) - }.otherwise { - reg_mstatus.mie := reg_mstatus.mpie - reg_mstatus.mpie := true.B - reg_mstatus.mpp := legalizePrivilege(PRV.U.U) - reg_mstatus.mpv 
:= false.B - ret_prv := reg_mstatus.mpp - reg_mstatus.v := usingHypervisor.B && reg_mstatus.mpv && reg_mstatus.mpp <= PRV.S.U - io.evec := readEPC(reg_mepc) - } - - new_prv := ret_prv - when (usingUser.B && ret_prv <= PRV.S.U) { - reg_mstatus.mprv := false.B - } - } - - io.time := reg_cycle - io.csr_stall := reg_wfi || io.status.cease - io.status.cease := RegEnable(true.B, false.B, insn_cease) - io.status.wfi := reg_wfi - - for ((io, reg) <- io.customCSRs zip reg_custom) { - io.wen := false.B - io.wdata := wdata - io.value := reg - } - - io.rw.rdata := Mux1H(for ((k, v) <- read_mapping) yield decoded_addr(k) -> v) - - // cover access to register - val coverable_counters = read_mapping.filterNot { case (k, _) => - k >= CSR.firstHPC + nPerfCounters && k < CSR.firstHPC + CSR.nHPM - } - coverable_counters.foreach( {case (k, v) => { - when (!k.U(11,10).andR) { // Cover points for RW CSR registers - property.cover(io.rw.cmd.isOneOf(CSR.W, CSR.S, CSR.C) && io.rw.addr===k.U, "CSR_access_"+k.toString, "Cover Accessing Core CSR field") - } .otherwise { // Cover points for RO CSR registers - property.cover(io.rw.cmd===CSR.R && io.rw.addr===k.U, "CSR_access_"+k.toString, "Cover Accessing Core CSR field") - } - }}) - - val set_vs_dirty = WireDefault(io.vector.map(_.set_vs_dirty).getOrElse(false.B)) - io.vector.foreach { vio => - when (set_vs_dirty) { - assert(reg_mstatus.vs > 0.U) - when (reg_mstatus.v) { reg_vsstatus.vs := 3.U } - reg_mstatus.vs := 3.U - } - } - - val set_fs_dirty = WireDefault(io.set_fs_dirty.getOrElse(false.B)) - if (coreParams.haveFSDirty) { - when (set_fs_dirty) { - assert(reg_mstatus.fs > 0.U) - when (reg_mstatus.v) { reg_vsstatus.fs := 3.U } - reg_mstatus.fs := 3.U - } - } - - io.fcsr_rm := reg_frm - when (io.fcsr_flags.valid) { - reg_fflags := reg_fflags | io.fcsr_flags.bits - set_fs_dirty := true.B - } - - io.vector.foreach { vio => - when (vio.set_vxsat) { - reg_vxsat.get := true.B - set_vs_dirty := true.B - } - } - - val csr_wen = 
io.rw.cmd.isOneOf(CSR.S, CSR.C, CSR.W) - io.csrw_counter := Mux(coreParams.haveBasicCounters.B && csr_wen && (io.rw.addr.inRange(CSRs.mcycle.U, (CSRs.mcycle + CSR.nCtr).U) || io.rw.addr.inRange(CSRs.mcycleh.U, (CSRs.mcycleh + CSR.nCtr).U)), UIntToOH(io.rw.addr(log2Ceil(CSR.nCtr+nPerfCounters)-1, 0)), 0.U) - when (csr_wen) { - val scause_mask = ((BigInt(1) << (xLen-1)) + 31).U /* only implement 5 LSBs and MSB */ - val satp_valid_modes = 0 +: (minPgLevels to pgLevels).map(new PTBR().pgLevelsToMode(_)) - - when (decoded_addr(CSRs.mstatus)) { - val new_mstatus = wdata.asTypeOf(new MStatus()) - reg_mstatus.mie := new_mstatus.mie - reg_mstatus.mpie := new_mstatus.mpie - - if (usingUser) { - reg_mstatus.mprv := new_mstatus.mprv - reg_mstatus.mpp := legalizePrivilege(new_mstatus.mpp) - if (usingSupervisor) { - reg_mstatus.spp := new_mstatus.spp - reg_mstatus.spie := new_mstatus.spie - reg_mstatus.sie := new_mstatus.sie - reg_mstatus.tw := new_mstatus.tw - reg_mstatus.tsr := new_mstatus.tsr - } - if (usingVM) { - reg_mstatus.mxr := new_mstatus.mxr - reg_mstatus.sum := new_mstatus.sum - reg_mstatus.tvm := new_mstatus.tvm - } - if (usingHypervisor) { - reg_mstatus.mpv := new_mstatus.mpv - reg_mstatus.gva := new_mstatus.gva - } - } - - if (usingSupervisor || usingFPU) reg_mstatus.fs := formFS(new_mstatus.fs) - reg_mstatus.vs := formVS(new_mstatus.vs) - } - when (decoded_addr(CSRs.misa)) { - val mask = isaStringToMask(isaMaskString).U(xLen.W) - val f = wdata('f' - 'a') - // suppress write if it would cause the next fetch to be misaligned - when (!usingCompressed.B || !io.pc(1) || wdata('c' - 'a')) { - if (coreParams.misaWritable) - reg_misa := ~(~wdata | (!f << ('d' - 'a'))) & mask | reg_misa & ~mask - } - } - when (decoded_addr(CSRs.mip)) { - // MIP should be modified based on the value in reg_mip, not the value - // in read_mip, since read_mip.seip is the OR of reg_mip.seip and - // io.interrupts.seip. 
We don't want the value on the PLIC line to - // inadvertently be OR'd into read_mip.seip. - val new_mip = readModifyWriteCSR(io.rw.cmd, reg_mip.asUInt, io.rw.wdata).asTypeOf(new MIP) - if (usingSupervisor) { - reg_mip.ssip := new_mip.ssip - reg_mip.stip := new_mip.stip - reg_mip.seip := new_mip.seip - } - if (usingHypervisor) { - reg_mip.vssip := new_mip.vssip - } - } - when (decoded_addr(CSRs.mie)) { reg_mie := wdata & supported_interrupts } - when (decoded_addr(CSRs.mepc)) { reg_mepc := formEPC(wdata) } - when (decoded_addr(CSRs.mscratch)) { reg_mscratch := wdata } - if (mtvecWritable) - when (decoded_addr(CSRs.mtvec)) { reg_mtvec := wdata } - when (decoded_addr(CSRs.mcause)) { reg_mcause := wdata & ((BigInt(1) << (xLen-1)) + (BigInt(1) << whichInterrupt.getWidth) - 1).U } - when (decoded_addr(CSRs.mtval)) { reg_mtval := wdata } - - if (usingNMI) { - val new_mnstatus = wdata.asTypeOf(new MNStatus()) - when (decoded_addr(CustomCSRs.mnscratch)) { reg_mnscratch := wdata } - when (decoded_addr(CustomCSRs.mnepc)) { reg_mnepc := formEPC(wdata) } - when (decoded_addr(CustomCSRs.mncause)) { reg_mncause := wdata & ((BigInt(1) << (xLen-1)) + BigInt(3)).U } - when (decoded_addr(CustomCSRs.mnstatus)) { - reg_mnstatus.mpp := legalizePrivilege(new_mnstatus.mpp) - reg_mnstatus.mpv := usingHypervisor.B && new_mnstatus.mpv - reg_rnmie := reg_rnmie | new_mnstatus.mie // mnie bit settable but not clearable from software - } - } - - for (((e, c), i) <- (reg_hpmevent zip reg_hpmcounter).zipWithIndex) { - writeCounter(i + CSR.firstMHPC, c, wdata) - when (decoded_addr(i + CSR.firstHPE)) { e := perfEventSets.maskEventSelector(wdata) } - } - if (coreParams.haveBasicCounters) { - when (decoded_addr(CSRs.mcountinhibit)) { reg_mcountinhibit := wdata & ~2.U(xLen.W) } // mcountinhibit bit [1] is tied zero - writeCounter(CSRs.mcycle, reg_cycle, wdata) - writeCounter(CSRs.minstret, reg_instret, wdata) - } - - if (usingFPU) { - when (decoded_addr(CSRs.fflags)) { set_fs_dirty := true.B; 
reg_fflags := wdata } - when (decoded_addr(CSRs.frm)) { set_fs_dirty := true.B; reg_frm := wdata } - when (decoded_addr(CSRs.fcsr)) { - set_fs_dirty := true.B - reg_fflags := wdata - reg_frm := wdata >> reg_fflags.getWidth - } - } - if (usingDebug) { - when (decoded_addr(CSRs.dcsr)) { - val new_dcsr = wdata.asTypeOf(new DCSR()) - reg_dcsr.step := new_dcsr.step - reg_dcsr.ebreakm := new_dcsr.ebreakm - if (usingSupervisor) reg_dcsr.ebreaks := new_dcsr.ebreaks - if (usingUser) reg_dcsr.ebreaku := new_dcsr.ebreaku - if (usingUser) reg_dcsr.prv := legalizePrivilege(new_dcsr.prv) - if (usingHypervisor) reg_dcsr.v := new_dcsr.v - } - when (decoded_addr(CSRs.dpc)) { reg_dpc := formEPC(wdata) } - when (decoded_addr(CSRs.dscratch0)) { reg_dscratch0 := wdata } - reg_dscratch1.foreach { r => - when (decoded_addr(CSRs.dscratch1)) { r := wdata } - } - } - if (usingSupervisor) { - when (decoded_addr(CSRs.sstatus)) { - val new_sstatus = wdata.asTypeOf(new MStatus()) - reg_mstatus.sie := new_sstatus.sie - reg_mstatus.spie := new_sstatus.spie - reg_mstatus.spp := new_sstatus.spp - reg_mstatus.fs := formFS(new_sstatus.fs) - reg_mstatus.vs := formVS(new_sstatus.vs) - if (usingVM) { - reg_mstatus.mxr := new_sstatus.mxr - reg_mstatus.sum := new_sstatus.sum - } - } - when (decoded_addr(CSRs.sip)) { - val new_sip = ((read_mip & ~read_mideleg) | (wdata & read_mideleg)).asTypeOf(new MIP()) - reg_mip.ssip := new_sip.ssip - } - when (decoded_addr(CSRs.satp)) { - if (usingVM) { - val new_satp = wdata.asTypeOf(new PTBR()) - when (new_satp.mode.isOneOf(satp_valid_modes.map(_.U))) { - reg_satp.mode := new_satp.mode & satp_valid_modes.reduce(_|_).U - reg_satp.ppn := new_satp.ppn(ppnBits-1,0) - if (asIdBits > 0) reg_satp.asid := new_satp.asid(asIdBits-1,0) - } - } - } - when (decoded_addr(CSRs.sie)) { reg_mie := (reg_mie & ~sie_mask) | (wdata & sie_mask) } - when (decoded_addr(CSRs.sscratch)) { reg_sscratch := wdata } - when (decoded_addr(CSRs.sepc)) { reg_sepc := formEPC(wdata) } - when 
(decoded_addr(CSRs.stvec)) { reg_stvec := wdata } - when (decoded_addr(CSRs.scause)) { reg_scause := wdata & scause_mask } - when (decoded_addr(CSRs.stval)) { reg_stval := wdata } - when (decoded_addr(CSRs.mideleg)) { reg_mideleg := wdata } - when (decoded_addr(CSRs.medeleg)) { reg_medeleg := wdata } - when (decoded_addr(CSRs.scounteren)) { reg_scounteren := wdata } - } - - if (usingHypervisor) { - when (decoded_addr(CSRs.hstatus)) { - val new_hstatus = wdata.asTypeOf(new HStatus()) - reg_hstatus.gva := new_hstatus.gva - reg_hstatus.spv := new_hstatus.spv - reg_hstatus.spvp := new_hstatus.spvp - reg_hstatus.hu := new_hstatus.hu - reg_hstatus.vtvm := new_hstatus.vtvm - reg_hstatus.vtw := new_hstatus.vtw - reg_hstatus.vtsr := new_hstatus.vtsr - reg_hstatus.vsxl := new_hstatus.vsxl - } - when (decoded_addr(CSRs.hideleg)) { reg_hideleg := wdata } - when (decoded_addr(CSRs.hedeleg)) { reg_hedeleg := wdata } - when (decoded_addr(CSRs.hgatp)) { - val new_hgatp = wdata.asTypeOf(new PTBR()) - val valid_modes = 0 +: (minPgLevels to pgLevels).map(new_hgatp.pgLevelsToMode(_)) - when (new_hgatp.mode.isOneOf(valid_modes.map(_.U))) { - reg_hgatp.mode := new_hgatp.mode & valid_modes.reduce(_|_).U - } - reg_hgatp.ppn := Cat(new_hgatp.ppn(ppnBits-1,2), 0.U(2.W)) - if (vmIdBits > 0) reg_hgatp.asid := new_hgatp.asid(vmIdBits-1,0) - } - when (decoded_addr(CSRs.hip)) { - val new_hip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP()) - reg_mip.vssip := new_hip.vssip - } - when (decoded_addr(CSRs.hie)) { reg_mie := (reg_mie & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts) } - when (decoded_addr(CSRs.hvip)) { - val new_sip = ((read_mip & ~hs_delegable_interrupts) | (wdata & hs_delegable_interrupts)).asTypeOf(new MIP()) - reg_mip.vssip := new_sip.vssip - reg_mip.vstip := new_sip.vstip - reg_mip.vseip := new_sip.vseip - } - when (decoded_addr(CSRs.hcounteren)) { reg_hcounteren := wdata } - when (decoded_addr(CSRs.htval)) { 
reg_htval := wdata } - when (decoded_addr(CSRs.mtval2)) { reg_mtval2 := wdata } - - when (decoded_addr(CSRs.vsstatus)) { - val new_vsstatus = wdata.asTypeOf(new MStatus()) - reg_vsstatus.sie := new_vsstatus.sie - reg_vsstatus.spie := new_vsstatus.spie - reg_vsstatus.spp := new_vsstatus.spp - reg_vsstatus.mxr := new_vsstatus.mxr - reg_vsstatus.sum := new_vsstatus.sum - reg_vsstatus.fs := formFS(new_vsstatus.fs) - reg_vsstatus.vs := formVS(new_vsstatus.vs) - } - when (decoded_addr(CSRs.vsip)) { - val new_vsip = ((read_hip & ~read_hideleg) | ((wdata << 1) & read_hideleg)).asTypeOf(new MIP()) - reg_mip.vssip := new_vsip.vssip - } - when (decoded_addr(CSRs.vsatp)) { - val new_vsatp = wdata.asTypeOf(new PTBR()) - val mode_ok = new_vsatp.mode.isOneOf(satp_valid_modes.map(_.U)) - when (mode_ok) { - reg_vsatp.mode := new_vsatp.mode & satp_valid_modes.reduce(_|_).U - } - when (mode_ok || !reg_mstatus.v) { - reg_vsatp.ppn := new_vsatp.ppn(vpnBits.min(new_vsatp.ppn.getWidth)-1,0) - if (asIdBits > 0) reg_vsatp.asid := new_vsatp.asid(asIdBits-1,0) - } - } - when (decoded_addr(CSRs.vsie)) { reg_mie := (reg_mie & ~read_hideleg) | ((wdata << 1) & read_hideleg) } - when (decoded_addr(CSRs.vsscratch)) { reg_vsscratch := wdata } - when (decoded_addr(CSRs.vsepc)) { reg_vsepc := formEPC(wdata) } - when (decoded_addr(CSRs.vstvec)) { reg_vstvec := wdata } - when (decoded_addr(CSRs.vscause)) { reg_vscause := wdata & scause_mask } - when (decoded_addr(CSRs.vstval)) { reg_vstval := wdata } - } - if (usingUser) { - when (decoded_addr(CSRs.mcounteren)) { reg_mcounteren := wdata } - } - if (nBreakpoints > 0) { - when (decoded_addr(CSRs.tselect)) { reg_tselect := wdata } - - for ((bp, i) <- reg_bp.zipWithIndex) { - when (i.U === reg_tselect && (!bp.control.dmode || reg_debug)) { - when (decoded_addr(CSRs.tdata2)) { bp.address := wdata } - when (decoded_addr(CSRs.tdata3)) { - if (coreParams.mcontextWidth > 0) { - bp.textra.mselect := wdata(bp.textra.mselectPos) - bp.textra.mvalue := wdata >> 
bp.textra.mvaluePos - } - if (coreParams.scontextWidth > 0) { - bp.textra.sselect := wdata(bp.textra.sselectPos) - bp.textra.svalue := wdata >> bp.textra.svaluePos - } - } - when (decoded_addr(CSRs.tdata1)) { - bp.control := wdata.asTypeOf(bp.control) - - val prevChain = if (i == 0) false.B else reg_bp(i-1).control.chain - val prevDMode = if (i == 0) false.B else reg_bp(i-1).control.dmode - val nextChain = if (i >= nBreakpoints-1) true.B else reg_bp(i+1).control.chain - val nextDMode = if (i >= nBreakpoints-1) true.B else reg_bp(i+1).control.dmode - val newBPC = readModifyWriteCSR(io.rw.cmd, bp.control.asUInt, io.rw.wdata).asTypeOf(bp.control) - val dMode = newBPC.dmode && reg_debug && (prevDMode || !prevChain) - bp.control.dmode := dMode - when (dMode || (newBPC.action > 1.U)) { bp.control.action := newBPC.action }.otherwise { bp.control.action := 0.U } - bp.control.chain := newBPC.chain && !(prevChain || nextChain) && (dMode || !nextDMode) - } - } - } - } - reg_mcontext.foreach { r => when (decoded_addr(CSRs.mcontext)) { r := wdata }} - reg_scontext.foreach { r => when (decoded_addr(CSRs.scontext)) { r := wdata }} - if (reg_pmp.nonEmpty) for (((pmp, next), i) <- (reg_pmp zip (reg_pmp.tail :+ reg_pmp.last)).zipWithIndex) { - require(xLen % pmp.cfg.getWidth == 0) - when (decoded_addr(CSRs.pmpcfg0 + pmpCfgIndex(i)) && !pmp.cfgLocked) { - val newCfg = (wdata >> ((i * pmp.cfg.getWidth) % xLen)).asTypeOf(new PMPConfig()) - pmp.cfg := newCfg - // disallow unreadable but writable PMPs - pmp.cfg.w := newCfg.w && newCfg.r - // can't select a=NA4 with coarse-grained PMPs - if (pmpGranularity.log2 > PMP.lgAlign) - pmp.cfg.a := Cat(newCfg.a(1), newCfg.a.orR) - } - when (decoded_addr(CSRs.pmpaddr0 + i) && !pmp.addrLocked(next)) { - pmp.addr := wdata - } - } - for ((io, csr, reg) <- (io.customCSRs, customCSRs, reg_custom).zipped) { - val mask = csr.mask.U(xLen.W) - when (decoded_addr(csr.id)) { - reg := (wdata & mask) | (reg & ~mask) - io.wen := true.B - } - } - if 
(usingVector) { - when (decoded_addr(CSRs.vstart)) { set_vs_dirty := true.B; reg_vstart.get := wdata } - when (decoded_addr(CSRs.vxrm)) { set_vs_dirty := true.B; reg_vxrm.get := wdata } - when (decoded_addr(CSRs.vxsat)) { set_vs_dirty := true.B; reg_vxsat.get := wdata } - when (decoded_addr(CSRs.vcsr)) { - set_vs_dirty := true.B - reg_vxsat.get := wdata - reg_vxrm.get := wdata >> 1 - } - } - } - - io.vector.map { vio => - when (vio.set_vconfig.valid) { - // user of CSRFile is responsible for set_vs_dirty in this case - assert(vio.set_vconfig.bits.vl <= vio.set_vconfig.bits.vtype.vlMax) - reg_vconfig.get := vio.set_vconfig.bits - } - when (vio.set_vstart.valid) { - set_vs_dirty := true.B - reg_vstart.get := vio.set_vstart.bits - } - vio.vstart := reg_vstart.get - vio.vconfig := reg_vconfig.get - vio.vxrm := reg_vxrm.get - - when (reset.asBool) { - reg_vconfig.get.vl := 0.U - reg_vconfig.get.vtype := 0.U.asTypeOf(new VType) - reg_vconfig.get.vtype.vill := true.B - } - } - - when(reset.asBool) { - reg_satp.mode := 0.U - reg_vsatp.mode := 0.U - reg_hgatp.mode := 0.U - } - if (!usingVM) { - reg_satp.mode := 0.U - reg_satp.ppn := 0.U - reg_satp.asid := 0.U - } - if (!usingHypervisor) { - reg_vsatp.mode := 0.U - reg_vsatp.ppn := 0.U - reg_vsatp.asid := 0.U - reg_hgatp.mode := 0.U - reg_hgatp.ppn := 0.U - reg_hgatp.asid := 0.U - } - if (!(asIdBits > 0)) { - reg_satp.asid := 0.U - reg_vsatp.asid := 0.U - } - if (!(vmIdBits > 0)) { - reg_hgatp.asid := 0.U - } - reg_vsstatus.xs := (if (usingRoCC) 3.U else 0.U) - - if (nBreakpoints <= 1) reg_tselect := 0.U - for (bpc <- reg_bp map {_.control}) { - bpc.ttype := bpc.tType.U - bpc.maskmax := bpc.maskMax.U - bpc.reserved := 0.U - bpc.zero := 0.U - bpc.h := false.B - if (!usingSupervisor) bpc.s := false.B - if (!usingUser) bpc.u := false.B - if (!usingSupervisor && !usingUser) bpc.m := true.B - when (reset.asBool) { - bpc.action := 0.U - bpc.dmode := false.B - bpc.chain := false.B - bpc.r := false.B - bpc.w := false.B - bpc.x := 
false.B - } - } - for (bpx <- reg_bp map {_.textra}) { - if (coreParams.mcontextWidth == 0) bpx.mselect := false.B - if (coreParams.scontextWidth == 0) bpx.sselect := false.B - } - for (bp <- reg_bp drop nBreakpoints) - bp := 0.U.asTypeOf(new BP()) - for (pmp <- reg_pmp) { - pmp.cfg.res := 0.U - when (reset.asBool) { pmp.reset() } - } - - for (((t, insn), i) <- (io.trace zip io.inst).zipWithIndex) { - t.exception := io.retire >= i.U && exception - t.valid := io.retire > i.U || t.exception - t.insn := insn - t.iaddr := io.pc - t.priv := Cat(reg_debug, reg_mstatus.prv) - t.cause := cause - t.interrupt := cause(xLen-1) - t.tval := io.tval - } - - def chooseInterrupt(masksIn: Seq[UInt]): (Bool, UInt) = { - val nonstandard = supported_interrupts.getWidth-1 to 12 by -1 - // MEI, MSI, MTI, SEI, SSI, STI, VSEI, VSSI, VSTI, UEI, USI, UTI - val standard = Seq(11, 3, 7, 9, 1, 5, 10, 2, 6, 8, 0, 4) - val priority = nonstandard ++ standard - val masks = masksIn.reverse - val any = masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => m(i))).reduce(_||_) - val which = PriorityMux(masks.flatMap(m => priority.filter(_ < m.getWidth).map(i => (m(i), i.U)))) - (any, which) - } - - def readModifyWriteCSR(cmd: UInt, rdata: UInt, wdata: UInt) = { - (Mux(cmd(1), rdata, 0.U) | wdata) & ~Mux(cmd(1,0).andR, wdata, 0.U) - } - - def legalizePrivilege(priv: UInt): UInt = - if (usingSupervisor) Mux(priv === PRV.H.U, PRV.U.U, priv) - else if (usingUser) Fill(2, priv(0)) - else PRV.M.U - - def trimPrivilege(priv: UInt): UInt = - if (usingSupervisor) priv - else legalizePrivilege(priv) - - def writeCounter(lo: Int, ctr: WideCounter, wdata: UInt) = { - if (xLen == 32) { - val hi = lo + CSRs.mcycleh - CSRs.mcycle - when (decoded_addr(lo)) { ctr := Cat(ctr(ctr.getWidth-1, 32), wdata) } - when (decoded_addr(hi)) { ctr := Cat(wdata(ctr.getWidth-33, 0), ctr(31, 0)) } - } else { - when (decoded_addr(lo)) { ctr := wdata(ctr.getWidth-1, 0) } - } - } - def formEPC(x: UInt) = ~(~x | (if 
(usingCompressed) 1.U else 3.U)) - def readEPC(x: UInt) = ~(~x | Mux(reg_misa('c' - 'a'), 1.U, 3.U)) - def formTVec(x: UInt) = x andNot Mux(x(0), ((((BigInt(1) << mtvecInterruptAlign) - 1) << mtvecBaseAlign) | 2).U, 2.U) - def isaStringToMask(s: String) = s.map(x => 1 << (x - 'A')).foldLeft(0)(_|_) - def formFS(fs: UInt) = if (coreParams.haveFSDirty) fs else Fill(2, fs.orR) - def formVS(vs: UInt) = if (usingVector) vs else 0.U -} diff --git a/diplomatic/src/rocket/Core.scala b/diplomatic/src/rocket/Core.scala index 0346b219e..50122e586 100644 --- a/diplomatic/src/rocket/Core.scala +++ b/diplomatic/src/rocket/Core.scala @@ -5,6 +5,7 @@ package org.chipsalliance.rockettile import Chisel._ import org.chipsalliance.cde.config._ import freechips.rocketchip.util._ +import freechips.rocketchip.rocket.{TracedInstruction} import org.chipsalliance.rocket._ case object XLen extends Field[Int] @@ -54,7 +55,7 @@ trait CoreParams { val mtvecWritable: Boolean val traceHasWdata: Boolean def customIsaExt: Option[String] = None - def customCSRs(implicit p: Parameters): CustomCSRs = new CustomCSRs + def customCSRs(implicit p: Parameters): CustomCSRs = new CustomCSRs(p(XLen)) def hasSupervisorMode: Boolean = useSupervisor || useVM def hasBitManipCrypto: Boolean = useBitManipCrypto || useCryptoNIST || useCryptoSM @@ -154,8 +155,15 @@ trait HasCoreIO extends HasTileParameters { val reset_vector = UInt(resetVectorLen.W).asInput val interrupts = new CoreInterrupts().asInput val imem = new FrontendIO - val dmem = new HellaCacheIO - val ptw = new DatapathPTWIO().flip + val dmem = new HellaCacheIO(DCacheParams( + xLen, paddrBits, vaddrBitsExtended, coreDataBits, coreMaxAddrBits, + cacheBlockBytes, pgIdxBits, coreMaxAddrBits, coreDataBits, + coreParams.lrscCycles, coreParams.dcacheReqTagBits, dcacheArbPorts, usingVM + )) // TODO: `addressBits` and `dataBits` might not be correct + val ptw = new DatapathPTWIO( + xLen, pgLevels, pgLevelBits, minPgLevels, maxPAddrBits, pgIdxBits, + vaddrBits, 
paddrBits, asIdBits, pmpGranularity, nPMPs, coreParams.customCSRs + ).flip val fpu = new FPUCoreIO(hartIdLen, xLen, fLen).flip val rocc = new RoCCCoreIO().flip val trace = Vec(coreParams.retireWidth, new TracedInstruction).asOutput diff --git a/diplomatic/src/rocket/HellaCache.scala b/diplomatic/src/rocket/HellaCache.scala deleted file mode 100644 index 5e634b29e..000000000 --- a/diplomatic/src/rocket/HellaCache.scala +++ /dev/null @@ -1,334 +0,0 @@ -// See LICENSE.SiFive for license details. -// See LICENSE.Berkeley for license details. - -package org.chipsalliance.rocket - -import chisel3._ -import chisel3.util.{isPow2,log2Ceil,log2Up,Decoupled,Valid} -import chisel3.dontTouch -import freechips.rocketchip.amba._ -import org.chipsalliance.cde.config.{Parameters, Field} -import freechips.rocketchip.diplomacy._ -import org.chipsalliance.rockettile._ -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import scala.collection.mutable.ListBuffer - -case class DCacheParams( - nSets: Int = 64, - nWays: Int = 4, - rowBits: Int = 64, - subWordBits: Option[Int] = None, - replacementPolicy: String = "random", - nTLBSets: Int = 1, - nTLBWays: Int = 32, - nTLBBasePageSectors: Int = 4, - nTLBSuperpages: Int = 4, - tagECC: Option[String] = None, - dataECC: Option[String] = None, - dataECCBytes: Int = 1, - nMSHRs: Int = 1, - nSDQ: Int = 17, - nRPQ: Int = 16, - nMMIOs: Int = 1, - blockBytes: Int = 64, - separateUncachedResp: Boolean = false, - acquireBeforeRelease: Boolean = false, - pipelineWayMux: Boolean = false, - clockGate: Boolean = false, - scratch: Option[BigInt] = None) extends L1CacheParams { - - def tagCode: Code = Code.fromString(tagECC) - def dataCode: Code = Code.fromString(dataECC) - - def dataScratchpadBytes: Int = scratch.map(_ => nSets*blockBytes).getOrElse(0) - - def replacement = new RandomReplacement(nWays) - - def silentDrop: Boolean = !acquireBeforeRelease - - require((!scratch.isDefined || nWays == 1), - "Scratchpad only allowed in 
direct-mapped cache.") - require((!scratch.isDefined || nMSHRs == 0), - "Scratchpad only allowed in blocking cache.") - if (scratch.isEmpty) - require(isPow2(nSets), s"nSets($nSets) must be pow2") -} - -trait HasL1HellaCacheParameters extends HasL1CacheParameters with HasCoreParameters { - val cacheParams = tileParams.dcache.get - val cfg = cacheParams - - def wordBits = coreDataBits - def wordBytes = coreDataBytes - def subWordBits = cacheParams.subWordBits.getOrElse(wordBits) - def subWordBytes = subWordBits / 8 - def wordOffBits = log2Up(wordBytes) - def beatBytes = cacheBlockBytes / cacheDataBeats - def beatWords = beatBytes / wordBytes - def beatOffBits = log2Up(beatBytes) - def idxMSB = untagBits-1 - def idxLSB = blockOffBits - def offsetmsb = idxLSB-1 - def offsetlsb = wordOffBits - def rowWords = rowBits/wordBits - def doNarrowRead = coreDataBits * nWays % rowBits == 0 - def eccBytes = cacheParams.dataECCBytes - val eccBits = cacheParams.dataECCBytes * 8 - val encBits = cacheParams.dataCode.width(eccBits) - val encWordBits = encBits * (wordBits / eccBits) - def encDataBits = cacheParams.dataCode.width(coreDataBits) // NBDCache only - def encRowBits = encDataBits*rowWords - def lrscCycles = coreParams.lrscCycles // ISA requires 16-insn LRSC sequences to succeed - def lrscBackoff = 3 // disallow LRSC reacquisition briefly - def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant - def nIOMSHRs = cacheParams.nMMIOs - def maxUncachedInFlight = cacheParams.nMMIOs - def dataScratchpadSize = cacheParams.dataScratchpadBytes - - require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)") - if (!usingDataScratchpad) - require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)") - // would need offset addr for puts if data width < xlen - require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)") -} - -abstract class L1HellaCacheModule(implicit val p: 
Parameters) extends Module - with HasL1HellaCacheParameters - -abstract class L1HellaCacheBundle(implicit val p: Parameters) extends ParameterizedBundle()(p) - with HasL1HellaCacheParameters - -/** Bundle definitions for HellaCache interfaces */ - -trait HasCoreMemOp extends HasL1HellaCacheParameters { - val addr = UInt(coreMaxAddrBits.W) - val idx = (usingVM && untagBits > pgIdxBits).option(UInt(coreMaxAddrBits.W)) - val tag = UInt((coreParams.dcacheReqTagBits + log2Ceil(dcacheArbPorts)).W) - val cmd = UInt(M_SZ.W) - val size = UInt(log2Ceil(coreDataBytes.log2 + 1).W) - val signed = Bool() - val dprv = UInt(PRV.SZ.W) - val dv = Bool() -} - -trait HasCoreData extends HasCoreParameters { - val data = UInt(coreDataBits.W) - val mask = UInt(coreDataBytes.W) -} - -class HellaCacheReqInternal(implicit p: Parameters) extends CoreBundle()(p) with HasCoreMemOp { - val phys = Bool() - val no_alloc = Bool() - val no_xcpt = Bool() -} - -class HellaCacheReq(implicit p: Parameters) extends HellaCacheReqInternal()(p) with HasCoreData - -class HellaCacheResp(implicit p: Parameters) extends CoreBundle()(p) - with HasCoreMemOp - with HasCoreData { - val replay = Bool() - val has_data = Bool() - val data_word_bypass = UInt(coreDataBits.W) - val data_raw = UInt(coreDataBits.W) - val store_data = UInt(coreDataBits.W) -} - -class AlignmentExceptions extends Bundle { - val ld = Bool() - val st = Bool() -} - -class HellaCacheExceptions extends Bundle { - val ma = new AlignmentExceptions - val pf = new AlignmentExceptions - val gf = new AlignmentExceptions - val ae = new AlignmentExceptions -} - -class HellaCacheWriteData(implicit p: Parameters) extends CoreBundle()(p) with HasCoreData - -class HellaCachePerfEvents extends Bundle { - val acquire = Bool() - val release = Bool() - val grant = Bool() - val tlbMiss = Bool() - val blocked = Bool() - val canAcceptStoreThenLoad = Bool() - val canAcceptStoreThenRMW = Bool() - val canAcceptLoadThenLoad = Bool() - val storeBufferEmptyAfterLoad = 
Bool() - val storeBufferEmptyAfterStore = Bool() -} - -// interface between D$ and processor/DTLB -class HellaCacheIO(implicit p: Parameters) extends CoreBundle()(p) { - val req = Decoupled(new HellaCacheReq) - val s1_kill = Output(Bool()) // kill previous cycle's req - val s1_data = Output(new HellaCacheWriteData()) // data for previous cycle's req - val s2_nack = Input(Bool()) // req from two cycles ago is rejected - val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint) - val s2_kill = Output(Bool()) // kill req from two cycles ago - val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO - val s2_paddr = Input(UInt(paddrBits.W)) // translated address - - val resp = Flipped(Valid(new HellaCacheResp)) - val replay_next = Input(Bool()) - val s2_xcpt = Input(new HellaCacheExceptions) - val s2_gpa = Input(UInt(vaddrBitsExtended.W)) - val s2_gpa_is_pte = Input(Bool()) - val uncached_resp = tileParams.dcache.get.separateUncachedResp.option(Flipped(Decoupled(new HellaCacheResp))) - val ordered = Input(Bool()) - val perf = Input(new HellaCachePerfEvents()) - - val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? - val clock_enabled = Input(Bool()) // is D$ currently being clocked? 
-} - -/** Base classes for Diplomatic TL2 HellaCaches */ - -abstract class HellaCache(staticIdForMetadataUseOnly: Int)(implicit p: Parameters) extends LazyModule - with HasNonDiplomaticTileParameters { - protected val cfg = tileParams.dcache.get - - protected def cacheClientParameters = cfg.scratch.map(x => Seq()).getOrElse(Seq(TLMasterParameters.v1( - name = s"Core ${staticIdForMetadataUseOnly} DCache", - sourceId = IdRange(0, 1 max cfg.nMSHRs), - supportsProbe = TransferSizes(cfg.blockBytes, cfg.blockBytes)))) - - protected def mmioClientParameters = Seq(TLMasterParameters.v1( - name = s"Core ${staticIdForMetadataUseOnly} DCache MMIO", - sourceId = IdRange(firstMMIO, firstMMIO + cfg.nMMIOs), - requestFifo = true)) - - def firstMMIO = (cacheClientParameters.map(_.sourceId.end) :+ 0).max - - val node = TLClientNode(Seq(TLMasterPortParameters.v1( - clients = cacheClientParameters ++ mmioClientParameters, - minLatency = 1, - requestFields = tileParams.core.useVM.option(Seq()).getOrElse(Seq(AMBAProtField()))))) - - val hartIdSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) - val mmioAddressPrefixSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) - - val module: HellaCacheModule - - def flushOnFenceI = cfg.scratch.isEmpty && !node.edges.out(0).manager.managers.forall(m => !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED || m.regionType <= RegionType.IDEMPOTENT) - - def canSupportCFlushLine = !usingVM || cfg.blockBytes * cfg.nSets <= (1 << pgIdxBits) - - require(!tileParams.core.haveCFlush || cfg.scratch.isEmpty, "CFLUSH_D_L1 instruction requires a D$") -} - -class HellaCacheBundle(val outer: HellaCache)(implicit p: Parameters) extends CoreBundle()(p) { - val cpu = Flipped((new HellaCacheIO)) - val ptw = new TLBPTWIO() - val errors = new DCacheErrors -} - -class HellaCacheModule(outer: HellaCache) extends LazyModuleImp(outer) - with HasL1HellaCacheParameters { - implicit val edge = outer.node.edges.out(0) - val (tl_out, _) 
= outer.node.out(0) - val io = IO(new HellaCacheBundle(outer)) - val io_hartid = outer.hartIdSinkNodeOpt.map(_.bundle) - val io_mmio_address_prefix = outer.mmioAddressPrefixSinkNodeOpt.map(_.bundle) - dontTouch(io.cpu.resp) // Users like to monitor these fields even if the core ignores some signals - dontTouch(io.cpu.s1_data) - - private val fifoManagers = edge.manager.managers.filter(TLFIFOFixer.allVolatile) - fifoManagers.foreach { m => - require (m.fifoId == fifoManagers.head.fifoId, - s"IOMSHRs must be FIFO for all regions with effects, but HellaCache sees\n"+ - s"${m.nodePath.map(_.name)}\nversus\n${fifoManagers.head.nodePath.map(_.name)}") - } -} - -/** Support overriding which HellaCache is instantiated */ - -case object BuildHellaCache extends Field[BaseTile => Parameters => HellaCache](HellaCacheFactory.apply) - -object HellaCacheFactory { - def apply(tile: BaseTile)(p: Parameters): HellaCache = { - if (tile.tileParams.dcache.get.nMSHRs == 0) - new DCache(tile.staticIdForMetadataUseOnly, tile.crossing)(p) - else - new NonBlockingDCache(tile.staticIdForMetadataUseOnly)(p) - } -} - -/** Mix-ins for constructing tiles that have a HellaCache */ - -trait HasHellaCache { this: BaseTile => - val module: HasHellaCacheModule - implicit val p: Parameters - var nDCachePorts = 0 - lazy val dcache: HellaCache = LazyModule(p(BuildHellaCache)(this)(p)) - - tlMasterXbar.node := dcache.node - dcache.hartIdSinkNodeOpt.map { _ := hartIdNexusNode } - dcache.mmioAddressPrefixSinkNodeOpt.map { _ := mmioAddressPrefixNexusNode } -} - -trait HasHellaCacheModule { - val outer: HasHellaCache with HasTileParameters - implicit val p: Parameters - val dcachePorts = ListBuffer[HellaCacheIO]() - val dcacheArb = Module(new HellaCacheArbiter(outer.nDCachePorts)(outer.p)) - outer.dcache.module.io.cpu <> dcacheArb.io.mem -} - -/** Metadata array used for all HellaCaches */ - -class L1Metadata(implicit p: Parameters) extends L1HellaCacheBundle()(p) { - val coh = new ClientMetadata - val tag = 
UInt(tagBits.W) -} - -object L1Metadata { - def apply(tag: Bits, coh: ClientMetadata)(implicit p: Parameters) = { - val meta = Wire(new L1Metadata) - meta.tag := tag - meta.coh := coh - meta - } -} - -class L1MetaReadReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { - val idx = UInt(idxBits.W) - val way_en = UInt(nWays.W) - val tag = UInt(tagBits.W) -} - -class L1MetaWriteReq(implicit p: Parameters) extends L1MetaReadReq()(p) { - val data = new L1Metadata -} - -class L1MetadataArray[T <: L1Metadata](onReset: () => T)(implicit p: Parameters) extends L1HellaCacheModule()(p) { - val rstVal = onReset() - val io = IO(new Bundle { - val read = Flipped(Decoupled(new L1MetaReadReq)) - val write = Flipped(Decoupled(new L1MetaWriteReq)) - val resp = Output(Vec(nWays, rstVal.cloneType)) - }) - - val rst_cnt = RegInit(0.U(log2Up(nSets+1).W)) - val rst = rst_cnt < nSets.U - val waddr = Mux(rst, rst_cnt, io.write.bits.idx) - val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt - val wmask = Mux(rst || (nWays == 1).B, (-1).S, io.write.bits.way_en.asSInt).asBools - val rmask = Mux(rst || (nWays == 1).B, (-1).S, io.read.bits.way_en.asSInt).asBools - when (rst) { rst_cnt := rst_cnt+1.U } - - val metabits = rstVal.getWidth - val tag_array = SyncReadMem(nSets, Vec(nWays, UInt(metabits.W))) - val wen = rst || io.write.valid - when (wen) { - tag_array.write(waddr, VecInit.fill(nWays)(wdata), wmask) - } - io.resp := tag_array.read(io.read.bits.idx, io.read.fire()).map(_.asTypeOf(chiselTypeOf(rstVal))) - - io.read.ready := !wen // so really this could be a 6T RAM - io.write.ready := !rst -} diff --git a/diplomatic/src/rocket/RocketCore.scala b/diplomatic/src/rocket/RocketCore.scala index 523819d94..bc0060336 100644 --- a/diplomatic/src/rocket/RocketCore.scala +++ b/diplomatic/src/rocket/RocketCore.scala @@ -11,6 +11,7 @@ import org.chipsalliance.rockettile._ import freechips.rocketchip.util._ import freechips.rocketchip.util.property import 
scala.collection.mutable.ArrayBuffer +import org.chipsalliance.rocket.CustomCSRs case class RocketCoreParams( bootFreqHz: BigInt = 0, @@ -65,7 +66,7 @@ case class RocketCoreParams( val traceHasWdata: Boolean = false // ooo wb, so no wdata in trace override val customIsaExt = Some("Xrocket") // CEASE instruction override def minFLen: Int = fpu.map(_.minFLen).getOrElse(32) - override def customCSRs(implicit p: Parameters) = new RocketCustomCSRs + override def customCSRs(implicit p: Parameters) = new CustomCSRs(xLen) // NOTE(review): CustomCSRs now takes xLen explicitly; confirm xLen is in scope here } trait HasRocketCoreParameters extends HasCoreParameters { diff --git a/rocket/src/CSR.scala b/rocket/src/CSR.scala new file mode 100644 index 000000000..947f7974f --- /dev/null +++ b/rocket/src/CSR.scala @@ -0,0 +1,140 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.rocket + +import chisel3._ +import chisel3.util._ + +object CSR +{ + // commands + val SZ = 3 + def X = BitPat.dontCare(SZ) + def N = 0.U(SZ.W) + def R = 2.U(SZ.W) + def I = 4.U(SZ.W) + def W = 5.U(SZ.W) + def S = 6.U(SZ.W) + def C = 7.U(SZ.W) + + // mask a CSR cmd with a valid bit + def maskCmd(valid: Bool, cmd: UInt): UInt = { + // all commands less than CSR.I are treated by CSRFile as NOPs + cmd & ~Mux(valid, 0.U, CSR.I) + } + + val ADDRSZ = 12 + + def modeLSB: Int = 8 + def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) + def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) + + def busErrorIntCause = 128 + def debugIntCause = 14 // keep in sync with MIP.debug + def debugTriggerCause = { + val res = debugIntCause + require(!(Causes.all contains res)) + res + } + def rnmiIntCause = 13 // NMI: Higher numbers = higher priority, must not reuse debugIntCause + def rnmiBEUCause = 12 + + val firstCtr = CSRs.cycle + val firstCtrH = CSRs.cycleh + val firstHPC = CSRs.hpmcounter3 + val firstHPCH = CSRs.hpmcounter3h + val firstHPE = CSRs.mhpmevent3 + val firstMHPC = CSRs.mhpmcounter3 + val firstMHPCH = 
CSRs.mhpmcounter3h + val firstHPM = 3 + val nCtr = 32 + val nHPM = nCtr - firstHPM + val hpmWidth = 40 + + val maxPMPs = 16 +} + + +class MStatus extends Bundle { + // not truly part of mstatus, but convenient + val debug = Bool() + val cease = Bool() + val wfi = Bool() + val isa = UInt(32.W) + + val dprv = UInt(PRV.SZ.W) // effective prv for data accesses + val dv = Bool() // effective v for data accesses + val prv = UInt(PRV.SZ.W) + val v = Bool() + + val sd = Bool() + val zero2 = UInt(23.W) + val mpv = Bool() + val gva = Bool() + val mbe = Bool() + val sbe = Bool() + val sxl = UInt(2.W) + val uxl = UInt(2.W) + val sd_rv32 = Bool() + val zero1 = UInt(8.W) + val tsr = Bool() + val tw = Bool() + val tvm = Bool() + val mxr = Bool() + val sum = Bool() + val mprv = Bool() + val xs = UInt(2.W) + val fs = UInt(2.W) + val mpp = UInt(2.W) + val vs = UInt(2.W) + val spp = UInt(1.W) + val mpie = Bool() + val ube = Bool() + val spie = Bool() + val upie = Bool() + val mie = Bool() + val hie = Bool() + val sie = Bool() + val uie = Bool() +} + +class HStatus extends Bundle { + val zero6 = UInt(30.W) + val vsxl = UInt(2.W) + val zero5 = UInt(9.W) + val vtsr = Bool() + val vtw = Bool() + val vtvm = Bool() + val zero3 = UInt(2.W) + val vgein = UInt(6.W) + val zero2 = UInt(2.W) + val hu = Bool() + val spvp = Bool() + val spv = Bool() + val gva = Bool() + val vsbe = Bool() + val zero1 = UInt(5.W) +} + +class PTBR( + xLen: Int, + pgLevels: Int, + minPgLevels: Int, + maxPAddrBits: Int, + pgIdxBits: Int) extends Bundle { + def additionalPgLevels = mode(log2Ceil(pgLevels-minPgLevels+1)-1, 0) + def pgLevelsToMode(i: Int) = (xLen, i) match { + case (32, 2) => 1 + case (64, x) if x >= 3 && x <= 6 => x + 5 + } + val (modeBits, maxASIdBits) = xLen match { + case 32 => (1, 9) + case 64 => (4, 16) + } + require(modeBits + maxASIdBits + maxPAddrBits - pgIdxBits == xLen) + + val mode = UInt(modeBits.W) + val asid = UInt(maxASIdBits.W) + val ppn = UInt((maxPAddrBits - pgIdxBits).W) +} \ No 
newline at end of file diff --git a/rocket/src/ConstInCSR.scala b/rocket/src/ConstInCSR.scala index b2e67930e..576edb669 100644 --- a/rocket/src/ConstInCSR.scala +++ b/rocket/src/ConstInCSR.scala @@ -14,53 +14,4 @@ object PRV val S = 1 val H = 2 val M = 3 -} - -object CSR -{ - // commands - val SZ = 3 - def X = BitPat.dontCare(SZ) - def N = 0.U(SZ.W) - def R = 2.U(SZ.W) - def I = 4.U(SZ.W) - def W = 5.U(SZ.W) - def S = 6.U(SZ.W) - def C = 7.U(SZ.W) - - // mask a CSR cmd with a valid bit - def maskCmd(valid: Bool, cmd: UInt): UInt = { - // all commands less than CSR.I are treated by CSRFile as NOPs - cmd & ~Mux(valid, 0.U, CSR.I) - } - - val ADDRSZ = 12 - - def modeLSB: Int = 8 - def mode(addr: Int): Int = (addr >> modeLSB) % (1 << PRV.SZ) - def mode(addr: UInt): UInt = addr(modeLSB + PRV.SZ - 1, modeLSB) - - def busErrorIntCause = 128 - def debugIntCause = 14 // keep in sync with MIP.debug - def debugTriggerCause = { - val res = debugIntCause - require(!(Causes.all contains res)) - res - } - def rnmiIntCause = 13 // NMI: Higher numbers = higher priority, must not reuse debugIntCause - def rnmiBEUCause = 12 - - val firstCtr = CSRs.cycle - val firstCtrH = CSRs.cycleh - val firstHPC = CSRs.hpmcounter3 - val firstHPCH = CSRs.hpmcounter3h - val firstHPE = CSRs.mhpmevent3 - val firstMHPC = CSRs.mhpmcounter3 - val firstMHPCH = CSRs.mhpmcounter3h - val firstHPM = 3 - val nCtr = 32 - val nHPM = nCtr - firstHPM - val hpmWidth = 40 - - val maxPMPs = 16 -} +} \ No newline at end of file diff --git a/diplomatic/src/rocket/CustomCSRs.scala b/rocket/src/CustomCSRs.scala similarity index 73% rename from diplomatic/src/rocket/CustomCSRs.scala rename to rocket/src/CustomCSRs.scala index f2424da45..1e74c3aa8 100644 --- a/diplomatic/src/rocket/CustomCSRs.scala +++ b/rocket/src/CustomCSRs.scala @@ -1,24 +1,37 @@ // See LICENSE.SiFive for license details. 
-package org.chipsalliance.rockettile +package org.chipsalliance.rocket import chisel3._ -import org.chipsalliance.cde.config.Parameters - case class CustomCSR(id: Int, mask: BigInt, init: Option[BigInt]) -object CustomCSR { - def constant(id: Int, value: BigInt): CustomCSR = CustomCSR(id, BigInt(0), Some(value)) +object CustomCSRs { + val mnscratch = 0x350 + val mnepc = 0x351 + val mncause = 0x352 + val mnstatus = 0x353 + val all = { + val res = collection.mutable.ArrayBuffer[Int]() + res += mnscratch + res += mnepc + res += mncause + res += mnstatus + res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) + res.toArray + } } -class CustomCSRIO(implicit p: Parameters) extends CoreBundle { +class CustomCSRIO(xLen: Int) extends Bundle { val wen = Bool() val wdata = UInt(xLen.W) val value = UInt(xLen.W) } -class CustomCSRs(implicit p: Parameters) extends CoreBundle { +class CustomCSRs(xLen: Int) extends Bundle { // Not all cores have these CSRs, but those that do should follow the same // numbering conventions. So we list them here but default them to None. 
protected def bpmCSRId = 0x7c0 @@ -30,7 +43,7 @@ class CustomCSRs(implicit p: Parameters) extends CoreBundle { // If you override this, you'll want to concatenate super.decls def decls: Seq[CustomCSR] = bpmCSR.toSeq ++ chickenCSR - val csrs = Vec(decls.size, new CustomCSRIO) + val csrs = Vec(decls.size, new CustomCSRIO(xLen)) def flushBTB = getOrElse(bpmCSR, _.wen, false.B) def bpmStatic = getOrElse(bpmCSR, _.value(0), false.B) diff --git a/rocket/src/CustomInstructions.scala b/rocket/src/CustomInstructions.scala index f4770184c..9411f9229 100644 --- a/rocket/src/CustomInstructions.scala +++ b/rocket/src/CustomInstructions.scala @@ -35,22 +35,3 @@ object CustomInstructions { def CUSTOM3_RD_RS1 = BitPat("b?????????????????110?????1111011") def CUSTOM3_RD_RS1_RS2 = BitPat("b?????????????????111?????1111011") } - -object CustomCSRs { - val mnscratch = 0x350 - val mnepc = 0x351 - val mncause = 0x352 - val mnstatus = 0x353 - val all = { - val res = collection.mutable.ArrayBuffer[Int]() - res += mnscratch - res += mnepc - res += mncause - res += mnstatus - res.toArray - } - val all32 = { - val res = collection.mutable.ArrayBuffer(all:_*) - res.toArray - } -} diff --git a/diplomatic/src/rocket/DCache.scala b/rocket/src/DCache.scala similarity index 72% rename from diplomatic/src/rocket/DCache.scala rename to rocket/src/DCache.scala index f15c97a07..4fb3b5006 100644 --- a/diplomatic/src/rocket/DCache.scala +++ b/rocket/src/DCache.scala @@ -4,16 +4,9 @@ package org.chipsalliance.rocket import chisel3._ import chisel3.util._ -import freechips.rocketchip.amba._ -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.diplomacy._ -import org.chipsalliance.rockettile.{CoreBundle, LookupByHartId} -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property -import chisel3.{DontCare, WireInit, dontTouch, withClock} import chisel3.internal.sourceinfo.SourceInfo -import TLMessages._ +import 
org.chipsalliance.rocket.MemoryOpConstants._ +import org.chipsalliance.rocket.util._ // TODO: delete this trait once deduplication is smart enough to avoid globally inlining matching circuits trait InlineInstance { self: chisel3.experimental.BaseModule => @@ -22,83 +15,82 @@ trait InlineInstance { self: chisel3.experimental.BaseModule => def toFirrtl: firrtl.annotations.Annotation = firrtl.passes.InlineAnnotation(self.toNamed) } ) } -class DCacheErrors(implicit p: Parameters) extends L1HellaCacheBundle()(p) - with CanHaveErrors { - val correctable = (cacheParams.tagCode.canCorrect || cacheParams.dataCode.canCorrect).option(Valid(UInt(paddrBits.W))) - val uncorrectable = (cacheParams.tagCode.canDetect || cacheParams.dataCode.canDetect).option(Valid(UInt(paddrBits.W))) - val bus = Valid(UInt(paddrBits.W)) +class DCacheErrors(cacheParams: DCacheParams) extends Bundle { + val correctable = (cacheParams.tagCode.canCorrect || cacheParams.dataCode.canCorrect).option(Valid(UInt(cacheParams.paddrBits.W))) + val uncorrectable = (cacheParams.tagCode.canDetect || cacheParams.dataCode.canDetect).option(Valid(UInt(cacheParams.paddrBits.W))) + val bus = Valid(UInt(cacheParams.paddrBits.W)) } -class DCacheDataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { - val addr = UInt(untagBits.W) +class DCacheDataReq(cacheParams: DCacheParams) extends Bundle { + val addr = UInt(cacheParams.untagBits.W) val write = Bool() - val wdata = UInt((encBits * rowBytes / eccBytes).W) - val wordMask = UInt((rowBytes / subWordBytes).W) - val eccMask = UInt((wordBytes / eccBytes).W) - val way_en = UInt(nWays.W) + val wdata = UInt((cacheParams.encBits * cacheParams.rowBytes / cacheParams.eccBytes).W) + val wordMask = UInt((cacheParams.rowBytes / cacheParams.subWordBytes).W) + val eccMask = UInt((cacheParams.wordBytes / cacheParams.eccBytes).W) + val way_en = UInt(cacheParams.nWays.W) } -class DCacheDataArray(implicit p: Parameters) extends L1HellaCacheModule()(p) { +class 
DCacheDataArray(cacheParams: DCacheParams) extends Module { val io = IO(new Bundle { - val req = Flipped(Valid(new DCacheDataReq)) - val resp = Output(Vec(nWays, UInt((req.bits.wdata.getWidth).W))) + val req = Flipped(Valid(new DCacheDataReq(cacheParams))) + val resp = Output(Vec(cacheParams.nWays, UInt((req.bits.wdata.getWidth).W))) }) - require(rowBits % subWordBits == 0, "rowBits must be a multiple of subWordBits") - val eccMask = if (eccBits == subWordBits) Seq(true.B) else io.req.bits.eccMask.asBools - val wMask = if (nWays == 1) eccMask else (0 until nWays).flatMap(i => eccMask.map(_ && io.req.bits.way_en(i))) - val wWords = io.req.bits.wdata.grouped(encBits * (subWordBits / eccBits)) - val addr = io.req.bits.addr >> rowOffBits - val data_arrays = Seq.tabulate(rowBits / subWordBits) { + require(cacheParams.rowBits % cacheParams.subWordBits == 0, "rowBits must be a multiple of subWordBits") + val eccMask = if (cacheParams.eccBits == cacheParams.subWordBits) Seq(true.B) else io.req.bits.eccMask.asBools + val wMask = if (cacheParams.nWays == 1) eccMask else (0 until cacheParams.nWays).flatMap(i => eccMask.map(_ && io.req.bits.way_en(i))) + val wWords = io.req.bits.wdata.grouped(cacheParams.encBits * (cacheParams.subWordBits / cacheParams.eccBits)) + val addr = io.req.bits.addr >> cacheParams.rowOffBits + val data_arrays = Seq.tabulate(cacheParams.rowBits / cacheParams.subWordBits) { i => DescribedSRAM( name = s"data_arrays_${i}", desc = "DCache Data Array", - size = nSets * cacheBlockBytes / rowBytes, - data = Vec(nWays * (subWordBits / eccBits), UInt(encBits.W)) + size = cacheParams.nSets * cacheParams.cacheBlockBytes / cacheParams.rowBytes, + data = Vec(cacheParams.nWays * (cacheParams.subWordBits / cacheParams.eccBits), UInt(cacheParams.encBits.W)) ) } val rdata = for ((array , i) <- data_arrays.zipWithIndex) yield { val valid = io.req.valid && ((data_arrays.size == 1).B || io.req.bits.wordMask(i)) when (valid && io.req.bits.write) { - val wMaskSlice = (0 
until wMask.size).filter(j => i % (wordBits/subWordBits) == (j % (wordBytes/eccBytes)) / (subWordBytes/eccBytes)).map(wMask(_)) - val wData = wWords(i).grouped(encBits) - array.write(addr, VecInit((0 until nWays).flatMap(i => wData)), wMaskSlice) + val wMaskSlice = (0 until wMask.size).filter(j => i % (cacheParams.wordBits/cacheParams.subWordBits) == (j % (cacheParams.wordBytes/cacheParams.eccBytes)) / (cacheParams.subWordBytes/cacheParams.eccBytes)).map(wMask(_)) + val wData = wWords(i).grouped(cacheParams.encBits) + array.write(addr, VecInit((0 until cacheParams.nWays).flatMap(i => wData)), wMaskSlice) } val data = array.read(addr, valid && !io.req.bits.write) - data.grouped(subWordBits / eccBits).map(_.asUInt).toSeq + data.grouped(cacheParams.subWordBits / cacheParams.eccBits).map(_.asUInt).toSeq } (io.resp zip rdata.transpose).foreach { case (resp, data) => resp := data.asUInt } } -class DCacheMetadataReq(implicit p: Parameters) extends L1HellaCacheBundle()(p) { +class DCacheMetadataReq(cacheParams: DCacheParams) extends Bundle { val write = Bool() - val addr = UInt(vaddrBitsExtended.W) - val idx = UInt(idxBits.W) - val way_en = UInt(nWays.W) - val data = UInt(cacheParams.tagCode.width(new L1Metadata().getWidth).W) + val addr = UInt(cacheParams.vaddrBitsExtended.W) + val idx = UInt(cacheParams.idxBits.W) + val way_en = UInt(cacheParams.nWays.W) + val data = UInt(cacheParams.tagCode.width(new L1Metadata(cacheParams.tagBits).getWidth).W) } -class DCache(staticIdForMetadataUseOnly: Int, val crossing: ClockCrossingType)(implicit p: Parameters) extends HellaCache(staticIdForMetadataUseOnly)(p) { - override lazy val module = new DCacheModule(this) +class DCache(staticIdForMetadataUseOnly: Int, val crossing: ClockCrossingType, cacheParams: DCacheParams, haveCFlush: Boolean, usingAtomicsInCache: Boolean) extends HellaCache(staticIdForMetadataUseOnly, cacheParams) { + override lazy val module = new DCacheModule(this, cacheParams, haveCFlush, usingAtomicsInCache) } -class 
DCacheTLBPort(implicit p: Parameters) extends CoreBundle()(p) { - val req = Flipped(Decoupled(new TLBReq(coreDataBytes.log2))) - val s1_resp = Output(new TLBResp) +class DCacheTLBPort(cacheParams: DCacheParams) extends Bundle { + val req = Flipped(Decoupled(new TLBReq(cacheParams.coreDataBytes.log2, cacheParams.vaddrBitsExtended))) + val s1_resp = Output(new TLBResp(cacheParams.paddrBits, cacheParams.vaddrBitsExtended)) val s2_kill = Input(Bool()) } -class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { - val tlb_port = IO(new DCacheTLBPort) +class DCacheModule(outer: DCache, cacheParams: DCacheParams, haveCFlush: Boolean, usingAtomicsInCache: Boolean) extends HellaCacheModule(outer) { + val tlb_port = IO(new DCacheTLBPort(cacheParams)) val tECC = cacheParams.tagCode val dECC = cacheParams.dataCode - require(subWordBits % eccBits == 0, "subWordBits must be a multiple of eccBits") - require(eccBytes == 1 || !dECC.isInstanceOf[IdentityCode]) + require(cacheParams.subWordBits % cacheParams.eccBits == 0, "subWordBits must be a multiple of eccBits") + require(cacheParams.eccBytes == 1 || !dECC.isInstanceOf[IdentityCode]) require(cacheParams.silentDrop || cacheParams.acquireBeforeRelease, "!silentDrop requires acquireBeforeRelease") - val usingRMW = eccBytes > 1 || usingAtomicsInCache + val usingRMW = cacheParams.eccBytes > 1 || usingAtomicsInCache val mmioOffset = outer.firstMMIO edge.manager.requireFifo(TLFIFOFixer.allVolatile) // TileLink pipelining MMIO requests @@ -110,23 +102,23 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { else ClockGate(clock, clock_en_reg, "dcache_clock_gate") class DCacheModuleImpl { // entering gated-clock domain - val tlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBSets, nTLBWays, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages))) - val pma_checker = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBSets, nTLBWays, cacheParams.nTLBBasePageSectors, 
cacheParams.nTLBSuperpages)) with InlineInstance) + val tlb = Module(new TLB(false, log2Ceil(cacheParams.coreDataBytes), TLBConfig(cacheParams.nTLBSets, cacheParams.nTLBWays, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages))) + val pma_checker = Module(new TLB(false, log2Ceil(cacheParams.coreDataBytes), TLBConfig(cacheParams.nTLBSets, cacheParams.nTLBWays, cacheParams.nTLBBasePageSectors, cacheParams.nTLBSuperpages)) with InlineInstance) // tags - val replacer = ReplacementPolicy.fromString(cacheParams.replacementPolicy, nWays) - val metaArb = Module(new Arbiter(new DCacheMetadataReq, 8) with InlineInstance) + val replacer = ReplacementPolicy.fromString(cacheParams.replacementPolicy, cacheParams.nWays) + val metaArb = Module(new Arbiter(new DCacheMetadataReq(cacheParams), 8) with InlineInstance) val tag_array = DescribedSRAM( name = "tag_array", desc = "DCache Tag Array", - size = nSets, - data = Vec(nWays, chiselTypeOf(metaArb.io.out.bits.data)) + size = cacheParams.nSets, + data = Vec(cacheParams.nWays, chiselTypeOf(metaArb.io.out.bits.data)) ) // data - val data = Module(new DCacheDataArray) - val dataArb = Module(new Arbiter(new DCacheDataReq, 4) with InlineInstance) + val data = Module(new DCacheDataArray(cacheParams)) + val dataArb = Module(new Arbiter(new DCacheDataReq(cacheParams), 4) with InlineInstance) dataArb.io.in.tail.foreach(_.bits.wdata := dataArb.io.in.head.bits.wdata) // tie off write ports by default data.io.req.bits <> dataArb.io.out.bits data.io.req.valid := dataArb.io.out.valid @@ -137,8 +129,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { tl_out.a <> { val a_queue_depth = outer.crossing match { case RationalCrossing(_) => // TODO make this depend on the actual ratio? 
- if (cacheParams.separateUncachedResp) (maxUncachedInFlight + 1) / 2 - else 2 min maxUncachedInFlight-1 + if (cacheParams.separateUncachedResp) (cacheParams.maxUncachedInFlight + 1) / 2 + else 2 min cacheParams.maxUncachedInFlight-1 case SynchronousCrossing(BufferParams.none) => 1 // Need some buffering to guarantee livelock freedom case SynchronousCrossing(_) => 0 // Adequate buffering within the crossing case _: AsynchronousCrossing => 0 // Adequate buffering within the crossing @@ -149,7 +141,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val (tl_out_c, release_queue_empty) = if (cacheParams.acquireBeforeRelease) { - val q = Module(new Queue(chiselTypeOf(tl_out.c.bits), cacheDataBeats, flow = true)) + val q = Module(new Queue(chiselTypeOf(tl_out.c.bits), cacheParams.cacheDataBeats, flow = true)) tl_out.c <> q.io.deq (q.io.enq, q.io.count === 0.U) } else { @@ -167,8 +159,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s0_clk_en = metaArb.io.out.valid && !metaArb.io.out.bits.write val s0_req = WireInit(io.cpu.req.bits) - s0_req.addr := Cat(metaArb.io.out.bits.addr >> blockOffBits, io.cpu.req.bits.addr(blockOffBits-1,0)) - s0_req.idx.foreach(_ := Cat(metaArb.io.out.bits.idx, s0_req.addr(blockOffBits-1, 0))) + s0_req.addr := Cat(metaArb.io.out.bits.addr >> cacheParams.blockOffBits, io.cpu.req.bits.addr(cacheParams.blockOffBits-1,0)) + s0_req.idx.foreach(_ := Cat(metaArb.io.out.bits.idx, s0_req.addr(cacheParams.blockOffBits-1, 0))) when (!metaArb.io.in(7).ready) { s0_req.phys := true.B } val s1_req = RegEnable(s0_req, s0_clk_en) val s1_vaddr = Cat(s1_req.idx.getOrElse(s1_req.addr) >> tagLSB, s1_req.addr(tagLSB-1, 0)) @@ -193,15 +185,15 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s1_waw_hazard = Wire(Bool()) val s_ready :: s_voluntary_writeback :: s_probe_rep_dirty :: s_probe_rep_clean :: s_probe_retry :: s_probe_rep_miss :: s_voluntary_write_meta :: s_probe_write_meta :: s_dummy :: 
s_voluntary_release :: Nil = Enum(10) - val supports_flush = outer.flushOnFenceI || coreParams.haveCFlush + val supports_flush = outer.flushOnFenceI || haveCFlush val flushed = RegInit(true.B) val flushing = RegInit(false.B) val flushing_req = Reg(chiselTypeOf(s1_req)) val cached_grant_wait = RegInit(false.B) val resetting = RegInit(false.B) - val flushCounter = RegInit((nSets * (nWays-1)).U(log2Ceil(nSets * nWays).W)) + val flushCounter = RegInit((cacheParams.nSets * (cacheParams.nWays-1)).U(log2Ceil(cacheParams.nSets * cacheParams.nWays).W)) val release_ack_wait = RegInit(false.B) - val release_ack_addr = Reg(UInt(paddrBits.W)) + val release_ack_addr = Reg(UInt(cacheParams.paddrBits.W)) val release_state = RegInit(s_ready) val refill_way = Reg(UInt()) val any_pstore_valid = Wire(Bool()) @@ -210,9 +202,9 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { io.cpu.req.ready := (release_state === s_ready) && !cached_grant_wait && !s1_nack // I/O MSHRs - val uncachedInFlight = RegInit(VecInit(Seq.fill(maxUncachedInFlight)(false.B))) - val uncachedReqs = Reg(Vec(maxUncachedInFlight, new HellaCacheReq)) - val uncachedResp = WireInit(new HellaCacheReq, DontCare) + val uncachedInFlight = RegInit(VecInit(Seq.fill(cacheParams.maxUncachedInFlight)(false.B))) + val uncachedReqs = Reg(Vec(cacheParams.maxUncachedInFlight, new HellaCacheReq(cacheParams))) + val uncachedResp = WireInit(new HellaCacheReq(cacheParams), DontCare) // hit initiation path val s0_read = isRead(io.cpu.req.bits.cmd) @@ -221,23 +213,23 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { dataArb.io.in(3).bits.write := false.B dataArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.idx.getOrElse(io.cpu.req.bits.addr) >> tagLSB, io.cpu.req.bits.addr(tagLSB-1, 0)) dataArb.io.in(3).bits.wordMask := { - val mask = (subWordBytes.log2 until rowOffBits).foldLeft(1.U) { case (in, i) => - val upper_mask = Mux((i >= wordBytes.log2).B || io.cpu.req.bits.size <= i.U, 0.U, - ((BigInt(1) << 
(1 << (i - subWordBytes.log2)))-1).U) + val mask = (cacheParams.subWordBytes.log2 until cacheParams.rowOffBits).foldLeft(1.U) { case (in, i) => + val upper_mask = Mux((i >= cacheParams.wordBytes.log2).B || io.cpu.req.bits.size <= i.U, 0.U, + ((BigInt(1) << (1 << (i - cacheParams.subWordBytes.log2)))-1).U) val upper = Mux(io.cpu.req.bits.addr(i), in, 0.U) | upper_mask val lower = Mux(io.cpu.req.bits.addr(i), 0.U, in) upper ## lower } - Fill(subWordBytes / eccBytes, mask) + Fill(cacheParams.subWordBytes / cacheParams.eccBytes, mask) } - dataArb.io.in(3).bits.eccMask := ~0.U((wordBytes / eccBytes).W) - dataArb.io.in(3).bits.way_en := ~0.U(nWays.W) + dataArb.io.in(3).bits.eccMask := ~0.U((cacheParams.wordBytes / cacheParams.eccBytes).W) + dataArb.io.in(3).bits.way_en := ~0.U(cacheParams.nWays.W) when (!dataArb.io.in(3).ready && s0_read) { io.cpu.req.ready := false.B } val s1_did_read = RegEnable(dataArb.io.in(3).ready && (io.cpu.req.valid && needsRead(io.cpu.req.bits)), s0_clk_en) val s1_read_mask = RegEnable(dataArb.io.in(3).bits.wordMask, s0_clk_en) metaArb.io.in(7).valid := io.cpu.req.valid metaArb.io.in(7).bits.write := false.B - metaArb.io.in(7).bits.idx := dataArb.io.in(3).bits.addr(idxMSB, idxLSB) + metaArb.io.in(7).bits.idx := dataArb.io.in(3).bits.addr(cacheParams.idxMSB, cacheParams.idxLSB) metaArb.io.in(7).bits.addr := io.cpu.req.bits.addr metaArb.io.in(7).bits.way_en := metaArb.io.in(4).bits.way_en metaArb.io.in(7).bits.data := metaArb.io.in(4).bits.data @@ -272,12 +264,12 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { pma_checker.io.req.bits.prv := s1_req.dprv pma_checker.io.req.bits.v := s1_req.dv - val s1_paddr = Cat(Mux(s1_tlb_req_valid, s1_req.addr(paddrBits-1, pgIdxBits), tlb.io.resp.paddr >> pgIdxBits), s1_req.addr(pgIdxBits-1, 0)) + val s1_paddr = Cat(Mux(s1_tlb_req_valid, s1_req.addr(cacheParams.paddrBits-1, cacheParams.pgIdxBits), tlb.io.resp.paddr >> cacheParams.pgIdxBits), s1_req.addr(cacheParams.pgIdxBits-1, 0)) val 
s1_victim_way = Wire(UInt()) val (s1_hit_way, s1_hit_state, s1_meta) = - if (usingDataScratchpad) { + if (cacheParams.usingDataScratchpad) { val baseAddr = p(LookupByHartId)(_.dcache.flatMap(_.scratch.map(_.U)), io_hartid.get) | io_mmio_address_prefix.get - val inScratchpad = s1_paddr >= baseAddr && s1_paddr < baseAddr + (nSets * cacheBlockBytes).U + val inScratchpad = s1_paddr >= baseAddr && s1_paddr < baseAddr + (cacheParams.nSets * cacheParams.cacheBlockBytes).U val hitState = Mux(inScratchpad, ClientMetadata.maximum, ClientMetadata.onReset) val dummyMeta = L1Metadata(0.U, ClientMetadata.onReset) (inScratchpad, hitState, Seq(tECC.encode(dummyMeta.asUInt))) @@ -285,11 +277,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val metaReq = metaArb.io.out val metaIdx = metaReq.bits.idx when (metaReq.valid && metaReq.bits.write) { - val wmask = if (nWays == 1) Seq(true.B) else metaReq.bits.way_en.asBools - tag_array.write(metaIdx, VecInit(Seq.fill(nWays)(metaReq.bits.data)), wmask) + val wmask = if (cacheParams.nWays == 1) Seq(true.B) else metaReq.bits.way_en.asBools + tag_array.write(metaIdx, VecInit(Seq.fill(cacheParams.nWays)(metaReq.bits.data)), wmask) } val s1_meta = tag_array.read(metaIdx, metaReq.valid && !metaReq.bits.write) - val s1_meta_uncorrected = s1_meta.map(tECC.decode(_).uncorrected.asTypeOf(new L1Metadata)) + val s1_meta_uncorrected = s1_meta.map(tECC.decode(_).uncorrected.asTypeOf(new L1Metadata(cacheParams.tagBits))) val s1_tag = s1_paddr >> tagLSB val s1_meta_hit_way = s1_meta_uncorrected.map(r => r.coh.isValid() && r.tag === s1_tag).asUInt val s1_meta_hit_state = ( @@ -297,10 +289,10 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { .reduce (_|_)).asTypeOf(chiselTypeOf(ClientMetadata.onReset)) (s1_meta_hit_way, s1_meta_hit_state, s1_meta) } - val s1_data_way = WireDefault(if (nWays == 1) 1.U else Mux(inWriteback, releaseWay, s1_hit_way)) + val s1_data_way = WireDefault(if (cacheParams.nWays == 1) 1.U 
else Mux(inWriteback, releaseWay, s1_hit_way)) val tl_d_data_encoded = Wire(chiselTypeOf(encodeData(tl_out.d.bits.data, false.B))) val s1_all_data_ways = VecInit(data.io.resp ++ (!cacheParams.separateUncachedResp).option(tl_d_data_encoded)) - val s1_mask_xwr = new StoreGen(s1_req.size, s1_req.addr, 0.U, wordBytes).mask + val s1_mask_xwr = new StoreGen(s1_req.size, s1_req.addr, 0.U, cacheParams.wordBytes).mask val s1_mask = Mux(s1_req.cmd === M_PWR, io.cpu.s1_data.mask, s1_mask_xwr) // for partial writes, s1_data.mask must be a subset of s1_mask_xwr assert(!(s1_valid_masked && s1_req.cmd === M_PWR) || (s1_mask_xwr | ~io.cpu.s1_data.mask).andR) @@ -335,23 +327,23 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s2_meta_correctable_errors = s1_meta_decoded.map(m => RegEnable(m.correctable, s1_meta_clk_en)).asUInt val s2_meta_uncorrectable_errors = s1_meta_decoded.map(m => RegEnable(m.uncorrectable, s1_meta_clk_en)).asUInt val s2_meta_error_uncorrectable = s2_meta_uncorrectable_errors.orR - val s2_meta_corrected = s1_meta_decoded.map(m => RegEnable(m.corrected, s1_meta_clk_en).asTypeOf(new L1Metadata)) + val s2_meta_corrected = s1_meta_decoded.map(m => RegEnable(m.corrected, s1_meta_clk_en).asTypeOf(new L1Metadata(cacheParams.tagBits))) val s2_meta_error = (s2_meta_uncorrectable_errors | s2_meta_correctable_errors).orR val s2_flush_valid = s2_flush_valid_pre_tag_ecc && !s2_meta_error val s2_data = { - val wordsPerRow = rowBits / subWordBits + val wordsPerRow = cacheParams.rowBits / cacheParams.subWordBits val en = s1_valid || inWriteback || io.cpu.replay_next val word_en = Mux(inWriteback, Fill(wordsPerRow, 1.U), Mux(s1_did_read, s1_read_mask, 0.U)) - val s1_way_words = s1_all_data_ways.map(_.grouped(dECC.width(eccBits) * (subWordBits / eccBits))) + val s1_way_words = s1_all_data_ways.map(_.grouped(dECC.width(cacheParams.eccBits) * (cacheParams.subWordBits / cacheParams.eccBits))) if (cacheParams.pipelineWayMux) { val s1_word_en = 
Mux(io.cpu.replay_next, 0.U, word_en) (for (i <- 0 until wordsPerRow) yield { val s2_way_en = RegEnable(Mux(s1_word_en(i), s1_data_way, 0.U), en) - val s2_way_words = (0 until nWays).map(j => RegEnable(s1_way_words(j)(i), en && word_en(i))) - (0 until nWays).map(j => Mux(s2_way_en(j), s2_way_words(j), 0.U)).reduce(_|_) + val s2_way_words = (0 until cacheParams.nWays).map(j => RegEnable(s1_way_words(j)(i), en && word_en(i))) + (0 until cacheParams.nWays).map(j => Mux(s2_way_en(j), s2_way_words(j), 0.U)).reduce(_|_) }).asUInt } else { - val s1_word_en = Mux(!io.cpu.replay_next, word_en, UIntToOH(uncachedResp.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes)), wordsPerRow)) + val s1_word_en = Mux(!io.cpu.replay_next, word_en, UIntToOH(uncachedResp.addr.extract(log2Up(cacheParams.rowBits/8)-1, log2Up(cacheParams.wordBytes)), wordsPerRow)) (for (i <- 0 until wordsPerRow) yield { RegEnable(Mux1H(Mux(s1_word_en(i), s1_data_way, 0.U), s1_way_words.map(_(i))), en) }).asUInt @@ -366,13 +358,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s2_hit_valid = s2_hit_state.isValid() val (s2_hit, s2_grow_param, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) val s2_data_decoded = decodeData(s2_data) - val s2_word_idx = s2_req.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes)) + val s2_word_idx = s2_req.addr.extract(log2Up(cacheParams.rowBits/8)-1, log2Up(cacheParams.wordBytes)) val s2_data_error = s2_data_decoded.map(_.error).orR val s2_data_error_uncorrectable = s2_data_decoded.map(_.uncorrectable).orR val s2_data_corrected = (s2_data_decoded.map(_.corrected): Seq[UInt]).asUInt val s2_data_uncorrected = (s2_data_decoded.map(_.uncorrected): Seq[UInt]).asUInt val s2_valid_hit_maybe_flush_pre_data_ecc_and_waw = s2_valid_masked && !s2_meta_error && s2_hit - val s2_no_alloc_hazard = if (!usingVM || pgIdxBits >= untagBits) false.B else { + val s2_no_alloc_hazard = if (!cacheParams.usingVM || cacheParams.pgIdxBits >= cacheParams.untagBits) false.B 
else { // make sure that any in-flight non-allocating accesses are ordered before // any allocating accesses. this can only happen if aliasing is possible. val any_no_alloc_in_flight = Reg(Bool()) @@ -382,8 +374,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val concerns = (uncachedInFlight zip uncachedReqs) :+ (s2_valid && s2_req.no_alloc, s2_req) val s1_uncached_hits = concerns.map { c => - val concern_wmask = new StoreGen(c._2.size, c._2.addr, 0.U, wordBytes).mask - val addr_match = (c._2.addr ^ s1_paddr)(pgIdxBits+pgLevelBits-1, wordBytes.log2) === 0.U + val concern_wmask = new StoreGen(c._2.size, c._2.addr, 0.U, cacheParams.wordBytes).mask + val addr_match = (c._2.addr ^ s1_paddr)(cacheParams.pgIdxBits+cacheParams.pgLevelBits-1, cacheParams.wordBytes.log2) === 0.U val mask_match = (concern_wmask & s1_mask_xwr).orR || c._2.cmd === M_PWR || s1_req.cmd === M_PWR val cmd_match = isWrite(c._2.cmd) || isWrite(s1_req.cmd) c._1 && s1_need_check && cmd_match && addr_match && mask_match @@ -401,13 +393,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s2_uncached = !s2_pma.cacheable || s2_req.no_alloc && !s2_pma.must_alloc && !s2_hit_valid val s2_valid_cached_miss = s2_valid_miss && !s2_uncached && !uncachedInFlight.asUInt.orR dontTouch(s2_valid_cached_miss) - val s2_want_victimize = (!usingDataScratchpad).B && (s2_valid_cached_miss || s2_valid_flush_line || s2_valid_data_error || s2_flush_valid) + val s2_want_victimize = (!cacheParams.usingDataScratchpad).B && (s2_valid_cached_miss || s2_valid_flush_line || s2_valid_data_error || s2_flush_valid) val s2_cannot_victimize = !s2_flush_valid && io.cpu.s2_kill val s2_victimize = s2_want_victimize && !s2_cannot_victimize val s2_valid_uncached_pending = s2_valid_miss && s2_uncached && !uncachedInFlight.asUInt.andR val s2_victim_way = UIntToOH(RegEnable(s1_victim_way, s1_valid_not_nacked || s1_flush_valid)) val s2_victim_or_hit_way = Mux(s2_hit_valid, s2_hit_way, s2_victim_way) - val
s2_victim_tag = Mux(s2_valid_data_error || s2_valid_flush_line, s2_req.addr(paddrBits-1, tagLSB), Mux1H(s2_victim_way, s2_meta_corrected).tag) + val s2_victim_tag = Mux(s2_valid_data_error || s2_valid_flush_line, s2_req.addr(cacheParams.paddrBits-1, tagLSB), Mux1H(s2_victim_way, s2_meta_corrected).tag) val s2_victim_state = Mux(s2_hit_valid, s2_hit_state, Mux1H(s2_victim_way, s2_meta_corrected).coh) val (s2_prb_ack_data, s2_report_param, probeNewCoh)= s2_probe_state.onProbe(probe_bits.param) @@ -427,8 +419,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { metaArb.io.in(1).valid := s2_meta_error && (s2_valid_masked || s2_flush_valid_pre_tag_ecc || s2_probe) metaArb.io.in(1).bits.write := true.B metaArb.io.in(1).bits.way_en := s2_meta_uncorrectable_errors | Mux(s2_meta_error_uncorrectable, 0.U, PriorityEncoderOH(s2_meta_correctable_errors)) - metaArb.io.in(1).bits.idx := Mux(s2_probe, probeIdx(probe_bits), s2_vaddr(idxMSB, idxLSB)) - metaArb.io.in(1).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.in(1).bits.idx << blockOffBits) + metaArb.io.in(1).bits.idx := Mux(s2_probe, probeIdx(probe_bits), s2_vaddr(cacheParams.idxMSB, cacheParams.idxLSB)) + metaArb.io.in(1).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.untagBits, metaArb.io.in(1).bits.idx << cacheParams.blockOffBits) metaArb.io.in(1).bits.data := tECC.encode { val new_meta = WireDefault(s2_first_meta_corrected) when (s2_meta_error_uncorrectable) { new_meta.coh := ClientMetadata.onReset } @@ -439,29 +431,29 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { metaArb.io.in(2).valid := s2_valid_hit_pre_data_ecc_and_waw && s2_update_meta metaArb.io.in(2).bits.write := !io.cpu.s2_kill metaArb.io.in(2).bits.way_en := s2_victim_or_hit_way - metaArb.io.in(2).bits.idx := s2_vaddr(idxMSB, idxLSB) - metaArb.io.in(2).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_vaddr(idxMSB, 0)) + metaArb.io.in(2).bits.idx := s2_vaddr(cacheParams.idxMSB, 
cacheParams.idxLSB) + metaArb.io.in(2).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.untagBits, s2_vaddr(cacheParams.idxMSB, 0)) metaArb.io.in(2).bits.data := tECC.encode(L1Metadata(s2_req.addr >> tagLSB, s2_new_hit_state).asUInt) // load reservations and TL error reporting - val s2_lr = (usingAtomics && !usingDataScratchpad).B && s2_req.cmd === M_XLR - val s2_sc = (usingAtomics && !usingDataScratchpad).B && s2_req.cmd === M_XSC + val s2_lr = (cacheParams.usingAtomics && !cacheParams.usingDataScratchpad).B && s2_req.cmd === M_XLR + val s2_sc = (cacheParams.usingAtomics && !cacheParams.usingDataScratchpad).B && s2_req.cmd === M_XSC val lrscCount = RegInit(0.U) - val lrscValid = lrscCount > lrscBackoff.U + val lrscValid = lrscCount > cacheParams.lrscBackoff.U val lrscBackingOff = lrscCount > 0.U && !lrscValid val lrscAddr = Reg(UInt()) - val lrscAddrMatch = lrscAddr === (s2_req.addr >> blockOffBits) + val lrscAddrMatch = lrscAddr === (s2_req.addr >> cacheParams.blockOffBits) val s2_sc_fail = s2_sc && !(lrscValid && lrscAddrMatch) when ((s2_valid_hit && s2_lr && !cached_grant_wait || s2_valid_cached_miss) && !io.cpu.s2_kill) { - lrscCount := Mux(s2_hit, (lrscCycles - 1).U, 0.U) - lrscAddr := s2_req.addr >> blockOffBits + lrscCount := Mux(s2_hit, (cacheParams.lrscCycles - 1).U, 0.U) + lrscAddr := s2_req.addr >> cacheParams.blockOffBits } when (lrscCount > 0.U) { lrscCount := lrscCount - 1.U } - when (s2_valid_not_killed && lrscValid) { lrscCount := lrscBackoff.U } + when (s2_valid_not_killed && lrscValid) { lrscCount := cacheParams.lrscBackoff.U } when (s1_probe) { lrscCount := 0.U } // don't perform data correction if it might clobber a recent store - val s2_correct = s2_data_error && !any_pstore_valid && !RegNext(any_pstore_valid || s2_valid) && usingDataScratchpad.B + val s2_correct = s2_data_error && !any_pstore_valid && !RegNext(any_pstore_valid || s2_valid) && cacheParams.usingDataScratchpad.B // pending store buffer val s2_valid_correct = 
s2_valid_hit_pre_data_ecc_and_waw && s2_correct && !io.cpu.s2_kill def s2_store_valid_pre_kill = s2_valid_hit && s2_write && !s2_sc_fail @@ -501,23 +493,23 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val pstore2_addr = RegEnable(Mux(s2_correct, s2_vaddr, pstore1_addr), advance_pstore1) val pstore2_way = RegEnable(Mux(s2_correct, s2_hit_way, pstore1_way), advance_pstore1) val pstore2_storegen_data = { - for (i <- 0 until wordBytes) + for (i <- 0 until cacheParams.wordBytes) yield RegEnable(pstore1_storegen_data(8*(i+1)-1, 8*i), advance_pstore1 || pstore1_merge && pstore1_mask(i)) }.asUInt val pstore2_storegen_mask = { - val mask = Reg(UInt(wordBytes.W)) + val mask = Reg(UInt(cacheParams.wordBytes.W)) when (advance_pstore1 || pstore1_merge) { val mergedMask = pstore1_mask | Mux(pstore1_merge, mask, 0.U) mask := ~Mux(s2_correct, 0.U, ~mergedMask) } mask } - s2_store_merge := (if (eccBytes == 1) false.B else { + s2_store_merge := (if (cacheParams.eccBytes == 1) false.B else { ccover(pstore1_merge, "STORE_MERGED", "D$ store merged") // only merge stores to ECC granules that are already stored-to, to avoid // WAW hazards val wordMatch = (eccMask(pstore2_storegen_mask) | ~eccMask(pstore1_mask)).andR - val idxMatch = s2_vaddr(untagBits-1, log2Ceil(wordBytes)) === pstore2_addr(untagBits-1, log2Ceil(wordBytes)) + val idxMatch = s2_vaddr(cacheParams.untagBits-1, log2Ceil(cacheParams.wordBytes)) === pstore2_addr(cacheParams.untagBits-1, log2Ceil(cacheParams.wordBytes)) val tagMatch = (s2_hit_way & pstore2_way).orR pstore2_valid && wordMatch && idxMatch && tagMatch }) @@ -525,23 +517,23 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { dataArb.io.in(0).bits.write := pstore_drain dataArb.io.in(0).bits.addr := Mux(pstore2_valid, pstore2_addr, pstore1_addr) dataArb.io.in(0).bits.way_en := Mux(pstore2_valid, pstore2_way, pstore1_way) - dataArb.io.in(0).bits.wdata := encodeData(Fill(rowWords, Mux(pstore2_valid, pstore2_storegen_data, 
pstore1_data)), false.B) + dataArb.io.in(0).bits.wdata := encodeData(Fill(cacheParams.rowWords, Mux(pstore2_valid, pstore2_storegen_data, pstore1_data)), false.B) dataArb.io.in(0).bits.wordMask := { - val eccMask = dataArb.io.in(0).bits.eccMask.asBools.grouped(subWordBytes/eccBytes).map(_.orR).toSeq.asUInt - val wordMask = UIntToOH(Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(rowOffBits-1, wordBytes.log2)) - FillInterleaved(wordBytes/subWordBytes, wordMask) & Fill(rowBytes/wordBytes, eccMask) + val eccMask = dataArb.io.in(0).bits.eccMask.asBools.grouped(cacheParams.subWordBytes/cacheParams.eccBytes).map(_.orR).toSeq.asUInt + val wordMask = UIntToOH(Mux(pstore2_valid, pstore2_addr, pstore1_addr).extract(cacheParams.rowOffBits-1, cacheParams.wordBytes.log2)) + FillInterleaved(cacheParams.wordBytes/cacheParams.subWordBytes, wordMask) & Fill(cacheParams.rowBytes/cacheParams.wordBytes, eccMask) } dataArb.io.in(0).bits.eccMask := eccMask(Mux(pstore2_valid, pstore2_storegen_mask, pstore1_mask)) // store->load RAW hazard detection def s1Depends(addr: UInt, mask: UInt) = - addr(idxMSB, wordOffBits) === s1_vaddr(idxMSB, wordOffBits) && + addr(cacheParams.idxMSB, cacheParams.wordOffBits) === s1_vaddr(cacheParams.idxMSB, cacheParams.wordOffBits) && Mux(s1_write, (eccByteMask(mask) & eccByteMask(s1_mask_xwr)).orR, (mask & s1_mask_xwr).orR) val s1_hazard = (pstore1_valid_likely && s1Depends(pstore1_addr, pstore1_mask)) || (pstore2_valid && s1Depends(pstore2_addr, pstore2_storegen_mask)) val s1_raw_hazard = s1_read && s1_hazard - s1_waw_hazard := (if (eccBytes == 1) false.B else { + s1_waw_hazard := (if (cacheParams.eccBytes == 1) false.B else { ccover(s1_valid_not_nacked && s1_waw_hazard, "WAW_HAZARD", "D$ write-after-write hazard") s1_write && (s1_hazard || needsRead(s1_req) && !s1_did_read) }) @@ -552,11 +544,11 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // Prepare a TileLink request message that initiates a transaction val a_source = 
PriorityEncoder(~uncachedInFlight.asUInt << mmioOffset) // skip the MSHR - val acquire_address = (s2_req.addr >> idxLSB) << idxLSB + val acquire_address = (s2_req.addr >> cacheParams.idxLSB) << cacheParams.idxLSB val access_address = s2_req.addr val a_size = s2_req.size - val a_data = Fill(beatWords, pstore1_data) - val a_mask = pstore1_mask << (access_address.extract(beatBytes.log2-1, wordBytes.log2) << 3) + val a_data = Fill(cacheParams.beatWords, pstore1_data) + val a_mask = pstore1_mask << (access_address.extract(cacheParams.beatBytes.log2-1, cacheParams.wordBytes.log2) << 3) val get = edge.Get(a_source, access_address, a_size)._2 val put = edge.Put(a_source, access_address, a_size, a_data)._2 val putpartial = edge.Put(a_source, access_address, a_size, a_data, a_mask)._2 @@ -580,7 +572,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { tl_out_a.valid := !io.cpu.s2_kill && (s2_valid_uncached_pending || (s2_valid_cached_miss && - !(release_ack_wait && (s2_req.addr ^ release_ack_addr)(((pgIdxBits + pgLevelBits) min paddrBits) - 1, idxLSB) === 0.U) && + !(release_ack_wait && (s2_req.addr ^ release_ack_addr)(((cacheParams.pgIdxBits + cacheParams.pgLevelBits) min cacheParams.paddrBits) - 1, cacheParams.idxLSB) === 0.U) && (cacheParams.acquireBeforeRelease.B && !release_ack_wait && release_queue_empty || !s2_victim_dirty))) tl_out_a.bits := Mux(!s2_uncached, acquire(s2_vaddr, s2_req.addr, s2_grow_param), Mux(!s2_write, get, @@ -627,7 +619,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val uncachedGrantOpcodesWithData = Seq(AccessAckData) val uncachedGrantOpcodes = uncachedGrantOpcodesWithData ++ uncachedGrantOpcodesSansData val whole_opc = tl_out.d.bits.opcode - if (usingDataScratchpad) { + if (cacheParams.usingDataScratchpad) { assert(!tl_out.d.valid || whole_opc.isOneOf(uncachedGrantOpcodes)) // the only valid TL-D messages are uncached, so we can do some pruning val opc = 
whole_opc(uncachedGrantOpcodes.map(_.getWidth).max - 1, 0) @@ -646,7 +638,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { when (blockProbeAfterGrantCount > 0.U) { blockProbeAfterGrantCount := blockProbeAfterGrantCount - 1.U } val canAcceptCachedGrant = !release_state.isOneOf(s_voluntary_writeback, s_voluntary_write_meta, s_voluntary_release) tl_out.d.ready := Mux(grantIsCached, (!d_first || tl_out.e.ready) && canAcceptCachedGrant, true.B) - val uncachedRespIdxOH = UIntToOH(tl_out.d.bits.source, maxUncachedInFlight+mmioOffset) >> mmioOffset + val uncachedRespIdxOH = UIntToOH(tl_out.d.bits.source, cacheParams.maxUncachedInFlight+mmioOffset) >> mmioOffset uncachedResp := Mux1H(uncachedRespIdxOH, uncachedReqs) when (tl_out.d.fire()) { when (grantIsCached) { @@ -674,9 +666,9 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { s2_req.signed := uncachedResp.signed s2_req.tag := uncachedResp.tag s2_req.addr := { - require(rowOffBits >= beatOffBits) - val dontCareBits = s1_paddr >> rowOffBits << rowOffBits - dontCareBits | uncachedResp.addr(beatOffBits-1, 0) + require(cacheParams.rowOffBits >= cacheParams.beatOffBits) + val dontCareBits = s1_paddr >> cacheParams.rowOffBits << cacheParams.rowOffBits + dontCareBits | uncachedResp.addr(cacheParams.beatOffBits-1, 0) } s2_uncached_resp_addr := uncachedResp.addr } @@ -700,13 +692,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { tl_out.e.valid := false.B tl_out.d.ready := false.B } - if (!usingDataScratchpad) { + if (!cacheParams.usingDataScratchpad) { dataArb.io.in(1).bits.write := true.B - dataArb.io.in(1).bits.addr := (s2_vaddr >> idxLSB) << idxLSB | d_address_inc + dataArb.io.in(1).bits.addr := (s2_vaddr >> cacheParams.idxLSB) << cacheParams.idxLSB | d_address_inc dataArb.io.in(1).bits.way_en := refill_way dataArb.io.in(1).bits.wdata := tl_d_data_encoded - dataArb.io.in(1).bits.wordMask := ~0.U((rowBytes / subWordBytes).W) - dataArb.io.in(1).bits.eccMask := 
~0.U((wordBytes / eccBytes).W) + dataArb.io.in(1).bits.wordMask := ~0.U((cacheParams.rowBytes / cacheParams.subWordBytes).W) + dataArb.io.in(1).bits.eccMask := ~0.U((cacheParams.wordBytes / cacheParams.eccBytes).W) } else { dataArb.io.in(1).bits := dataArb.io.in(0).bits } @@ -718,8 +710,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { metaArb.io.in(3).valid := grantIsCached && d_done && !tl_out.d.bits.denied metaArb.io.in(3).bits.write := true.B metaArb.io.in(3).bits.way_en := refill_way - metaArb.io.in(3).bits.idx := s2_vaddr(idxMSB, idxLSB) - metaArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, s2_vaddr(idxMSB, 0)) + metaArb.io.in(3).bits.idx := s2_vaddr(cacheParams.idxMSB, cacheParams.idxLSB) + metaArb.io.in(3).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.untagBits, s2_vaddr(cacheParams.idxMSB, 0)) metaArb.io.in(3).bits.data := tECC.encode(L1Metadata(s2_req.addr >> tagLSB, s2_hit_state.onGrant(s2_req.cmd, tl_out.d.bits.param)).asUInt) if (!cacheParams.separateUncachedResp) { @@ -741,28 +733,28 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // Handle an incoming TileLink Probe message val block_probe_for_core_progress = blockProbeAfterGrantCount > 0.U || lrscValid - val block_probe_for_pending_release_ack = release_ack_wait && (tl_out.b.bits.address ^ release_ack_addr)(((pgIdxBits + pgLevelBits) min paddrBits) - 1, idxLSB) === 0.U + val block_probe_for_pending_release_ack = release_ack_wait && (tl_out.b.bits.address ^ release_ack_addr)(((cacheParams.pgIdxBits + cacheParams.pgLevelBits) min cacheParams.paddrBits) - 1, cacheParams.idxLSB) === 0.U val block_probe_for_ordering = releaseInFlight || block_probe_for_pending_release_ack || grantInProgress metaArb.io.in(6).valid := tl_out.b.valid && (!block_probe_for_core_progress || lrscBackingOff) tl_out.b.ready := metaArb.io.in(6).ready && !(block_probe_for_core_progress || block_probe_for_ordering || s1_valid || s2_valid) metaArb.io.in(6).bits.write :=
false.B metaArb.io.in(6).bits.idx := probeIdx(tl_out.b.bits) - metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> paddrBits, tl_out.b.bits.address) + metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.paddrBits, tl_out.b.bits.address) metaArb.io.in(6).bits.way_en := metaArb.io.in(4).bits.way_en metaArb.io.in(6).bits.data := metaArb.io.in(4).bits.data // replacement policy - s1_victim_way := (if (replacer.perSet && nWays > 1) { - val repl_array = Mem(nSets, UInt(replacer.nBits.W)) - val s1_repl_idx = s1_req.addr(idxBits+blockOffBits-1, blockOffBits) - val s2_repl_idx = s2_vaddr(idxBits+blockOffBits-1, blockOffBits) + s1_victim_way := (if (replacer.perSet && cacheParams.nWays > 1) { + val repl_array = Mem(cacheParams.nSets, UInt(replacer.nBits.W)) + val s1_repl_idx = s1_req.addr(cacheParams.idxBits+cacheParams.blockOffBits-1, cacheParams.blockOffBits) + val s2_repl_idx = s2_vaddr(cacheParams.idxBits+cacheParams.blockOffBits-1, cacheParams.blockOffBits) val s2_repl_state = Reg(UInt(replacer.nBits.W)) val s2_new_repl_state = replacer.get_next_state(s2_repl_state, OHToUInt(s2_hit_way)) val s2_repl_wen = s2_valid_masked && s2_hit_way.orR && s2_repl_state =/= s2_new_repl_state val s1_repl_state = Mux(s2_repl_wen && s2_repl_idx === s1_repl_idx, s2_new_repl_state, repl_array(s1_repl_idx)) when (s1_valid_not_nacked) { s2_repl_state := s1_repl_state } - val waddr = Mux(resetting, flushCounter(idxBits-1, 0), s2_repl_idx) + val waddr = Mux(resetting, flushCounter(cacheParams.idxBits-1, 0), s2_repl_idx) val wdata = Mux(resetting, 0.U, s2_new_repl_state) val wen = resetting || s2_repl_wen when (wen) { repl_array(waddr) := wdata } @@ -789,14 +781,14 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val newCoh = WireDefault(probeNewCoh) releaseWay := s2_probe_way - if (!usingDataScratchpad) { + if (!cacheParams.usingDataScratchpad) { when (s2_victimize) { assert(s2_valid_flush_line || s2_flush_valid || io.cpu.s2_nack) val discard_line =
s2_valid_flush_line && s2_req.size(1) || s2_flush_valid && flushing_req.size(1) release_state := Mux(s2_victim_dirty && !discard_line, s_voluntary_writeback, Mux(!cacheParams.silentDrop.B && !release_ack_wait && release_queue_empty && s2_victim_state.isValid() && (s2_valid_flush_line || s2_flush_valid || s2_readwrite && !s2_hit_valid), s_voluntary_release, s_voluntary_write_meta)) - probe_bits := addressToProbe(s2_vaddr, Cat(s2_victim_tag, s2_req.addr(tagLSB-1, idxLSB)) << idxLSB) + probe_bits := addressToProbe(s2_vaddr, Cat(s2_victim_tag, s2_req.addr(tagLSB-1, cacheParams.idxLSB)) << cacheParams.idxLSB) } when (s2_probe) { val probeNack = WireDefault(true.B) @@ -818,7 +810,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { when (release_state === s_probe_retry) { metaArb.io.in(6).valid := true.B metaArb.io.in(6).bits.idx := probeIdx(probe_bits) - metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> paddrBits, probe_bits.address) + metaArb.io.in(6).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.paddrBits, probe_bits.address) when (metaArb.io.in(6).ready) { release_state := s_ready s1_probe := true.B @@ -841,12 +833,12 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { when (release_state === s_voluntary_release) { tl_out_c.bits := edge.Release(fromSource = 0.U, toAddress = 0.U, - lgSize = lgCacheBlockBytes.U, + lgSize = cacheParams.lgCacheBlockBytes.U, shrinkPermissions = s2_shrink_param)._2 }.otherwise { tl_out_c.bits := edge.Release(fromSource = 0.U, toAddress = 0.U, - lgSize = lgCacheBlockBytes.U, + lgSize = cacheParams.lgCacheBlockBytes.U, shrinkPermissions = s2_shrink_param, data = 0.U)._2 } @@ -874,19 +866,19 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { x.writealloc := true.B } - dataArb.io.in(2).valid := inWriteback && releaseDataBeat < refillCycles.U + dataArb.io.in(2).valid := inWriteback && releaseDataBeat < cacheParams.refillCycles.U dataArb.io.in(2).bits := 
dataArb.io.in(1).bits dataArb.io.in(2).bits.write := false.B - dataArb.io.in(2).bits.addr := (probeIdx(probe_bits) << blockOffBits) | (releaseDataBeat(log2Up(refillCycles)-1,0) << rowOffBits) - dataArb.io.in(2).bits.wordMask := ~0.U((rowBytes / subWordBytes).W) - dataArb.io.in(2).bits.eccMask := ~0.U((wordBytes / eccBytes).W) - dataArb.io.in(2).bits.way_en := ~0.U(nWays.W) + dataArb.io.in(2).bits.addr := (probeIdx(probe_bits) << cacheParams.blockOffBits) | (releaseDataBeat(log2Up(cacheParams.refillCycles)-1,0) << cacheParams.rowOffBits) + dataArb.io.in(2).bits.wordMask := ~0.U((cacheParams.rowBytes / cacheParams.subWordBytes).W) + dataArb.io.in(2).bits.eccMask := ~0.U((cacheParams.wordBytes / cacheParams.eccBytes).W) + dataArb.io.in(2).bits.way_en := ~0.U(cacheParams.nWays.W) metaArb.io.in(4).valid := release_state.isOneOf(s_voluntary_write_meta, s_probe_write_meta) metaArb.io.in(4).bits.write := true.B metaArb.io.in(4).bits.way_en := releaseWay metaArb.io.in(4).bits.idx := probeIdx(probe_bits) - metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, probe_bits.address(idxMSB, 0)) + metaArb.io.in(4).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.untagBits, probe_bits.address(cacheParams.idxMSB, 0)) metaArb.io.in(4).bits.data := tECC.encode(L1Metadata(tl_out_c.bits.address >> tagLSB, newCoh).asUInt) when (metaArb.io.in(4).fire()) { release_state := s_ready } @@ -908,7 +900,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { val s1_xcpt_valid = tlb.io.req.valid && !s1_isSlavePortAccess && !s1_nack io.cpu.s2_xcpt := Mux(RegNext(s1_xcpt_valid), s2_tlb_xcpt, 0.U.asTypeOf(s2_tlb_xcpt)) - if (usingDataScratchpad) { + if (cacheParams.usingDataScratchpad) { assert(!(s2_valid_masked && s2_req.cmd.isOneOf(M_XLR, M_XSC))) } else { ccover(tl_out.b.valid && !tl_out.b.ready, "BLOCK_B", "D$ B-channel blocked") @@ -916,8 +908,8 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // uncached response val 
s1_uncached_data_word = { - val word_idx = uncachedResp.addr.extract(log2Up(rowBits/8)-1, log2Up(wordBytes)) - val words = tl_out.d.bits.data.grouped(wordBits) + val word_idx = uncachedResp.addr.extract(log2Up(cacheParams.rowBits/8)-1, log2Up(cacheParams.wordBytes)) + val words = tl_out.d.bits.data.grouped(cacheParams.wordBits) words(word_idx) } val s2_uncached_data_word = RegEnable(s1_uncached_data_word, io.cpu.replay_next) @@ -935,7 +927,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { resp.bits.tag := uncachedResp.tag resp.bits.size := uncachedResp.size resp.bits.signed := uncachedResp.signed - resp.bits.data := new LoadGen(uncachedResp.size, uncachedResp.signed, uncachedResp.addr, s1_uncached_data_word, false.B, wordBytes).data + resp.bits.data := new LoadGen(uncachedResp.size, uncachedResp.signed, uncachedResp.addr, s1_uncached_data_word, false.B, cacheParams.wordBytes).data resp.bits.data_raw := s1_uncached_data_word when (grantIsUncachedData && !resp.ready) { tl_out.d.ready := false.B @@ -943,10 +935,10 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } // load data subword mux/sign extension - val s2_data_word = (0 until rowBits by wordBits).map(i => s2_data_uncorrected(wordBits+i-1,i)).reduce(_|_) - val s2_data_word_corrected = (0 until rowBits by wordBits).map(i => s2_data_corrected(wordBits+i-1,i)).reduce(_|_) + val s2_data_word = (0 until cacheParams.rowBits by cacheParams.wordBits).map(i => s2_data_uncorrected(cacheParams.wordBits+i-1,i)).reduce(_|_) + val s2_data_word_corrected = (0 until cacheParams.rowBits by cacheParams.wordBits).map(i => s2_data_corrected(cacheParams.wordBits+i-1,i)).reduce(_|_) val s2_data_word_possibly_uncached = Mux(cacheParams.pipelineWayMux.B && doUncachedResp, s2_uncached_data_word, 0.U) | s2_data_word - val loadgen = new LoadGen(s2_req.size, s2_req.signed, s2_req.addr, s2_data_word_possibly_uncached, s2_sc, wordBytes) + val loadgen = new LoadGen(s2_req.size, s2_req.signed, 
s2_req.addr, s2_data_word_possibly_uncached, s2_sc, cacheParams.wordBytes) io.cpu.resp.bits.data := loadgen.data | s2_sc_fail io.cpu.resp.bits.data_word_bypass := loadgen.wordData io.cpu.resp.bits.data_raw := s2_data_word @@ -954,15 +946,15 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { // AMOs if (usingRMW) { - val amoalus = (0 until coreDataBits / xLen).map { i => - val amoalu = Module(new AMOALU(xLen)) - amoalu.io.mask := pstore1_mask >> (i * xBytes) + val amoalus = (0 until cacheParams.coreDataBits / cacheParams.xLen).map { i => + val amoalu = Module(new AMOALU(cacheParams.xLen)) + amoalu.io.mask := pstore1_mask >> (i * cacheParams.xBytes) amoalu.io.cmd := (if (usingAtomicsInCache) pstore1_cmd else M_XWR) - amoalu.io.lhs := s2_data_word >> (i * xLen) - amoalu.io.rhs := pstore1_data >> (i * xLen) + amoalu.io.lhs := s2_data_word >> (i * cacheParams.xLen) + amoalu.io.rhs := pstore1_data >> (i * cacheParams.xLen) amoalu } - pstore1_storegen_data := (if (!usingDataScratchpad) amoalus.map(_.io.out).asUInt else { + pstore1_storegen_data := (if (!cacheParams.usingDataScratchpad) amoalus.map(_.io.out).asUInt else { val mask = FillInterleaved(8, Mux(s2_correct, 0.U, pstore1_mask)) amoalus.map(_.io.out_unmasked).asUInt & mask | s2_data_word_corrected & ~mask }) @@ -970,7 +962,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { assert(!(s1_valid_masked && s1_read && s1_write), "unsupported D$ operation") } - if (coreParams.useVector) { + if (useVector) { edge.manager.managers.foreach { m => // Statically ensure that no-allocate accesses are permitted. // We could consider turning some of these into dynamic PMA checks. 
@@ -980,18 +972,18 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } // flushes - if (!usingDataScratchpad) + if (!cacheParams.usingDataScratchpad) when (RegNext(reset.asBool)) { resetting := true.B } val flushCounterNext = flushCounter +& 1.U - val flushDone = (flushCounterNext >> log2Ceil(nSets)) === nWays.U - val flushCounterWrap = flushCounterNext(log2Ceil(nSets)-1, 0) + val flushDone = (flushCounterNext >> log2Ceil(cacheParams.nSets)) === cacheParams.nWays.U + val flushCounterWrap = flushCounterNext(log2Ceil(cacheParams.nSets)-1, 0) ccover(s2_valid_masked && s2_cmd_flush_all && s2_meta_error, "TAG_ECC_ERROR_DURING_FENCE_I", "D$ ECC error in tag array during cache flush") ccover(s2_valid_masked && s2_cmd_flush_all && s2_data_error, "DATA_ECC_ERROR_DURING_FENCE_I", "D$ ECC error in data array during cache flush") s1_flush_valid := metaArb.io.in(5).fire() && !s1_flush_valid && !s2_flush_valid_pre_tag_ecc && release_state === s_ready && !release_ack_wait metaArb.io.in(5).valid := flushing && !flushed metaArb.io.in(5).bits.write := false.B - metaArb.io.in(5).bits.idx := flushCounter(idxBits-1, 0) - metaArb.io.in(5).bits.addr := Cat(io.cpu.req.bits.addr >> untagBits, metaArb.io.in(5).bits.idx << blockOffBits) + metaArb.io.in(5).bits.idx := flushCounter(cacheParams.idxBits-1, 0) + metaArb.io.in(5).bits.addr := Cat(io.cpu.req.bits.addr >> cacheParams.untagBits, metaArb.io.in(5).bits.idx << cacheParams.blockOffBits) metaArb.io.in(5).bits.way_en := metaArb.io.in(4).bits.way_en metaArb.io.in(5).bits.data := metaArb.io.in(4).bits.data @@ -1006,12 +998,12 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { when (tl_out_a.fire() && !s2_uncached) { flushed := false.B } when (flushing) { - s1_victim_way := flushCounter >> log2Up(nSets) + s1_victim_way := flushCounter >> log2Up(cacheParams.nSets) when (s2_flush_valid) { flushCounter := flushCounterNext when (flushDone) { flushed := true.B - if (!isPow2(nWays)) flushCounter := 
flushCounterWrap + if (!isPow2(cacheParams.nWays)) flushCounter := flushCounterWrap } } when (flushed && release_state === s_ready && !release_ack_wait) { @@ -1022,13 +1014,13 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { metaArb.io.in(0).valid := resetting metaArb.io.in(0).bits := metaArb.io.in(5).bits metaArb.io.in(0).bits.write := true.B - metaArb.io.in(0).bits.way_en := ~0.U(nWays.W) + metaArb.io.in(0).bits.way_en := ~0.U(cacheParams.nWays.W) metaArb.io.in(0).bits.data := tECC.encode(L1Metadata(0.U, ClientMetadata.onReset).asUInt) when (resetting) { flushCounter := flushCounterNext when (flushDone) { resetting := false.B - if (!isPow2(nWays)) flushCounter := flushCounterWrap + if (!isPow2(cacheParams.nWays)) flushCounter := flushCounterWrap } } @@ -1074,25 +1066,25 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { case _: AsynchronousCrossing => 1 // likewise case _: CreditedCrossing => 1 // likewise } - val near_end_of_refill = if (cacheBlockBytes / beatBytes <= beatsBeforeEnd) tl_out.d.valid else { - val refill_count = RegInit(0.U((cacheBlockBytes / beatBytes).log2.W)) + val near_end_of_refill = if (cacheParams.cacheBlockBytes / cacheParams.beatBytes <= beatsBeforeEnd) tl_out.d.valid else { + val refill_count = RegInit(0.U((cacheParams.cacheBlockBytes / cacheParams.beatBytes).log2.W)) when (tl_out.d.fire() && grantIsRefill) { refill_count := refill_count + 1.U } - refill_count >= (cacheBlockBytes / beatBytes - beatsBeforeEnd).U + refill_count >= (cacheParams.cacheBlockBytes / cacheParams.beatBytes - beatsBeforeEnd).U } cached_grant_wait && !near_end_of_refill } // report errors val (data_error, data_error_uncorrectable, data_error_addr) = - if (usingDataScratchpad) (s2_valid_data_error, s2_data_error_uncorrectable, s2_req.addr) else { + if (cacheParams.usingDataScratchpad) (s2_valid_data_error, s2_data_error_uncorrectable, s2_req.addr) else { (RegNext(tl_out_c.fire() && inWriteback && s2_data_error), 
RegNext(s2_data_error_uncorrectable), probe_bits.address) // This is stable for a cycle after tl_out_c.fire, so don't need a register } { val error_addr = - Mux(metaArb.io.in(1).valid, Cat(s2_first_meta_corrected.tag, metaArb.io.in(1).bits.addr(tagLSB-1, idxLSB)), - data_error_addr >> idxLSB) << idxLSB + Mux(metaArb.io.in(1).valid, Cat(s2_first_meta_corrected.tag, metaArb.io.in(1).bits.addr(tagLSB-1, cacheParams.idxLSB)), + data_error_addr >> cacheParams.idxLSB) << cacheParams.idxLSB io.errors.uncorrectable.foreach { u => u.valid := metaArb.io.in(1).valid && s2_meta_error_uncorrectable || data_error && data_error_uncorrectable u.bits := error_addr @@ -1103,77 +1095,77 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { io.errors.uncorrectable.foreach { u => when (u.valid) { c.valid := false.B } } } io.errors.bus.valid := tl_out.d.fire() && (tl_out.d.bits.denied || tl_out.d.bits.corrupt) - io.errors.bus.bits := Mux(grantIsCached, s2_req.addr >> idxLSB << idxLSB, 0.U) + io.errors.bus.bits := Mux(grantIsCached, s2_req.addr >> cacheParams.idxLSB << cacheParams.idxLSB, 0.U) ccoverNotScratchpad(io.errors.bus.valid && grantIsCached, "D_ERROR_CACHED", "D$ D-channel error, cached") ccover(io.errors.bus.valid && !grantIsCached, "D_ERROR_UNCACHED", "D$ D-channel error, uncached") } - if (usingDataScratchpad) { - val data_error_cover = Seq( - property.CoverBoolean(!data_error, Seq("no_data_error")), - property.CoverBoolean(data_error && !data_error_uncorrectable, Seq("data_correctable_error")), - property.CoverBoolean(data_error && data_error_uncorrectable, Seq("data_uncorrectable_error"))) - val request_source = Seq( - property.CoverBoolean(s2_isSlavePortAccess, Seq("from_TL")), - property.CoverBoolean(!s2_isSlavePortAccess, Seq("from_CPU"))) - - property.cover(new property.CrossProperty( - Seq(data_error_cover, request_source), - Seq(), - "MemorySystem;;Scratchpad Memory Bit Flip Cross Covers")) - } else { - - val data_error_type = Seq( - 
property.CoverBoolean(!s2_valid_data_error, Seq("no_data_error")), - property.CoverBoolean(s2_valid_data_error && !s2_data_error_uncorrectable, Seq("data_correctable_error")), - property.CoverBoolean(s2_valid_data_error && s2_data_error_uncorrectable, Seq("data_uncorrectable_error"))) - val data_error_dirty = Seq( - property.CoverBoolean(!s2_victim_dirty, Seq("data_clean")), - property.CoverBoolean(s2_victim_dirty, Seq("data_dirty"))) - val request_source = if (supports_flush) { - Seq( - property.CoverBoolean(!flushing, Seq("access")), - property.CoverBoolean(flushing, Seq("during_flush"))) - } else { - Seq(property.CoverBoolean(true.B, Seq("never_flush"))) - } - val tag_error_cover = Seq( - property.CoverBoolean( !s2_meta_error, Seq("no_tag_error")), - property.CoverBoolean( s2_meta_error && !s2_meta_error_uncorrectable, Seq("tag_correctable_error")), - property.CoverBoolean( s2_meta_error && s2_meta_error_uncorrectable, Seq("tag_uncorrectable_error"))) - property.cover(new property.CrossProperty( - Seq(data_error_type, data_error_dirty, request_source, tag_error_cover), - Seq(), - "MemorySystem;;Cache Memory Bit Flip Cross Covers")) - } +// if (cacheParams.usingDataScratchpad) { +// val data_error_cover = Seq( +// property.CoverBoolean(!data_error, Seq("no_data_error")), +// property.CoverBoolean(data_error && !data_error_uncorrectable, Seq("data_correctable_error")), +// property.CoverBoolean(data_error && data_error_uncorrectable, Seq("data_uncorrectable_error"))) +// val request_source = Seq( +// property.CoverBoolean(s2_isSlavePortAccess, Seq("from_TL")), +// property.CoverBoolean(!s2_isSlavePortAccess, Seq("from_CPU"))) +// +// property.cover(new property.CrossProperty( +// Seq(data_error_cover, request_source), +// Seq(), +// "MemorySystem;;Scratchpad Memory Bit Flip Cross Covers")) +// } else { +// +// val data_error_type = Seq( +// property.CoverBoolean(!s2_valid_data_error, Seq("no_data_error")), +// property.CoverBoolean(s2_valid_data_error && 
!s2_data_error_uncorrectable, Seq("data_correctable_error")), +// property.CoverBoolean(s2_valid_data_error && s2_data_error_uncorrectable, Seq("data_uncorrectable_error"))) +// val data_error_dirty = Seq( +// property.CoverBoolean(!s2_victim_dirty, Seq("data_clean")), +// property.CoverBoolean(s2_victim_dirty, Seq("data_dirty"))) +// val request_source = if (supports_flush) { +// Seq( +// property.CoverBoolean(!flushing, Seq("access")), +// property.CoverBoolean(flushing, Seq("during_flush"))) +// } else { +// Seq(property.CoverBoolean(true.B, Seq("never_flush"))) +// } +// val tag_error_cover = Seq( +// property.CoverBoolean( !s2_meta_error, Seq("no_tag_error")), +// property.CoverBoolean( s2_meta_error && !s2_meta_error_uncorrectable, Seq("tag_correctable_error")), +// property.CoverBoolean( s2_meta_error && s2_meta_error_uncorrectable, Seq("tag_uncorrectable_error"))) +// property.cover(new property.CrossProperty( +// Seq(data_error_type, data_error_dirty, request_source, tag_error_cover), +// Seq(), +// "MemorySystem;;Cache Memory Bit Flip Cross Covers")) +// } } // leaving gated-clock domain val dcacheImpl = withClock (gated_clock) { new DCacheModuleImpl } - def encodeData(x: UInt, poison: Bool) = x.grouped(eccBits).map(dECC.encode(_, if (dECC.canDetect) poison else false.B)).asUInt - def dummyEncodeData(x: UInt) = x.grouped(eccBits).map(dECC.swizzle(_)).asUInt - def decodeData(x: UInt) = x.grouped(dECC.width(eccBits)).map(dECC.decode(_)) - def eccMask(byteMask: UInt) = byteMask.grouped(eccBytes).map(_.orR).asUInt - def eccByteMask(byteMask: UInt) = FillInterleaved(eccBytes, eccMask(byteMask)) + def encodeData(x: UInt, poison: Bool) = x.grouped(cacheParams.eccBits).map(dECC.encode(_, if (dECC.canDetect) poison else false.B)).asUInt + def dummyEncodeData(x: UInt) = x.grouped(cacheParams.eccBits).map(dECC.swizzle(_)).asUInt + def decodeData(x: UInt) = x.grouped(dECC.width(cacheParams.eccBits)).map(dECC.decode(_)) + def eccMask(byteMask: UInt) = 
byteMask.grouped(cacheParams.eccBytes).map(_.orR).asUInt + def eccByteMask(byteMask: UInt) = FillInterleaved(cacheParams.eccBytes, eccMask(byteMask)) def likelyNeedsRead(req: HellaCacheReq) = { - val res = !req.cmd.isOneOf(M_XWR, M_PFW) || req.size < log2Ceil(eccBytes).U + val res = !req.cmd.isOneOf(M_XWR, M_PFW) || req.size < log2Ceil(cacheParams.eccBytes).U assert(!needsRead(req) || res) res } def needsRead(req: HellaCacheReq) = isRead(req.cmd) || - (isWrite(req.cmd) && (req.cmd === M_PWR || req.size < log2Ceil(eccBytes).U)) + (isWrite(req.cmd) && (req.cmd === M_PWR || req.size < log2Ceil(cacheParams.eccBytes).U)) def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - property.cover(cond, s"DCACHE_$label", "MemorySystem;;" + desc) + cover(cond, s"DCACHE_$label; MemorySystem;;" + desc) def ccoverNotScratchpad(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - if (!usingDataScratchpad) ccover(cond, label, desc) + if (!cacheParams.usingDataScratchpad) ccover(cond, label, desc) - require(!usingVM || tagLSB <= pgIdxBits, s"D$$ set size must not exceed ${1<<(pgIdxBits-10)} KiB; got ${(nSets * cacheBlockBytes)>>10} KiB") - def tagLSB: Int = untagBits - def probeIdx(b: TLBundleB): UInt = b.address(idxMSB, idxLSB) + require(!cacheParams.usingVM || tagLSB <= cacheParams.pgIdxBits, s"D$$ set size must not exceed ${1<<(cacheParams.pgIdxBits-10)} KiB; got ${(cacheParams.nSets * cacheParams.cacheBlockBytes)>>10} KiB") + def tagLSB: Int = cacheParams.untagBits + def probeIdx(b: TLBundleB): UInt = b.address(cacheParams.idxMSB, cacheParams.idxLSB) def addressToProbe(vaddr: UInt, paddr: UInt): TLBundleB = { val res = Wire(new TLBundleB(edge.bundle)) res <> DontCare @@ -1183,7 +1175,7 @@ class DCacheModule(outer: DCache) extends HellaCacheModule(outer) { } def acquire(vaddr: UInt, paddr: UInt, param: UInt): TLBundleA = { if (!edge.manager.anySupportAcquireB) WireDefault(0.U.asTypeOf(new TLBundleA(edge.bundle))) - else 
edge.AcquireBlock(0.U, paddr >> lgCacheBlockBytes << lgCacheBlockBytes, lgCacheBlockBytes.U, param)._2 + else edge.AcquireBlock(0.U, paddr >> cacheParams.lgCacheBlockBytes << cacheParams.lgCacheBlockBytes, cacheParams.lgCacheBlockBytes.U, param)._2 } } diff --git a/rocket/src/HellaCache.scala b/rocket/src/HellaCache.scala new file mode 100644 index 000000000..44d66b095 --- /dev/null +++ b/rocket/src/HellaCache.scala @@ -0,0 +1,283 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.rocket + +import chisel3._ +import chisel3.util.{isPow2,log2Ceil,log2Up,Decoupled,Valid} +import org.chipsalliance.rocket.util._ +import org.chipsalliance.rocket.MemoryOpConstants._ + +case class DCacheParams( + xLen: Int, + paddrBits: Int, + vaddrBitsExtended: Int, + coreDataBits: Int, + coreMaxAddrBits: Int, + cacheBlockBytes: Int, + pgIdxBits: Int, + addressBits: Int, + dataBits: Int, + lrscCycles: Int, // ISA requires 16-insn LRSC sequences to succeed + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + usingVM: Boolean, + nSets: Int = 64, + nWays: Int = 4, + rowBits: Int = 64, + subWordBitsOption: Option[Int] = None, + replacementPolicy: String = "random", + nTLBSets: Int = 1, + nTLBWays: Int = 32, + nTLBBasePageSectors: Int = 4, + nTLBSuperpages: Int = 4, + tagECC: Option[String] = None, + dataECC: Option[String] = None, + dataECCBytes: Int = 1, + nMSHRs: Int = 1, + nSDQ: Int = 17, + nRPQ: Int = 16, + nMMIOs: Int = 1, + blockBytes: Int = 64, + separateUncachedResp: Boolean = false, + acquireBeforeRelease: Boolean = false, + pipelineWayMux: Boolean = false, + clockGate: Boolean = false, + scratch: Option[BigInt] = None) { + + def coreDataBytes: Int = coreDataBits / 8 + def xBytes: Int = xLen / 8 + + def tagCode: Code = Code.fromString(tagECC) + def dataCode: Code = Code.fromString(dataECC) + + def dataScratchpadBytes: Int = scratch.map(_ => nSets*blockBytes).getOrElse(0) + def usingDataScratchpad: Boolean = 
scratch.nonEmpty + + def replacement = new RandomReplacement(nWays) + + def silentDrop: Boolean = !acquireBeforeRelease + + require((!scratch.isDefined || nWays == 1), + "Scratchpad only allowed in direct-mapped cache.") + require((!scratch.isDefined || nMSHRs == 0), + "Scratchpad only allowed in blocking cache.") + if (scratch.isEmpty) + require(isPow2(nSets), s"nSets($nSets) must be pow2") + + def blockOffBits = log2Up(cacheBlockBytes) + def idxBits = log2Up(nSets) + def untagBits = blockOffBits + idxBits + def pgUntagBits = if (usingVM) untagBits min pgIdxBits else untagBits + def tagBits = addressBits - pgUntagBits + def wayBits = log2Up(nWays) + def isDM = nWays == 1 + def rowBytes = rowBits/8 + def rowOffBits = log2Up(rowBytes) + + def lgCacheBlockBytes = log2Ceil(cacheBlockBytes) + def cacheDataBits = dataBits + def cacheDataBytes = cacheDataBits / 8 + def cacheDataBeats = (cacheBlockBytes * 8) / cacheDataBits + def refillCycles = cacheDataBeats + + def wordBits = coreDataBits + def wordBytes = coreDataBits / 8 + def subWordBits = subWordBitsOption.getOrElse(wordBits) + def subWordBytes = subWordBits / 8 + def wordOffBits = log2Up(wordBytes) + def beatBytes = cacheBlockBytes / cacheDataBeats + def beatWords = beatBytes / wordBytes + def beatOffBits = log2Up(beatBytes) + def idxMSB = untagBits-1 + def idxLSB = blockOffBits + def offsetmsb = idxLSB-1 + def offsetlsb = wordOffBits + def rowWords = rowBits/wordBits + def doNarrowRead = coreDataBits * nWays % rowBits == 0 + def eccBytes = dataECCBytes + val eccBits = dataECCBytes * 8 + val encBits = dataCode.width(eccBits) + val encWordBits = encBits * (wordBits / eccBits) + def encDataBits = dataCode.width(coreDataBits) // NBDCache only + def encRowBits = encDataBits*rowWords + def lrscBackoff = 3 // disallow LRSC reacquisition briefly + def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant + def nIOMSHRs = nMMIOs + def maxUncachedInFlight = nMMIOs + def 
dataScratchpadSize = dataScratchpadBytes + + require(rowBits >= coreDataBits, s"rowBits($rowBits) < coreDataBits($coreDataBits)") + if (!scratch.isDefined) + require(rowBits == cacheDataBits, s"rowBits($rowBits) != cacheDataBits($cacheDataBits)") + // would need offset addr for puts if data width < xlen + require(xLen <= cacheDataBits, s"xLen($xLen) > cacheDataBits($cacheDataBits)") +} + +class HellaCacheReq(params: DCacheParams) extends Bundle { + val phys = Bool() + val no_alloc = Bool() + val no_xcpt = Bool() + + val addr = UInt(params.coreMaxAddrBits.W) + val idx = Option.when(params.usingVM && params.untagBits > params.pgIdxBits)(UInt(params.coreMaxAddrBits.W)) + val tag = UInt((params.dcacheReqTagBits + log2Ceil(params.dcacheArbPorts)).W) + val cmd = UInt(M_SZ.W) + val size = UInt(log2Ceil(params.coreDataBytes.log2 + 1).W) + val signed = Bool() + val dprv = UInt(PRV.SZ.W) + val dv = Bool() +} + +class HellaCacheWriteData(params: DCacheParams) extends Bundle { + val data = UInt(params.coreDataBits.W) + val mask = UInt(params.coreDataBytes.W) +} + +class HellaCacheResp(params: DCacheParams) extends Bundle { + val replay = Bool() + val has_data = Bool() + val data_word_bypass = UInt(params.coreDataBits.W) + val data_raw = UInt(params.coreDataBits.W) + val store_data = UInt(params.coreDataBits.W) + val data = UInt(params.coreDataBits.W) + val mask = UInt(params.coreDataBytes.W) + val tag = UInt((params.dcacheReqTagBits + log2Ceil(params.dcacheArbPorts)).W) + val size = UInt(log2Ceil(params.coreDataBytes.log2 + 1).W) +} + +class AlignmentExceptions extends Bundle { + val ld = Bool() + val st = Bool() +} + +class HellaCacheExceptions extends Bundle { + val ma = new AlignmentExceptions + val pf = new AlignmentExceptions + val gf = new AlignmentExceptions + val ae = new AlignmentExceptions +} + +class HellaCachePerfEvents extends Bundle { + val acquire = Bool() + val release = Bool() + val grant = Bool() + val tlbMiss = Bool() + val blocked = Bool() + val 
canAcceptStoreThenLoad = Bool() + val canAcceptStoreThenRMW = Bool() + val canAcceptLoadThenLoad = Bool() + val storeBufferEmptyAfterLoad = Bool() + val storeBufferEmptyAfterStore = Bool() +} + +// interface between D$ and processor/DTLB +class HellaCacheIO(params: DCacheParams) extends Bundle { + val req = Decoupled(new HellaCacheReq(params)) + val s1_kill = Output(Bool()) // kill previous cycle's req + val s1_data = Output(new HellaCacheWriteData(params)) // data for previous cycle's req + val s2_nack = Input(Bool()) // req from two cycles ago is rejected + val s2_nack_cause_raw = Input(Bool()) // reason for nack is store-load RAW hazard (performance hint) + val s2_kill = Output(Bool()) // kill req from two cycles ago + val s2_uncached = Input(Bool()) // advisory signal that the access is MMIO + val s2_paddr = Input(UInt(params.paddrBits.W)) // translated address + + val resp = Flipped(Valid(new HellaCacheResp(params))) + val replay_next = Input(Bool()) + val s2_xcpt = Input(new HellaCacheExceptions) + val s2_gpa = Input(UInt(params.vaddrBitsExtended.W)) + val s2_gpa_is_pte = Input(Bool()) + val uncached_resp = Option.when(params.separateUncachedResp)(Flipped(Decoupled(new HellaCacheResp(params)))) + val ordered = Input(Bool()) + val perf = Input(new HellaCachePerfEvents()) + + val keep_clock_enabled = Output(Bool()) // should D$ avoid clock-gating itself? + val clock_enabled = Input(Bool()) // is D$ currently being clocked? 
+} + +/** Metadata array used for all HellaCaches */ + +class L1Metadata(tagBits: Int) extends Bundle { + val coh = new ClientMetadata + val tag = UInt(tagBits.W) +} + +object L1Metadata { + def apply(tagBits: Int, tag: Bits, coh: ClientMetadata) = { + val meta = Wire(new L1Metadata(tagBits)) + meta.tag := tag + meta.coh := coh + meta + } +} + +class L1MetaReadReq(idxBits: Int, nWays: Int, tagBits: Int) extends Bundle { + val idx = UInt(idxBits.W) + val way_en = UInt(nWays.W) + val tag = UInt(tagBits.W) +} + +class L1MetaWriteReq(idxBits: Int, nWays: Int, tagBits: Int) extends L1MetaReadReq(idxBits, nWays, tagBits) { + val data = new L1Metadata(tagBits) +} + +class L1MetadataArray[T <: L1Metadata](onReset: () => T, idxBits: Int, tagBits: Int, nWays: Int, nSets: Int) extends Module { + val rstVal = onReset() + val io = IO(new Bundle { + val read = Flipped(Decoupled(new L1MetaReadReq(idxBits, nWays, tagBits))) + val write = Flipped(Decoupled(new L1MetaWriteReq(idxBits, nWays, tagBits))) + val resp = Output(Vec(nWays, rstVal.cloneType)) + }) + + val rst_cnt = RegInit(0.U(log2Up(nSets+1).W)) + val rst = rst_cnt < nSets.U + val waddr = Mux(rst, rst_cnt, io.write.bits.idx) + val wdata = Mux(rst, rstVal, io.write.bits.data).asUInt + val wmask = Mux(rst || (nWays == 1).B, (-1).S, io.write.bits.way_en.asSInt).asBools + val rmask = Mux(rst || (nWays == 1).B, (-1).S, io.read.bits.way_en.asSInt).asBools + when (rst) { rst_cnt := rst_cnt+1.U } + + val metabits = rstVal.getWidth + val tag_array = SyncReadMem(nSets, Vec(nWays, UInt(metabits.W))) + val wen = rst || io.write.valid + when (wen) { + tag_array.write(waddr, VecInit.fill(nWays)(wdata), wmask) + } + io.resp := tag_array.read(io.read.bits.idx, io.read.fire()).map(_.asTypeOf(chiselTypeOf(rstVal))) + + io.read.ready := !wen // so really this could be a 6T RAM + io.write.ready := !rst +} + + +/** Base classes for Diplomatic TL2 HellaCaches */ + +abstract class HellaCache(staticIdForMetadataUseOnly: Int, protected val cfg: 
DCacheParams) extends Module { + protected def cacheClientParameters = cfg.scratch.map(x => Seq()).getOrElse(Seq(TLMasterParameters.v1( + name = s"Core ${staticIdForMetadataUseOnly} DCache", + sourceId = IdRange(0, 1 max cfg.nMSHRs), + supportsProbe = TransferSizes(cfg.blockBytes, cfg.blockBytes)))) + + protected def mmioClientParameters = Seq(TLMasterParameters.v1( + name = s"Core ${staticIdForMetadataUseOnly} DCache MMIO", + sourceId = IdRange(firstMMIO, firstMMIO + cfg.nMMIOs), + requestFifo = true)) + + def firstMMIO = (cacheClientParameters.map(_.sourceId.end) :+ 0).max + + val node = TLClientNode(Seq(TLMasterPortParameters.v1( + clients = cacheClientParameters ++ mmioClientParameters, + minLatency = 1, + requestFields = tileParams.core.useVM.option(Seq()).getOrElse(Seq(AMBAProtField()))))) + + val hartIdSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) + val mmioAddressPrefixSinkNodeOpt = cfg.scratch.map(_ => BundleBridgeSink[UInt]()) + + val module: HellaCacheModule + + def flushOnFenceI = cfg.scratch.isEmpty && !node.edges.out(0).manager.managers.forall(m => !m.supportsAcquireB || !m.executable || m.regionType >= RegionType.TRACKED || m.regionType <= RegionType.IDEMPOTENT) + + def canSupportCFlushLine = !usingVM || cfg.blockBytes * cfg.nSets <= (1 << pgIdxBits) + + require(!tileParams.core.haveCFlush || cfg.scratch.isEmpty, "CFLUSH_D_L1 instruction requires a D$") +} \ No newline at end of file diff --git a/rocket/src/PMP.scala b/rocket/src/PMP.scala index fe409e032..a96ad869d 100644 --- a/rocket/src/PMP.scala +++ b/rocket/src/PMP.scala @@ -55,7 +55,7 @@ class PMP(paddrBits: Int, pmpGranularity: Int, pgIdxBits: Int, pgLevels: Int, pg val base = Cat(addr, cfg.a(0)) | ((pmpGranularity - 1).U >> lgAlign) Cat(base & ~(base + 1.U), ((1 << lgAlign) - 1).U) } - private def comparand = ~(~(addr << lgAlign) | (pmpGranularity - 1).U) + private def comparand: UInt = ~(~(addr << lgAlign) | (pmpGranularity - 1).U) private def pow2Match(x: UInt, lgSize: UInt, 
lgMaxSize: Int) = { def eval(a: UInt, b: UInt, m: UInt) = ((a ^ b) & ~m) === 0.U @@ -88,7 +88,7 @@ class PMP(paddrBits: Int, pmpGranularity: Int, pgIdxBits: Int, pgLevels: Int, pg private def upperBoundMatch(x: UInt, lgMaxSize: Int) = boundMatch(x, 0.U, lgMaxSize) - private def rangeMatch(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP) = + private def rangeMatch(x: UInt, lgSize: UInt, lgMaxSize: Int, prev: PMP): Bool = prev.lowerBoundMatch(x, lgSize, lgMaxSize) && upperBoundMatch(x, lgMaxSize) private def pow2Homogeneous(x: UInt, pgLevel: UInt) = { diff --git a/diplomatic/src/rocket/PTW.scala b/rocket/src/PTW.scala similarity index 80% rename from diplomatic/src/rocket/PTW.scala rename to rocket/src/PTW.scala index 4100ec858..8b656cdb7 100644 --- a/diplomatic/src/rocket/PTW.scala +++ b/rocket/src/PTW.scala @@ -7,12 +7,8 @@ import chisel3._ import chisel3.util.{Arbiter, Cat, Decoupled, Enum, Mux1H, OHToUInt, PopCount, PriorityEncoder, PriorityEncoderOH, RegEnable, UIntToOH, Valid, is, isPow2, log2Ceil, switch} import chisel3.withClock import chisel3.internal.sourceinfo.SourceInfo -import org.chipsalliance.cde.config.Parameters -import freechips.rocketchip.subsystem.CacheBlockBytes -import org.chipsalliance.rockettile._ -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property +import org.chipsalliance.rocket.util._ +import org.chipsalliance.rocket.MemoryOpConstants._ import scala.collection.mutable.ListBuffer @@ -20,7 +16,7 @@ import scala.collection.mutable.ListBuffer * * TLB send a PTE request to PTW when L1TLB miss */ -class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { +class PTWReq(vpnBits: Int) extends Bundle { val addr = UInt(vpnBits.W) val need_gpa = Bool() val vstage1 = Bool() @@ -31,7 +27,7 @@ class PTWReq(implicit p: Parameters) extends CoreBundle()(p) { * * containing: target PTE, exceptions, two-satge tanslation info */ -class PTWResp(implicit p: Parameters) extends 
CoreBundle()(p) { +class PTWResp(pgLevels: Int, vaddrBits: Int) extends Bundle { /** ptw access exception */ val ae_ptw = Bool() /** final access exception */ @@ -68,18 +64,30 @@ class PTWResp(implicit p: Parameters) extends CoreBundle()(p) { * - CSRs info * - pmp results from PMP(in TLB) */ -class TLBPTWIO(implicit p: Parameters) extends CoreBundle()(p) - with HasCoreParameters { - val req = Decoupled(Valid(new PTWReq)) - val resp = Flipped(Valid(new PTWResp)) - val ptbr = Input(new PTBR()) - val hgatp = Input(new PTBR()) - val vsatp = Input(new PTBR()) +class TLBPTWIO( + xLen: Int, + vpnBits: Int, + pgLevels: Int, + minPgLevels: Int, + pgLevelBits: Int, + maxPAddrBits: Int, + pgIdxBits: Int, + vaddrBits: Int, + paddrBits: Int, + pmpGranularity: Int, + nPMPs: Int, + customCSRsParam: CustomCSRs +) extends Bundle { + val req = Decoupled(Valid(new PTWReq(vpnBits))) + val resp = Flipped(Valid(new PTWResp(pgLevels, vaddrBits))) + val ptbr = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) + val hgatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) + val vsatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) val status = Input(new MStatus()) val hstatus = Input(new HStatus()) val gstatus = Input(new MStatus()) val pmp = Input(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits))) - val customCSRs = Input(coreParams.customCSRs) + val customCSRs = Input(customCSRsParam) } /** PTW performance statistics */ class PTWPerfEvents extends Bundle { @@ -95,18 +103,30 @@ class PTWPerfEvents extends Bundle { * * PTW sends its performance statistics to core */ -class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) - with HasCoreParameters { - val ptbr = Input(new PTBR()) - val hgatp = Input(new PTBR()) - val vsatp = Input(new PTBR()) - val sfence = Flipped(Valid(new SFenceReq)) +class DatapathPTWIO( + xLen: Int, + pgLevels: Int, + pgLevelBits: Int, + minPgLevels: Int, + 
maxPAddrBits: Int, + pgIdxBits: Int, + vaddrBits: Int, + paddrBits: Int, + asIdBits: Int, + pmpGranularity: Int, + nPMPs: Int, + customCSRsParam: CustomCSRs +) extends Bundle { + val ptbr = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) + val hgatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) + val vsatp = Input(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) + val sfence = Flipped(Valid(new SFenceReq(vaddrBits, asIdBits))) val status = Input(new MStatus()) val hstatus = Input(new HStatus()) val gstatus = Input(new MStatus()) val pmp = Input(Vec(nPMPs, new PMP(paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits))) val perf = Output(new PTWPerfEvents()) - val customCSRs = Input(coreParams.customCSRs) + val customCSRs = Input(customCSRsParam) /** enable clock generated by ptw */ val clock_enabled = Output(Bool()) } @@ -115,7 +135,7 @@ class DatapathPTWIO(implicit p: Parameters) extends CoreBundle()(p) * contains useful methods to check PTE attributes * @see RV-priv spec 4.3.1 for pgae table entry format */ -class PTE(implicit p: Parameters) extends CoreBundle()(p) { +class PTE extends Bundle { val reserved_for_future = UInt(10.W) val ppn = UInt(44.W) val reserved_for_software = Bits(2.W) @@ -161,8 +181,13 @@ class PTE(implicit p: Parameters) extends CoreBundle()(p) { * @param nSets number of sets in L2TLB * @see RV-priv spec 4.3.1 for page table entry format */ -class L2TLBEntry(nSets: Int)(implicit p: Parameters) extends CoreBundle()(p) - with HasCoreParameters { +class L2TLBEntry( + nSets: Int, + maxSVAddrBits: Int, + pgIdxBits: Int, + ppnBits: Int, + usingHypervisor: Boolean, +) extends Bundle { val idxBits = log2Ceil(nSets) val tagBits = maxSVAddrBits - pgIdxBits - idxBits + (if (usingHypervisor) 1 else 0) val tag = UInt(tagBits.W) @@ -216,24 +241,85 @@ class L2TLBEntry(nSets: Int)(implicit p: Parameters) extends CoreBundle()(p) * @see RV-priv spec 8.5 for Two-Stage Address Translation * 
@todo details in two-stage translation */ -class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) { +class PTW( + n: Int, + xLen: Int, + vpnBits: Int, + ppnBits: Int, + pgLevels: Int, + minPgLevels: Int, + pgLevelBits: Int, + maxPAddrBits: Int, + pgIdxBits: Int, + addressBits: Int, + dataBits: Int, + vaddrBits: Int, + vaddrBitsExtended: Int, + paddrBits: Int, + asIdBits: Int, + pmpGranularity: Int, + nPMPs: Int, + nPTECacheEntries: Int, + nL2TLBEntries: Int, + nL2TLBWays: Int, + hypervisorExtraAddrBits: Int, + maxHypervisorExtraAddrBits: Int, + maxSVAddrBits: Int, + cacheBlockBytes: Int, + cacheDataBeats: Int, + cacheDataBits: Int, + coreDataBits: Int, + coreDataBytes: Int, + subWordBits: Int, + dcacheReqTagBits: Int, + dcacheArbPorts: Int, + untagBits: Int, + blockOffBits: Int, + rowBits: Int, + coreMaxAddrBits: Int, + lrscCycles: Int, + nWays: Int, + nMMIOs: Int, + dataScratchpadBytes: Int, + dataECCBytes: Int, + dataCode: Code, + customCSRsParam: CustomCSRs, + memSlaves: Seq[MemSlaveParameters], + clockGate: Boolean, + usingVM: Boolean, + usingHypervisor: Boolean, + usingDataScratchpad: Boolean, + separateUncachedResp: Boolean +) extends Module { val io = IO(new Bundle { /** to n TLB */ - val requestor = Flipped(Vec(n, new TLBPTWIO)) + val requestor = Flipped(Vec( + n, + new TLBPTWIO( + xLen, vpnBits, pgLevels, minPgLevels, pgLevelBits, maxPAddrBits, + pgIdxBits, vaddrBits, paddrBits, pmpGranularity, nPMPs, customCSRsParam + ) + )) /** to HellaCache */ - val mem = new HellaCacheIO + val mem = new HellaCacheIO(DCacheParams( + xLen, paddrBits, vaddrBitsExtended, coreDataBits, coreMaxAddrBits, cacheBlockBytes, + pgIdxBits, addressBits, dataBits, lrscCycles, dcacheReqTagBits, dcacheArbPorts, usingVM + )) /** to Core * * contains CSRs info and performance statistics */ - val dpath = new DatapathPTWIO + val dpath = new DatapathPTWIO( + xLen, pgLevels, pgLevelBits, minPgLevels, maxPAddrBits, pgIdxBits, + vaddrBits, paddrBits, asIdBits, 
pmpGranularity, nPMPs, customCSRsParam + ) }) val s_ready :: s_req :: s_wait1 :: s_dummy1 :: s_wait2 :: s_wait3 :: s_dummy2 :: s_fragment_superpage :: Nil = Enum(8) val state = RegInit(s_ready) val l2_refill_wire = Wire(Bool()) /** Arbiter to arbite request from n TLB */ - val arb = Module(new Arbiter(Valid(new PTWReq), n)) + val arb = Module(new Arbiter(Valid(new PTWReq(vpnBits)), n)) // use TLB req as arbitor's input arb.io.in <> io.requestor.map(_.req) // receive req only when s_ready and not in refill @@ -244,7 +330,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val clock_en = state =/= s_ready || l2_refill_wire || arb.io.out.valid || io.dpath.sfence.valid || io.dpath.customCSRs.disableDCacheClockGate io.dpath.clock_enabled := usingVM.B && clock_en val gated_clock = - if (!usingVM || !tileParams.dcache.get.clockGate) clock + if (!usingVM || !clockGate) clock else ClockGate(clock, clock_en, "ptw_clock_gate") withClock (gated_clock) { // entering gated-clock domain @@ -267,18 +353,18 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val resp_fragmented_superpage = Reg(Bool()) /** tlb request */ - val r_req = Reg(new PTWReq) + val r_req = Reg(new PTWReq(vpnBits)) /** current selected way in arbitor */ val r_req_dest = Reg(Bits()) // to respond to L1TLB : l2_hit // to construct mem.req.addr val r_pte = Reg(new PTE) - val r_hgatp = Reg(new PTBR) + val r_hgatp = Reg(new PTBR(xLen, pgLevels, minPgLevels, maxPAddrBits, pgIdxBits)) // 2-stage pageLevel val aux_count = Reg(UInt(log2Ceil(pgLevels).W)) /** pte for 2-stage translation */ val aux_pte = Reg(new PTE) - val aux_ppn_hi = (pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth).option(Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W))) + val aux_ppn_hi = Option.when(pgLevels > 4 && r_req.addr.getWidth > aux_pte.ppn.getWidth)(Reg(UInt((r_req.addr.getWidth - aux_pte.ppn.getWidth).W))) val gpa_pgoff = Reg(UInt(pgIdxBits.W)) // only 
valid in resp_gf case val stage2 = Reg(Bool()) val stage2_final = Reg(Bool()) @@ -302,7 +388,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } // construct pte from mem.resp val (pte, invalid_paddr) = { - val tmp = mem_resp_data.asTypeOf(new PTE()) + val tmp = mem_resp_data.asTypeOf(new PTE) val res = WireDefault(tmp) res.ppn := Mux(do_both_stages && !stage2, tmp.ppn(vpnBits.min(tmp.ppn.getWidth)-1, 0), tmp.ppn(ppnBits-1, 0)) when (tmp.r || tmp.w || tmp.x) { @@ -310,7 +396,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( for (i <- 0 until pgLevels-1) when (count <= i.U && tmp.ppn((pgLevels-1-i)*pgLevelBits-1, (pgLevels-2-i)*pgLevelBits) =/= 0.U) { res.v := false.B } } - (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits) =/= 0.U, (tmp.ppn >> ppnBits) =/= 0.U)) + (res, Mux(do_both_stages && !stage2, (tmp.ppn >> vpnBits).asUInt =/= 0.U, (tmp.ppn >> ppnBits).asUInt =/= 0.U)) } // find non-leaf PTE, need traverse val traverse = pte.table() && !invalid_paddr && count < (pgLevels-1).U @@ -322,7 +408,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } val mask = Mux(stage2 && count === r_hgatp_initial_count, ((1 << (hypervisorExtraAddrBits + pgLevelBits)) - 1).U, ((1 << pgLevelBits) - 1).U) val vpn_idx = vpn_idxs(count) & mask - val raw_pte_addr = ((r_pte.ppn << pgLevelBits) | vpn_idx) << log2Ceil(xLen / 8) + val raw_pte_addr = ((r_pte.ppn << pgLevelBits).asUInt | vpn_idx) << log2Ceil(xLen / 8) val size = if (usingHypervisor) vaddrBits else paddrBits //use r_pte.ppn as page table base address //use vpn slice as offset @@ -357,14 +443,14 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( /** PTECache caches non-leaf PTE * @param s2 true: 2-stage address translation */ - def makePTECache(s2: Boolean): (Bool, UInt) = if (coreParams.nPTECacheEntries == 0) { + def makePTECache(s2: Boolean): (Bool, UInt) = if (nPTECacheEntries == 
0) { (false.B, 0.U) } else { - val plru = new PseudoLRU(coreParams.nPTECacheEntries) - val valid = RegInit(0.U(coreParams.nPTECacheEntries.W)) - val tags = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor) 1 + vaddrBits else paddrBits).W))) + val plru = new PseudoLRU(nPTECacheEntries) + val valid = RegInit(0.U(nPTECacheEntries.W)) + val tags = Reg(Vec(nPTECacheEntries, UInt((if (usingHypervisor) 1 + vaddrBits else paddrBits).W))) // not include full pte, only ppn - val data = Reg(Vec(coreParams.nPTECacheEntries, UInt((if (usingHypervisor && s2) vpnBits else ppnBits).W))) + val data = Reg(Vec(nPTECacheEntries, UInt((if (usingHypervisor && s2) vpnBits else ppnBits).W))) val can_hit = if (s2) count === r_hgatp_initial_count && aux_count < (pgLevels-1).U && r_req.vstage1 && stage2 && !stage2_final else count < (pgLevels-1).U && Mux(r_req.vstage1, stage2, !r_req.stage2) @@ -412,38 +498,38 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( io.dpath.perf.l2miss := false.B io.dpath.perf.l2hit := false.B // l2tlb - val (l2_hit, l2_error, l2_pte, l2_tlb_ram) = if (coreParams.nL2TLBEntries == 0) (false.B, false.B, WireDefault(0.U.asTypeOf(new PTE)), None) else { + val (l2_hit, l2_error, l2_pte, l2_tlb_ram) = if (nL2TLBEntries == 0) (false.B, false.B, WireDefault(0.U.asTypeOf(new PTE)), None) else { val code = new ParityCode - require(isPow2(coreParams.nL2TLBEntries)) - require(isPow2(coreParams.nL2TLBWays)) - require(coreParams.nL2TLBEntries >= coreParams.nL2TLBWays) - val nL2TLBSets = coreParams.nL2TLBEntries / coreParams.nL2TLBWays + require(isPow2(nL2TLBEntries)) + require(isPow2(nL2TLBWays)) + require(nL2TLBEntries >= nL2TLBWays) + val nL2TLBSets = nL2TLBEntries / nL2TLBWays require(isPow2(nL2TLBSets)) val idxBits = log2Ceil(nL2TLBSets) - val l2_plru = new SetAssocLRU(nL2TLBSets, coreParams.nL2TLBWays, "plru") + val l2_plru = new SetAssocLRU(nL2TLBSets, nL2TLBWays, "plru") val ram = DescribedSRAM( name = "l2_tlb_ram", desc = 
"L2 TLB", size = nL2TLBSets, - data = Vec(coreParams.nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets).getWidth).W)) + data = Vec(nL2TLBWays, UInt(code.width(new L2TLBEntry(nL2TLBSets, maxSVAddrBits, pgIdxBits, ppnBits, usingHypervisor).getWidth).W)) ) - val g = Reg(Vec(coreParams.nL2TLBWays, UInt(nL2TLBSets.W))) - val valid = RegInit(VecInit(Seq.fill(coreParams.nL2TLBWays)(0.U(nL2TLBSets.W)))) + val g = Reg(Vec(nL2TLBWays, UInt(nL2TLBSets.W))) + val valid = RegInit(VecInit(Seq.fill(nL2TLBWays)(0.U(nL2TLBSets.W)))) // use r_req to construct tag val (r_tag, r_idx) = Split(Cat(r_req.vstage1, r_req.addr(maxSVAddrBits-pgIdxBits-1, 0)), idxBits) /** the valid vec for the selected set(including n ways) */ val r_valid_vec = valid.map(_(r_idx)).asUInt - val r_valid_vec_q = Reg(UInt(coreParams.nL2TLBWays.W)) - val r_l2_plru_way = Reg(UInt(log2Ceil(coreParams.nL2TLBWays max 1).W)) + val r_valid_vec_q = Reg(UInt(nL2TLBWays.W)) + val r_l2_plru_way = Reg(UInt(log2Ceil(nL2TLBWays max 1).W)) r_valid_vec_q := r_valid_vec // replacement way - r_l2_plru_way := (if (coreParams.nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U) + r_l2_plru_way := (if (nL2TLBWays > 1) l2_plru.way(r_idx) else 0.U) // refill with r_pte(leaf pte) when (l2_refill && !invalidated) { - val entry = Wire(new L2TLBEntry(nL2TLBSets)) + val entry = Wire(new L2TLBEntry(nL2TLBSets, maxSVAddrBits, pgIdxBits, ppnBits, usingHypervisor)) entry.ppn := r_pte.ppn entry.d := r_pte.d entry.a := r_pte.a @@ -454,23 +540,23 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( entry.tag := r_tag // if all the way are valid, use plru to select one way to be replaced, // otherwise use PriorityEncoderOH to select one - val wmask = if (coreParams.nL2TLBWays > 1) Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, coreParams.nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) else 1.U(1.W) - ram.write(r_idx, VecInit(Seq.fill(coreParams.nL2TLBWays)(code.encode(entry.asUInt))), wmask.asBools) + val wmask = if 
(nL2TLBWays > 1) Mux(r_valid_vec_q.andR, UIntToOH(r_l2_plru_way, nL2TLBWays), PriorityEncoderOH(~r_valid_vec_q)) else 1.U(1.W) + ram.write(r_idx, VecInit(Seq.fill(nL2TLBWays)(code.encode(entry.asUInt))), wmask.asBools) val mask = UIntToOH(r_idx) - for (way <- 0 until coreParams.nL2TLBWays) { + for (way <- 0 until nL2TLBWays) { when (wmask(way)) { valid(way) := valid(way) | mask - g(way) := Mux(r_pte.g, g(way) | mask, g(way) & ~mask) + g(way) := Mux(r_pte.g, g(way) | mask, g(way) & (~mask).asUInt) } } } // sfence happens when (io.dpath.sfence.valid) { val hg = usingHypervisor.B && io.dpath.sfence.bits.hg - for (way <- 0 until coreParams.nL2TLBWays) { + for (way <- 0 until nL2TLBWays) { valid(way) := - Mux(!hg && io.dpath.sfence.bits.rs1, valid(way) & ~UIntToOH(io.dpath.sfence.bits.addr(idxBits+pgIdxBits-1, pgIdxBits)), + Mux(!hg && io.dpath.sfence.bits.rs1, valid(way) & (~UIntToOH(io.dpath.sfence.bits.addr(idxBits+pgIdxBits-1, pgIdxBits))).asUInt, Mux(!hg && io.dpath.sfence.bits.rs2, valid(way) & g(way), 0.U)) } @@ -485,11 +571,11 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val s2_rdata = s1_rdata.map(s1_rdway => code.decode(RegEnable(s1_rdway, s1_valid))) val s2_valid_vec = RegEnable(r_valid_vec, s1_valid) val s2_g_vec = RegEnable(VecInit(g.map(_(r_idx))), s1_valid) - val s2_error = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR + val s2_error = (0 until nL2TLBWays).map(way => s2_valid_vec(way) && s2_rdata(way).error).orR when (s2_valid && s2_error) { valid.foreach { _ := 0.U }} // decode - val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets))) - val s2_hit_vec = (0 until coreParams.nL2TLBWays).map(way => s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag)) + val s2_entry_vec = s2_rdata.map(_.uncorrected.asTypeOf(new L2TLBEntry(nL2TLBSets, maxSVAddrBits, pgIdxBits, ppnBits, usingHypervisor))) + val s2_hit_vec = (0 until nL2TLBWays).map(way => 
s2_valid_vec(way) && (r_tag === s2_entry_vec(way).tag)) val s2_hit = s2_valid && s2_hit_vec.orR io.dpath.perf.l2miss := s2_valid && !(s2_hit_vec.orR) io.dpath.perf.l2hit := s2_hit @@ -512,7 +598,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( s2_pte.reserved_for_future := 0.U s2_pte.reserved_for_software := 0.U - for (way <- 0 until coreParams.nL2TLBWays) { + for (way <- 0 until nL2TLBWays) { ccover(s2_hit && s2_hit_vec(way), s"L2_TLB_HIT_WAY$way", s"L2 TLB hit way$way") } @@ -540,14 +626,14 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( val pmaPgLevelHomogeneous = (0 until pgLevels) map { i => val pgSize = BigInt(1) << (pgIdxBits + ((pgLevels - 1 - i) * pgLevelBits)) if (pageGranularityPMPs && i == pgLevels - 1) { - require(TLBPageLookup.homogeneous(edge.manager.managers, pgSize), s"All memory regions must be $pgSize-byte aligned") + require(TLBPageLookup.homogeneous(memSlaves, pgSize), s"All memory regions must be $pgSize-byte aligned") true.B } else { - TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), pgSize)(r_pte.ppn << pgIdxBits).homogeneous + TLBPageLookup(memSlaves, xLen, cacheBlockBytes, pgSize)((r_pte.ppn << pgIdxBits).asUInt).homogeneous } } val pmaHomogeneous = pmaPgLevelHomogeneous(count) - val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp, paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits).apply(r_pte.ppn << pgIdxBits, count) + val pmpHomogeneous = new PMPHomogeneityChecker(io.dpath.pmp, paddrBits, pmpGranularity, pgIdxBits, pgLevels, pgLevelBits).apply((r_pte.ppn << pgIdxBits).asUInt, count) val homogeneous = pmaHomogeneous && pmpHomogeneous // response to tlb for (i <- 0 until io.requestor.size) { @@ -579,7 +665,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( // control state machine val next_state = WireDefault(state) - state := OptimizationBarrier(next_state) + state := next_state val do_switch = 
WireDefault(false.B) switch (state) { @@ -665,7 +751,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( makePTE(stage1_ppn & superpage_mask, aux_pte) } - r_pte := OptimizationBarrier( + r_pte := // l2tlb hit->find a leaf PTE(l2_pte), respond to L1TLB Mux(l2_hit && !l2_error, l2_pte, // pte cache hit->find a non-leaf PTE(pte_cache),continue to request mem @@ -678,7 +764,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( Mux(state === s_fragment_superpage && !homogeneous && count =/= (pgLevels - 1).U, makePTE(makeFragmentedSuperpagePPN(r_pte.ppn)(count), r_pte), // when tlb request come->request mem, use root address in satp(or vsatp,hgatp) Mux(arb.io.out.fire, Mux(arb.io.out.bits.bits.stage2, makeHypervisorRootPTE(io.dpath.hgatp, io.dpath.vsatp.ppn, r_pte), makePTE(satp.ppn, r_pte)), - r_pte))))))) + r_pte)))))) when (l2_hit && !l2_error) { assert(state === s_req || state === s_wait1) @@ -761,7 +847,7 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( } // leaving gated-clock domain private def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - if (usingVM) property.cover(cond, s"PTW_$label", "MemorySystem;;" + desc) + if (usingVM) cover(cond, s"PTW_$label; MemorySystem;;" + desc) /** Relace PTE.ppn with ppn */ private def makePTE(ppn: UInt, default: PTE) = { @@ -779,20 +865,3 @@ class PTW(n: Int)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()( pte } } - -/** Mix-ins for constructing tiles that might have a PTW */ -trait CanHavePTW extends HasTileParameters with HasHellaCache { this: BaseTile => - val module: CanHavePTWModule - var nPTWPorts = 1 - nDCachePorts += usingPTW.toInt -} - -trait CanHavePTWModule extends HasHellaCacheModule { - val outer: CanHavePTW - val ptwPorts = ListBuffer(outer.dcache.module.io.ptw) - val ptw = Module(new PTW(outer.nPTWPorts)(outer.dcache.node.edges.out(0), outer.p)) - ptw.io.mem <> 
DontCare - if (outer.usingPTW) { - dcachePorts += ptw.io.mem - } -} diff --git a/diplomatic/src/rocket/TLB.scala b/rocket/src/TLB.scala similarity index 84% rename from diplomatic/src/rocket/TLB.scala rename to rocket/src/TLB.scala index c73a14536..bc060bc86 100644 --- a/diplomatic/src/rocket/TLB.scala +++ b/rocket/src/TLB.scala @@ -6,19 +6,9 @@ package org.chipsalliance.rocket import chisel3._ import chisel3.util._ -import org.chipsalliance.cde.config.{Field, Parameters} -import freechips.rocketchip.subsystem.CacheBlockBytes -import freechips.rocketchip.diplomacy.RegionType -import org.chipsalliance.rockettile.{CoreModule, CoreBundle} -import freechips.rocketchip.tilelink._ -import freechips.rocketchip.util._ -import freechips.rocketchip.util.property -import freechips.rocketchip.devices.debug.DebugModuleKey import chisel3.internal.sourceinfo.SourceInfo - -case object PgLevels extends Field[Int](2) -case object ASIdBits extends Field[Int](0) -case object VMIdBits extends Field[Int](0) +import org.chipsalliance.rocket._ +import org.chipsalliance.rocket.util._ /** =SFENCE= * rs1 rs2 @@ -35,7 +25,7 @@ case object VMIdBits extends Field[Int](0) * If rs1!=x0 and rs2!=x0, the fence orders only reads and writes made to the leaf page table entry corresponding to the virtual address in rs1, for the address space identified by integer register rs2. Accesses to global mappings are not ordered. * }}} */ -class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) { +class SFenceReq(vaddrBits: Int, asIdBits: Int) extends Bundle { val rs1 = Bool() val rs2 = Bool() val addr = UInt(vaddrBits.W) @@ -44,7 +34,7 @@ class SFenceReq(implicit p: Parameters) extends CoreBundle()(p) { val hg = Bool() } -class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) { +class TLBReq(lgMaxSize: Int, vaddrBitsExtended: Int) extends Bundle { /** request address from CPU. 
*/ val vaddr = UInt(vaddrBitsExtended.W) /** don't lookup TLB, bypass vaddr as paddr */ @@ -52,7 +42,7 @@ class TLBReq(lgMaxSize: Int)(implicit p: Parameters) extends CoreBundle()(p) { /** granularity */ val size = UInt(log2Ceil(lgMaxSize + 1).W) /** memory command. */ - val cmd = Bits(M_SZ.W) + val cmd = Bits(MemoryOpConstants.M_SZ.W) val prv = UInt(PRV.SZ.W) /** virtualization mode */ val v = Bool() @@ -65,7 +55,7 @@ class TLBExceptions extends Bundle { val inst = Bool() } -class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { +class TLBResp(paddrBits: Int, vaddrBitsExtended: Int) extends Bundle { // lookup responses val miss = Bool() /** physical address */ @@ -88,7 +78,7 @@ class TLBResp(implicit p: Parameters) extends CoreBundle()(p) { val prefetchable = Bool() } -class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) { +class TLBEntryData(ppnBits: Int) extends Bundle { val ppn = UInt(ppnBits.W) /** pte.u user */ val u = Bool() @@ -138,7 +128,16 @@ class TLBEntryData(implicit p: Parameters) extends CoreBundle()(p) { } /** basic cell for TLB data */ -class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boolean)(implicit p: Parameters) extends CoreBundle()(p) { +class TLBEntry( + val nSectors: Int, + val superpage: Boolean, + val superpageOnly: Boolean, + pgLevels: Int, + pgLevelBits: Int, + vpnBits: Int, + ppnBits: Int, + hypervisorExtraAddrBits: Int, + usingVM: Boolean) extends Bundle { require(nSectors == 1 || !superpage) require(!superpageOnly || superpage) @@ -148,19 +147,19 @@ class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boo /** tag in vitualization mode */ val tag_v = Bool() /** entry data */ - val data = Vec(nSectors, UInt(new TLBEntryData().getWidth.W)) + val data = Vec(nSectors, UInt(new TLBEntryData(ppnBits).getWidth.W)) /** valid bit */ val valid = Vec(nSectors, Bool()) /** returns all entry data in this entry */ - def entry_data = data.map(_.asTypeOf(new TLBEntryData)) + 
def entry_data = data.map(_.asTypeOf(new TLBEntryData(ppnBits))) /** returns the index of sector */ - private def sectorIdx(vpn: UInt) = vpn.extract(nSectors.log2-1, 0) + private def sectorIdx(vpn: UInt) = vpn(log2Ceil(nSectors) - 1, 0) /** returns the entry data matched with this vpn*/ - def getData(vpn: UInt) = OptimizationBarrier(data(sectorIdx(vpn)).asTypeOf(new TLBEntryData)) + def getData(vpn: UInt) = data(sectorIdx(vpn)).asTypeOf(new TLBEntryData(ppnBits)) /** returns whether a sector hits */ - def sectorHit(vpn: UInt, virtual: Bool) = valid.orR && sectorTagMatch(vpn, virtual) + def sectorHit(vpn: UInt, virtual: Bool) = valid.asUInt.orR && sectorTagMatch(vpn, virtual) /** returns whether tag matches vpn */ - def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> nSectors.log2) === 0.U) && (tag_v === virtual) + def sectorTagMatch(vpn: UInt, virtual: Bool) = (((tag_vpn ^ vpn) >> log2Ceil(nSectors)) === 0.U) && (tag_v === virtual) /** returns hit signal */ def hit(vpn: UInt, virtual: Bool): Bool = { if (superpage && usingVM) { @@ -199,7 +198,7 @@ class TLBEntry(val nSectors: Int, val superpage: Boolean, val superpageOnly: Boo def insert(vpn: UInt, virtual: Bool, level: UInt, entry: TLBEntryData): Unit = { this.tag_vpn := vpn this.tag_v := virtual - this.level := level.extract(log2Ceil(pgLevels - superpageOnly.toInt)-1, 0) + this.level := level(log2Ceil(pgLevels - superpageOnly.B.litValue.toInt) - 1, 0) val idx = sectorIdx(vpn) valid(idx) := true.B @@ -302,18 +301,50 @@ case class TLBConfig( * @param instruction true for ITLB, false for DTLB * @param lgMaxSize @todo seems granularity * @param cfg [[TLBConfig]] - * @param edge collect SoC metadata. 
*/ -class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: TLEdgeOut, p: Parameters) extends CoreModule()(p) { +class TLB( + instruction: Boolean, + lgMaxSize: Int, + cfg: TLBConfig, + memSlaves: Seq[MemSlaveParameters], + pmpGranularity: Int, + nPMPs: Int, + pgLevels: Int, + minPgLevels: Int, + pgLevelBits: Int, + pgIdxBits: Int, + vpnBits: Int, + ppnBits: Int, + vaddrBits: Int, + vaddrBitsExtended: Int, + paddrBits: Int, + maxPAddrBits: Int, + hypervisorExtraAddrBits: Int, + asIdBits: Int, + xLen: Int, + cacheBlockBytes: Int, + customCSRsParam: CustomCSRs, + debugModuleAddress: Option[AddressSet], + memoryCacheable: Boolean, + memoryHomogenous: Boolean, + usingHypervisor: Boolean, + usingVM: Boolean, + usingAtomics: Boolean, + usingAtomicsInCache: Boolean, + usingAtomicsOnlyForIO: Boolean, + usingDataScratchpad: Boolean) extends Module { val io = IO(new Bundle { /** request from Core */ - val req = Flipped(Decoupled(new TLBReq(lgMaxSize))) + val req = Flipped(Decoupled(new TLBReq(lgMaxSize, vaddrBitsExtended))) /** response to Core */ - val resp = Output(new TLBResp()) + val resp = Output(new TLBResp(paddrBits, vaddrBitsExtended)) /** SFence Input */ - val sfence = Flipped(Valid(new SFenceReq)) + val sfence = Flipped(Valid((new SFenceReq(vaddrBits, asIdBits)))) /** IO to PTW */ - val ptw = new TLBPTWIO + val ptw = new TLBPTWIO( + xLen, vpnBits, pgLevels, minPgLevels, pgLevelBits, maxPAddrBits, + pgIdxBits, vaddrBits, paddrBits, pmpGranularity, nPMPs, customCSRsParam + ) /** suppress a TLB refill, one cycle after a miss */ val kill = Input(Bool()) }) @@ -323,14 +354,14 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T /** index for sectored_Entry */ val memIdx = vpn.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2) /** TLB Entry */ - val sectored_entries = Reg(Vec(cfg.nSets, Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false)))) + val sectored_entries = Reg(Vec(cfg.nSets, 
Vec(cfg.nWays / cfg.nSectors, new TLBEntry(cfg.nSectors, false, false, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM)))) /** Superpage Entry */ - val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true))) + val superpage_entries = Reg(Vec(cfg.nSuperpageEntries, new TLBEntry(1, true, true, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM))) /** Special Entry * * If PMP granularity is less than page size, thus need additional "special" entry manage PMP. */ - val special_entry = (!pageGranularityPMPs).option(Reg(new TLBEntry(1, true, false))) + val special_entry = Option.when(!pageGranularityPMPs)(Reg(new TLBEntry(1, true, false, pgLevels, pgLevelBits, vpnBits, ppnBits, hypervisorExtraAddrBits, usingVM))) def ordinary_entries = sectored_entries(memIdx) ++ superpage_entries def all_entries = ordinary_entries ++ special_entry def all_real_entries = sectored_entries.flatten ++ superpage_entries ++ special_entry @@ -394,7 +425,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T /** refill signal */ val do_refill = usingVM.B && io.ptw.resp.valid /** sfence invalidate refill */ - val invalidate_refill = state.isOneOf(s_request /* don't care */, s_wait_invalidate) || io.sfence.valid + val invalidate_refill = Seq(s_request /* don't care */, s_wait_invalidate).map(state === _).reduce(_ || _) || io.sfence.valid // PMP val mpu_ppn = Mux(do_refill, refill_ppn, Mux(vm_enabled && special_entry.nonEmpty.B, special_entry.map(e => e.ppn(vpn, e.getData(vpn))).getOrElse(0.U), io.req.bits.vaddr >> pgIdxBits)) @@ -407,15 +438,12 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T pmp.io.prv := mpu_priv // PMA // check exist a slave can consume this address. - val legal_address = edge.manager.findSafe(mpu_physaddr).reduce(_||_) - // check utility to help check SoC property. 
- def fastCheck(member: TLManagerParameters => Boolean) = - legal_address && edge.manager.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B) - // todo: using DataScratchpad doesn't support cacheable. - val cacheable = fastCheck(_.supportsAcquireB) && (instruction || !usingDataScratchpad).B - val homogeneous = TLBPageLookup(edge.manager.managers, xLen, p(CacheBlockBytes), BigInt(1) << pgIdxBits)(mpu_physaddr).homogeneous + val legal_address = Memory.findSafe(mpu_physaddr, memSlaves).reduce(_ || _) + // check utility to help check SoC property + def fastCheck(member: MemSlaveParameters => Boolean) = + legal_address && Memory.fastProperty(mpu_physaddr, member, (b:Boolean) => b.B, memSlaves) // In M mode, if access DM address(debug module program buffer) - val deny_access_to_debug = mpu_priv <= PRV.M.U && p(DebugModuleKey).map(dmp => dmp.address.contains(mpu_physaddr)).getOrElse(false.B) + val deny_access_to_debug = mpu_priv <= PRV.M.U && debugModuleAddress.map(_.contains(mpu_physaddr)).getOrElse(false.B) val prot_r = fastCheck(_.supportsGet) && !deny_access_to_debug && pmp.io.r val prot_w = fastCheck(_.supportsPutFull) && !deny_access_to_debug && pmp.io.w val prot_pp = fastCheck(_.supportsPutPartial) @@ -436,9 +464,9 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T when (do_refill) { val pte = io.ptw.resp.bits.pte val refill_v = r_vstage1_en || r_stage2_en - val newEntry = Wire(new TLBEntryData) + val newEntry = Wire(new TLBEntryData(ppnBits)) newEntry.ppn := pte.ppn - newEntry.c := cacheable + newEntry.c := memoryCacheable.B newEntry.u := pte.u newEntry.g := pte.g && pte.v newEntry.ae_ptw := io.ptw.resp.bits.ae_ptw @@ -470,7 +498,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T } // refill sectored_hit }.otherwise { - val r_memIdx = r_refill_tag.extract(cfg.nSectors.log2 + cfg.nSets.log2 - 1, cfg.nSectors.log2) + val r_memIdx = r_refill_tag(log2Ceil(cfg.nSectors) + log2Ceil(cfg.nSets) - 1, 
log2Ceil(cfg.nSectors)) val waddr = Mux(r_sectored_hit.valid, r_sectored_hit.bits, r_sectored_repl_addr) for ((e, i) <- sectored_entries(r_memIdx).zipWithIndex) when (waddr === i.U) { when (!r_sectored_hit.valid) { e.invalidate() } @@ -523,7 +551,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T // put effect val eff_array = Cat(Fill(nPhysicalEntries, prot_eff), normal_entries.map(_.eff).asUInt) // cacheable - val c_array = Cat(Fill(nPhysicalEntries, cacheable), normal_entries.map(_.c).asUInt) + val c_array = Cat(Fill(nPhysicalEntries, memoryCacheable.B), normal_entries.map(_.c).asUInt) // put partial val ppp_array = Cat(Fill(nPhysicalEntries, prot_pp), normal_entries.map(_.ppp).asUInt) // atomic arithmetic @@ -533,7 +561,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val ppp_array_if_cached = ppp_array | c_array val paa_array_if_cached = paa_array | (if(usingAtomicsInCache) c_array else 0.U) val pal_array_if_cached = pal_array | (if(usingAtomicsInCache) c_array else 0.U) - val prefetchable_array = Cat((cacheable && homogeneous) << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt) + val prefetchable_array = Cat((memoryCacheable && memoryHomogenous).B << (nPhysicalEntries-1), normal_entries.map(_.c).asUInt) // vaddr misaligned: vaddr[1:0]=b00 val misaligned = (io.req.bits.vaddr & (UIntToOH(io.req.bits.size) - 1.U)).orR @@ -543,11 +571,12 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T val signed = !guestPA val nPgLevelChoices = pgLevels - minPgLevels + 1 val minVAddrBits = pgIdxBits + minPgLevels * pgLevelBits + extraBits - (for (i <- 0 until nPgLevelChoices) yield { - val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.toInt))).U - val maskedVAddr = io.req.bits.vaddr & mask - additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask) - }).orR + VecInit(Seq.range(0, 
nPgLevelChoices).map { + i => + val mask = ((BigInt(1) << vaddrBitsExtended) - (BigInt(1) << (minVAddrBits + i * pgLevelBits - signed.B.litValue.toInt))).U + val maskedVAddr = io.req.bits.vaddr & mask + additionalPgLevels === i.U && !(maskedVAddr === 0.U || signed.B && maskedVAddr === mask) + }).asUInt.orR } val bad_gpa = if (!usingHypervisor) false.B @@ -556,15 +585,15 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T if (!usingVM || (minPgLevels == pgLevels && vaddrBits == vaddrBitsExtended)) false.B else vm_enabled && stage1_en && badVA(false) - val cmd_lrsc = usingAtomics.B && io.req.bits.cmd.isOneOf(M_XLR, M_XSC) - val cmd_amo_logical = usingAtomics.B && isAMOLogical(io.req.bits.cmd) - val cmd_amo_arithmetic = usingAtomics.B && isAMOArithmetic(io.req.bits.cmd) - val cmd_put_partial = io.req.bits.cmd === M_PWR - val cmd_read = isRead(io.req.bits.cmd) - val cmd_readx = usingHypervisor.B && io.req.bits.cmd === M_HLVX - val cmd_write = isWrite(io.req.bits.cmd) + val cmd_lrsc = usingAtomics.B && VecInit(Seq(MemoryOpConstants.M_XLR, MemoryOpConstants.M_XSC).map(io.req.bits.cmd === _)).asUInt.orR + val cmd_amo_logical = usingAtomics.B && MemoryOpConstants.isAMOLogical(io.req.bits.cmd) + val cmd_amo_arithmetic = usingAtomics.B && MemoryOpConstants.isAMOArithmetic(io.req.bits.cmd) + val cmd_put_partial = io.req.bits.cmd === MemoryOpConstants.M_PWR + val cmd_read = MemoryOpConstants.isRead(io.req.bits.cmd) + val cmd_readx = usingHypervisor.B && io.req.bits.cmd === MemoryOpConstants.M_HLVX + val cmd_write = MemoryOpConstants.isWrite(io.req.bits.cmd) val cmd_write_perms = cmd_write || - io.req.bits.cmd.isOneOf(M_FLUSH_ALL, M_WOK) // not a write, but needs write permissions + VecInit(Seq(MemoryOpConstants.M_FLUSH_ALL, MemoryOpConstants.M_WOK).map(io.req.bits.cmd === _)).asUInt.orR // not a write, but needs write permissions val lrscAllowed = Mux((usingDataScratchpad || usingAtomicsOnlyForIO).B, 0.U, c_array) val ae_array = @@ -606,7 +635,7 
@@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T when (io.req.valid && vm_enabled) { // replace when (sector_hits.orR) { sectored_plru.access(memIdx, OHToUInt(sector_hits)) } - when (superpage_hits.orR) { superpage_plru.access(OHToUInt(superpage_hits)) } + when (VecInit(superpage_hits).asUInt.orR) { superpage_plru.access(OHToUInt(superpage_hits)) } } // Superpages create the possibility that two entries in the TLB may match. @@ -636,7 +665,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T io.resp.ma.inst := false.B // this is up to the pipeline to figure out io.resp.cacheable := (c_array & hits).orR io.resp.must_alloc := (must_alloc_array & hits).orR - io.resp.prefetchable := (prefetchable_array & hits).orR && edge.manager.managers.forall(m => !m.supportsAcquireB || m.supportsHint).B + io.resp.prefetchable := (prefetchable_array & hits).orR && memSlaves.forall(m => !m.supportsAcquireB || m.supportsHint).B io.resp.miss := do_refill || vsatp_mode_mismatch || tlb_miss || multipleHits io.resp.paddr := Cat(ppn, io.req.bits.vaddr(pgIdxBits-1, 0)) io.resp.gpa_is_pte := vstage1_en && r_gpa_is_pte @@ -673,7 +702,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T r_sectored_repl_addr := replacementEntry(sectored_entries(memIdx), sectored_plru.way(memIdx)) r_sectored_hit.valid := sector_hits.orR r_sectored_hit.bits := OHToUInt(sector_hits) - r_superpage_hit.valid := superpage_hits.orR + r_superpage_hit.valid := VecInit(superpage_hits).asUInt.orR r_superpage_hit.bits := OHToUInt(superpage_hits) } // Handle SFENCE.VMA when send request to PTW. 
@@ -731,7 +760,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T } def ccover(cond: Bool, label: String, desc: String)(implicit sourceInfo: SourceInfo) = - property.cover(cond, s"${if (instruction) "I" else "D"}TLB_$label", "MemorySystem;;" + desc) + cover(cond, s"${if (instruction) "I" else "D"}TLB_$label MemorySystem;; $desc") /** Decides which entry to be replaced * * If there is a invalid entry, replace it with priorityencoder; @@ -740,7 +769,7 @@ class TLB(instruction: Boolean, lgMaxSize: Int, cfg: TLBConfig)(implicit edge: T * @return mask for TLBEntry replacement */ def replacementEntry(set: Seq[TLBEntry], alt: UInt) = { - val valids = set.map(_.valid.orR).asUInt + val valids = VecInit(set.map(_.valid.asUInt.orR)).asUInt Mux(valids.andR, alt, PriorityEncoder(~valids)) } } diff --git a/diplomatic/src/rocket/TLBPermissions.scala b/rocket/src/TLBPermissions.scala similarity index 56% rename from diplomatic/src/rocket/TLBPermissions.scala rename to rocket/src/TLBPermissions.scala index 26c7c055e..706304f40 100644 --- a/diplomatic/src/rocket/TLBPermissions.scala +++ b/rocket/src/TLBPermissions.scala @@ -3,10 +3,9 @@ package org.chipsalliance.rocket import chisel3._ -import chisel3.util.isPow2 +import chisel3.util._ -import freechips.rocketchip.diplomacy._ -import freechips.rocketchip.tilelink._ +import org.chipsalliance.rocket.util._ case class TLBPermissions( homogeneous: Bool, // if false, the below are undefined @@ -30,16 +29,16 @@ object TLBPageLookup val useful = r || w || x || c || a || l } - private def groupRegions(managers: Seq[TLManagerParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { - val permissions = managers.map { m => - (m.address, TLBFixedPermissions( - e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains m.regionType, - r = m.supportsGet || m.supportsAcquireB, // if cached, never uses Get - w = m.supportsPutFull || m.supportsAcquireT, // if cached, never uses Put - x = m.executable, - c = 
m.supportsAcquireB, - a = m.supportsArithmetic, - l = m.supportsLogical)) + private def groupRegions(memSlaves: Seq[MemSlaveParameters]): Map[TLBFixedPermissions, Seq[AddressSet]] = { // TODO: Decoupled from Tilelink + val permissions = memSlaves.map { p => + (p.address, TLBFixedPermissions( + e = Seq(RegionType.PUT_EFFECTS, RegionType.GET_EFFECTS) contains p.regionType, + r = p.supportsGet || p.supportsAcquireB, // if cached, never uses Get + w = p.supportsPutFull || p.supportsAcquireT, // if cached, never uses Put + x = p.executable, + c = p.supportsAcquireB, + a = p.supportsArithmetic, + l = p.supportsLogical)) } permissions @@ -50,8 +49,9 @@ object TLBPageLookup .toMap } + // TODO // Unmapped memory is considered to be inhomogeneous - def apply(managers: Seq[TLManagerParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = { + def apply(memSlaves: Seq[MemSlaveParameters], xLen: Int, cacheBlockBytes: Int, pageSize: BigInt): UInt => TLBPermissions = { require (isPow2(xLen) && xLen >= 8) require (isPow2(cacheBlockBytes) && cacheBlockBytes >= xLen/8) require (isPow2(pageSize) && pageSize >= cacheBlockBytes) @@ -60,18 +60,18 @@ object TLBPageLookup val allSizes = TransferSizes(1, cacheBlockBytes) val amoSizes = TransferSizes(4, xLen/8) - val permissions = managers.foreach { m => - require (!m.supportsGet || m.supportsGet .contains(allSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsGet} Get, but must support ${allSizes}") - require (!m.supportsPutFull || m.supportsPutFull .contains(allSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutFull} PutFull, but must support ${allSizes}") - require (!m.supportsPutPartial || m.supportsPutPartial.contains(allSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsPutPartial} PutPartial, but must support ${allSizes}") - require (!m.supportsAcquireB || m.supportsAcquireB .contains(xferSizes), s"Memory region 
'${m.name}' at ${m.address} only supports ${m.supportsAcquireB} AcquireB, but must support ${xferSizes}") - require (!m.supportsAcquireT || m.supportsAcquireT .contains(xferSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsAcquireT} AcquireT, but must support ${xferSizes}") - require (!m.supportsLogical || m.supportsLogical .contains(amoSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsLogical} Logical, but must support ${amoSizes}") - require (!m.supportsArithmetic || m.supportsArithmetic.contains(amoSizes), s"Memory region '${m.name}' at ${m.address} only supports ${m.supportsArithmetic} Arithmetic, but must support ${amoSizes}") - require (!(m.supportsAcquireB && m.supportsPutFull && !m.supportsAcquireT), s"Memory region '${m.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)") + val permissions = memSlaves.foreach { p => + require (!p.supportsGet || p.supportsGet .contains(allSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsGet} Get, but must support ${allSizes}") + require (!p.supportsPutFull || p.supportsPutFull .contains(allSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsPutFull} PutFull, but must support ${allSizes}") + require (!p.supportsPutPartial || p.supportsPutPartial.contains(allSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsPutPartial} PutPartial, but must support ${allSizes}") + require (!p.supportsAcquireB || p.supportsAcquireB .contains(xferSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsAcquireB} AcquireB, but must support ${xferSizes}") + require (!p.supportsAcquireT || p.supportsAcquireT .contains(xferSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsAcquireT} AcquireT, but must support ${xferSizes}") + require (!p.supportsLogical || p.supportsLogical .contains(amoSizes), s"Memory region 
'${p.name}' at ${p.address} only supports ${p.supportsLogical} Logical, but must support ${amoSizes}") + require (!p.supportsArithmetic || p.supportsArithmetic.contains(amoSizes), s"Memory region '${p.name}' at ${p.address} only supports ${p.supportsArithmetic} Arithmetic, but must support ${amoSizes}") + require (!(p.supportsAcquireB && p.supportsPutFull && !p.supportsAcquireT), s"Memory region '${p.name}' supports AcquireB (cached read) and PutFull (un-cached write) but not AcquireT (cached write)") } - val grouped = groupRegions(managers) + val grouped = groupRegions(memSlaves) .mapValues(_.filter(_.alignment >= pageSize)) // discard any region that's not big enough def lowCostProperty(prop: TLBFixedPermissions => Boolean): UInt => Bool = { @@ -108,7 +108,7 @@ object TLBPageLookup } // Are all pageSize intervals of mapped regions homogeneous? - def homogeneous(managers: Seq[TLManagerParameters], pageSize: BigInt): Boolean = { - groupRegions(managers).values.forall(_.forall(_.alignment >= pageSize)) + def homogeneous(memSlaves: Seq[MemSlaveParameters], pageSize: BigInt): Boolean = { + groupRegions(memSlaves).values.forall(_.forall(_.alignment >= pageSize)) } } diff --git a/rocket/src/util/AddressDecoder.scala b/rocket/src/util/AddressDecoder.scala new file mode 100644 index 000000000..8a84c8873 --- /dev/null +++ b/rocket/src/util/AddressDecoder.scala @@ -0,0 +1,134 @@ +// See LICENSE.SiFive for license details. + +package org.chipsalliance.rocket.util + +import Chisel.log2Ceil + +object AddressDecoder +{ + type Port = Seq[AddressSet] + type Ports = Seq[Port] + type Partition = Ports + type Partitions = Seq[Partition] + + val addressOrder = Ordering.ordered[AddressSet] + val portOrder = Ordering.Iterable(addressOrder) + val partitionOrder = Ordering.Iterable(portOrder) + + // Find the minimum subset of bits needed to disambiguate port addresses. 
+ // ie: inspecting only the bits in the output, you can look at an address + // and decide to which port (outer Seq) the address belongs. + def apply(ports: Ports, givenBits: BigInt = BigInt(0)): BigInt = { + val nonEmptyPorts = ports.filter(_.nonEmpty) + if (nonEmptyPorts.size <= 1) { + givenBits + } else { + // Verify the user did not give us an impossible problem + nonEmptyPorts.combinations(2).foreach { case Seq(x, y) => + x.foreach { a => y.foreach { b => + require (!a.overlaps(b), s"Ports cannot overlap: $a $b") + } } + } + + val maxBits = log2Ceil(1 + nonEmptyPorts.map(_.map(_.base).max).max) + val (bitsToTry, bitsToTake) = (0 until maxBits).map(BigInt(1) << _).partition(b => (givenBits & b) == 0) + val partitions = Seq(nonEmptyPorts.map(_.sorted).sorted(portOrder)) + val givenPartitions = bitsToTake.foldLeft(partitions) { (p, b) => partitionPartitions(p, b) } + val selected = recurse(givenPartitions, bitsToTry.reverse.toSeq) + val output = selected.reduceLeft(_ | _) | givenBits + + // Modify the AddressSets to allow the new wider match functions + val widePorts = nonEmptyPorts.map { _.map { _.widen(~output) } } + // Verify that it remains possible to disambiguate all ports + widePorts.combinations(2).foreach { case Seq(x, y) => + x.foreach { a => y.foreach { b => + require (!a.overlaps(b), s"Ports cannot overlap: $a $b") + } } + } + + output + } + } + + // A simpler version that works for a Seq[Int] + def apply(keys: Seq[Int]): Int = { + val ports = keys.map(b => Seq(AddressSet(b, 0))) + apply(ports).toInt + } + + // The algorithm has a set of partitions, discriminated by the selected bits. + // Each partion has a set of ports, listing all addresses that lead to that port. 
+ // Seq[Seq[Seq[AddressSet]]] + // ^^^^^^^^^^^^^^^ set of addresses that are routed out this port + // ^^^ the list of ports + // ^^^ cases already distinguished by the selected bits thus far + // + // Solving this problem is NP-hard, so we use a simple greedy heuristic: + // pick the bit which minimizes the number of ports in each partition + // as a secondary goal, reduce the number of AddressSets within a partition + + def bitScore(partitions: Partitions): Seq[Int] = { + val maxPortsPerPartition = partitions.map(_.size).max + val maxSetsPerPartition = partitions.map(_.map(_.size).sum).max + val sumSquarePortsPerPartition = partitions.map(p => p.size * p.size).sum + val sumSquareSetsPerPartition = partitions.map(_.map(p => p.size * p.size).sum).max + Seq(maxPortsPerPartition, maxSetsPerPartition, sumSquarePortsPerPartition, sumSquareSetsPerPartition) + } + + def partitionPort(port: Port, bit: BigInt): (Port, Port) = { + val addr_a = AddressSet(0, ~bit) + val addr_b = AddressSet(bit, ~bit) + // The addresses were sorted, so the filtered addresses are still sorted + val subset_a = port.filter(_.overlaps(addr_a)) + val subset_b = port.filter(_.overlaps(addr_b)) + (subset_a, subset_b) + } + + def partitionPorts(ports: Ports, bit: BigInt): (Ports, Ports) = { + val partitioned_ports = ports.map(p => partitionPort(p, bit)) + // because partitionPort dropped AddresSets, the ports might no longer be sorted + val case_a_ports = partitioned_ports.map(_._1).filter(!_.isEmpty).sorted(portOrder) + val case_b_ports = partitioned_ports.map(_._2).filter(!_.isEmpty).sorted(portOrder) + (case_a_ports, case_b_ports) + } + + def partitionPartitions(partitions: Partitions, bit: BigInt): Partitions = { + val partitioned_partitions = partitions.map(p => partitionPorts(p, bit)) + val case_a_partitions = partitioned_partitions.map(_._1).filter(!_.isEmpty) + val case_b_partitions = partitioned_partitions.map(_._2).filter(!_.isEmpty) + val new_partitions = (case_a_partitions ++ 
case_b_partitions).sorted(partitionOrder) + // Prevent combinational memory explosion; if two partitions are equal, keep only one + // Note: AddressSets in a port are sorted, and ports in a partition are sorted. + // This makes it easy to structurally compare two partitions for equality + val keep = (new_partitions.init zip new_partitions.tail) filter { case (a,b) => partitionOrder.compare(a,b) != 0 } map { _._2 } + new_partitions.head +: keep + } + + // requirement: ports have sorted addresses and are sorted lexicographically + val debug = false + def recurse(partitions: Partitions, bits: Seq[BigInt]): Seq[BigInt] = { + if (partitions.map(_.size <= 1).reduce(_ && _)) Seq() else { + if (debug) { + println("Partitioning:") + partitions.foreach { partition => + println(" Partition:") + partition.foreach { port => + print(" ") + port.foreach { a => print(s" ${a}") } + println("") + } + } + } + val candidates = bits.map { bit => + val result = partitionPartitions(partitions, bit) + val score = bitScore(result) + if (debug) + println(" For bit %x, %s".format(bit, score.toString)) + (score, bit, result) + } + val (bestScore, bestBit, bestPartitions) = candidates.min(Ordering.by[(Seq[Int], BigInt, Partitions), Iterable[Int]](_._1.toIterable)) + if (debug) println("=> Selected bit 0x%x".format(bestBit)) + bestBit +: recurse(bestPartitions, bits.filter(_ != bestBit)) + } + } +} diff --git a/rocket/src/util/DescribedSRAM.scala b/rocket/src/util/DescribedSRAM.scala new file mode 100644 index 000000000..535781a7f --- /dev/null +++ b/rocket/src/util/DescribedSRAM.scala @@ -0,0 +1,30 @@ +// See LICENSE.Berkeley for license details. +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.rocket.util + +import chisel3.{Data, SyncReadMem, Vec} +import chisel3.util.log2Ceil + +object DescribedSRAM { + def apply[T <: Data]( + name: String, + desc: String, + size: BigInt, // depth + data: T + ): SyncReadMem[T] = { + + val mem = SyncReadMem(size, data) + + mem.suggestName(name) + + val granWidth = data match { + case v: Vec[_] => v.head.getWidth + case d => d.getWidth + } + + val uid = 0 + + mem + } +} \ No newline at end of file diff --git a/rocket/src/util/ECC.scala b/rocket/src/util/ECC.scala new file mode 100644 index 000000000..50374558a --- /dev/null +++ b/rocket/src/util/ECC.scala @@ -0,0 +1,233 @@ +// See LICENSE.Berkeley for license details. + +package org.chipsalliance.rocket.util + +import chisel3._ +import chisel3.util._ +import chisel3.util.random.LFSR + +abstract class Decoding +{ + def uncorrected: UInt + def corrected: UInt + def correctable: Bool + def uncorrectable: Bool // If true, correctable should be ignored + def error = correctable || uncorrectable +} + +abstract class Code +{ + def canDetect: Boolean + def canCorrect: Boolean + + def width(w0: Int): Int + + /** Takes the unencoded width and returns a list of indices indicating which + * bits of the encoded value will be used for ecc + */ + def eccIndices(width: Int): Seq[Int] + + /** Encode x to a codeword suitable for decode. + * If poison is true, the decoded value will report uncorrectable + * error despite uncorrected == corrected == x. + */ + def encode(x: UInt, poison: Bool = false.B): UInt + def decode(x: UInt): Decoding + + /** Copy the bits in x to the right bit positions in an encoded word, + * so that x === decode(swizzle(x)).uncorrected; but don't generate + * the other code bits, so decode(swizzle(x)).error might be true. + * For codes for which this operation is not trivial, throw an + * UnsupportedOperationException. 
*/ + def swizzle(x: UInt): UInt +} + +class IdentityCode extends Code +{ + def canDetect = false + def canCorrect = false + + def width(w0: Int) = w0 + def eccIndices(width: Int) = Seq.empty[Int] + def encode(x: UInt, poison: Bool = false.B) = { + require (poison.isLit && poison.litValue == 0, "IdentityCode can not be poisoned") + x + } + def swizzle(x: UInt) = x + def decode(y: UInt) = new Decoding { + def uncorrected = y + def corrected = y + def correctable = false.B + def uncorrectable = false.B + } +} + +class ParityCode extends Code +{ + def canDetect = true + def canCorrect = false + + def width(w0: Int) = w0+1 + def eccIndices(w0: Int) = Seq(w0) + def encode(x: UInt, poison: Bool = false.B) = Cat(x.xorR ^ poison, x) + def swizzle(x: UInt) = Cat(false.B, x) + def decode(y: UInt) = new Decoding { + val uncorrected = y(y.getWidth-2,0) + val corrected = uncorrected + val correctable = false.B + val uncorrectable = y.xorR + } +} + +class SECCode extends Code +{ + def canDetect = true + def canCorrect = true + + // SEC codes may or may not be poisonous depending on the length + // If the code is perfect, every non-codeword is correctable + def poisonous(n: Int) = !isPow2(n+1) + + def width(k: Int) = { + val m = log2Floor(k) + 1 + k + m + (if((1 << m) < m+k+1) 1 else 0) + } + + def eccIndices(w0: Int) = { + (0 until width(w0)).collect { + case i if i >= w0 => i + } + } + + def swizzle(x: UInt) = { + val k = x.getWidth + val n = width(k) + Cat(0.U((n-k).W), x) + } + + // An (n=16, k=11) Hamming code is naturally encoded as: + // PPxPxxxPxxxxxxxP where P are parity bits and x are data + // Indexes typically start at 1, because then the P are on powers of two + // In systematic coding, you put all the data in the front: + // xxxxxxxxxxxPPPPP + // Indexes typically start at 0, because Computer Science + // For sanity when reading SRAMs, you want systematic form. 
+ + private def impl(n: Int, k: Int) = { + require (n >= 3 && k >= 1 && !isPow2(n)) + val hamm2sys = IndexedSeq.tabulate(n+1) { i => + if (i == 0) { + n /* undefined */ + } else if (isPow2(i)) { + k + log2Ceil(i) + } else { + i - 1 - log2Ceil(i) + } + } + val sys2hamm = hamm2sys.zipWithIndex.sortBy(_._1).map(_._2).toIndexedSeq + def syndrome(j: Int) = { + val bit = 1 << j + ("b" + Seq.tabulate(n) { i => + if ((sys2hamm(i) & bit) != 0) "1" else "0" + }.reverse.mkString).U + } + (hamm2sys, sys2hamm, syndrome _) + } + + def encode(x: UInt, poison: Bool = false.B) = { + val k = x.getWidth + val n = width(k) + val (_, _, syndrome) = impl(n, k) + + require ((poison.isLit && poison.litValue == 0) || poisonous(n), s"SEC code of length ${n} cannot be poisoned") + + /* By setting the entire syndrome on poison, the corrected bit falls off the end of the code */ + val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j)(k-1, 0) & x).xorR ^ poison }.asUInt + Cat(syndromeUInt, x) + } + + def decode(y: UInt) = new Decoding { + val n = y.getWidth + val k = n - log2Ceil(n) + val (_, sys2hamm, syndrome) = impl(n, k) + + val syndromeUInt = VecInit.tabulate(n-k) { j => (syndrome(j) & y).xorR }.asUInt + + val hammBadBitOH = UIntToOH(syndromeUInt, n+1) + val sysBadBitOH = VecInit.tabulate(k) { i => hammBadBitOH(sys2hamm(i)) }.asUInt + + val uncorrected = y(k-1, 0) + val corrected = uncorrected ^ sysBadBitOH + val correctable = syndromeUInt.orR + val uncorrectable = if (poisonous(n)) { syndromeUInt > n.U } else { false.B } + } +} + +class SECDEDCode extends Code +{ + def canDetect = true + def canCorrect = true + + private val sec = new SECCode + private val par = new ParityCode + + def width(k: Int) = sec.width(k)+1 + def eccIndices(w0: Int) = { + (0 until width(w0)).collect { + case i if i >= w0 => i + } + } + def encode(x: UInt, poison: Bool = false.B) = { + // toggling two bits ensures the error is uncorrectable + // to ensure corrected == uncorrected, we pick one redundant + // 
bit from SEC (the highest); correcting it does not affect + // corrected == uncorrected. the second toggled bit is the + // parity bit, which also does not appear in the decoding + val toggle_lo = Cat(poison.asUInt, poison.asUInt) + val toggle_hi = (toggle_lo << (sec.width(x.getWidth)-1)).asUInt + par.encode(sec.encode(x)) ^ toggle_hi + } + def swizzle(x: UInt) = par.swizzle(sec.swizzle(x)) + def decode(x: UInt) = new Decoding { + val secdec = sec.decode(x(x.getWidth-2,0)) + val pardec = par.decode(x) + + val uncorrected = secdec.uncorrected + val corrected = secdec.corrected + val correctable = pardec.uncorrectable + val uncorrectable = !pardec.uncorrectable && secdec.correctable + } +} + +object ErrGen +{ + // generate a 1-bit error with approximate probability 2^-f + def apply(width: Int, f: Int): UInt = { + require(width > 0 && f >= 0 && log2Up(width) + f <= 16) + UIntToOH(LFSR(16)(log2Up(width)+f-1,0))(width-1,0) + } + def apply(x: UInt, f: Int): UInt = x ^ apply(x.getWidth, f) +} + +trait CanHaveErrors extends Bundle { + val correctable: Option[ValidIO[UInt]] + val uncorrectable: Option[ValidIO[UInt]] +} + +case class ECCParams( + bytes: Int = 1, + code: Code = new IdentityCode, + notifyErrors: Boolean = false, +) + +object Code { + def fromString(s: Option[String]): Code = fromString(s.getOrElse("none")) + def fromString(s: String): Code = s.toLowerCase match { + case "none" => new IdentityCode + case "identity" => new IdentityCode + case "parity" => new ParityCode + case "sec" => new SECCode + case "secded" => new SECDEDCode + case _ => throw new IllegalArgumentException("Unknown ECC type") + } +} \ No newline at end of file diff --git a/rocket/src/util/Memory.scala b/rocket/src/util/Memory.scala new file mode 100644 index 000000000..a5bcf7822 --- /dev/null +++ b/rocket/src/util/Memory.scala @@ -0,0 +1,244 @@ +// See LICENSE.SiFive for license details. 
+ +package org.chipsalliance.rocket.util + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental._ + +import org.chipsalliance.rocket._ + +object Memory { + // The safe version will check the entire address + def findSafe(address: UInt, slaves: Seq[MemSlaveParameters]) = VecInit(slaves.map(_.address.map(_.contains(address)).reduce(_ || _))) + + // Compute the simplest AddressSets that decide a key + def fastPropertyGroup[K](p: MemSlaveParameters => K, slaves: Seq[MemSlaveParameters]): Seq[(K, Seq[AddressSet])] = { + val groups = groupByIntoSeq(slaves.map(m => (p(m), m.address)))( _._1).map { case (k, vs) => + k -> vs.flatMap(_._2) + } + val reductionMask = AddressDecoder(groups.map(_._2)) + groups.map { case (k, seq) => k -> AddressSet.unify(seq.map(_.widen(~reductionMask)).distinct) } + } + // Select a property + def fastProperty[K, D <: Data](address: UInt, p: MemSlaveParameters => K, d: K => D, slaves: Seq[MemSlaveParameters]): D = + Mux1H(fastPropertyGroup(p, slaves).map { case (v, a) => (a.map(_.contains(address)).reduce(_||_), d(v)) }) +} + +/** Options for describing the attributes of memory regions */ +object RegionType { + // Define the 'more relaxed than' ordering + val cases = Seq(CACHED, TRACKED, UNCACHED, IDEMPOTENT, VOLATILE, PUT_EFFECTS, GET_EFFECTS) + sealed trait T extends Ordered[T] { + def compare(that: T): Int = cases.indexOf(that) compare cases.indexOf(this) + } + + case object CACHED extends T // an intermediate agent may have cached a copy of the region for you + case object TRACKED extends T // the region may have been cached by another master, but coherence is being provided + case object UNCACHED extends T // the region has not been cached yet, but should be cached when possible + case object IDEMPOTENT extends T // gets return most recently put content, but content should not be cached + case object VOLATILE extends T // content may change without a put, but puts and gets have no side effects + case object PUT_EFFECTS 
extends T // puts produce side effects and so must not be combined/delayed + case object GET_EFFECTS extends T // gets produce side effects and so must not be issued speculatively +} + +// An potentially empty inclusive range of 2-powers [min, max] (in bytes) +case class TransferSizes(min: Int, max: Int) +{ + def this(x: Int) = this(x, x) + + require (min <= max, s"Min transfer $min > max transfer $max") + require (min >= 0 && max >= 0, s"TransferSizes must be positive, got: ($min, $max)") + require (max == 0 || isPow2(max), s"TransferSizes must be a power of 2, got: $max") + require (min == 0 || isPow2(min), s"TransferSizes must be a power of 2, got: $min") + require (max == 0 || min != 0, s"TransferSize 0 is forbidden unless (0,0), got: ($min, $max)") + + def none = min == 0 + def contains(x: Int) = isPow2(x) && min <= x && x <= max + def containsLg(x: Int) = contains(1 << x) + def containsLg(x: UInt) = + if (none) false.B + else if (min == max) { log2Ceil(min).U === x } + else { log2Ceil(min).U <= x && x <= log2Ceil(max).U } + + def contains(x: TransferSizes) = x.none || (min <= x.min && x.max <= max) + + def intersect(x: TransferSizes) = + if (x.max < min || max < x.min) TransferSizes.none + else TransferSizes(scala.math.max(min, x.min), scala.math.min(max, x.max)) + + // Not a union, because the result may contain sizes contained by neither term + // NOT TO BE CONFUSED WITH COVERPOINTS + def mincover(x: TransferSizes) = { + if (none) { + x + } else if (x.none) { + this + } else { + TransferSizes(scala.math.min(min, x.min), scala.math.max(max, x.max)) + } + } + + override def toString() = "TransferSizes[%d, %d]".format(min, max) +} + +object TransferSizes { + def apply(x: Int) = new TransferSizes(x) + val none = new TransferSizes(0) + + def mincover(seq: Seq[TransferSizes]) = seq.foldLeft(none)(_ mincover _) + def intersect(seq: Seq[TransferSizes]) = seq.reduce(_ intersect _) + + implicit def asBool(x: TransferSizes) = !x.none +} + +// AddressSets specify the 
address space managed by the manager +// Base is the base address, and mask are the bits consumed by the manager +// e.g: base=0x200, mask=0xff describes a device managing 0x200-0x2ff +// e.g: base=0x1000, mask=0xf0f decribes a device managing 0x1000-0x100f, 0x1100-0x110f, ... +case class AddressSet(val bitSet: BitSet) extends Ordered[AddressSet] +{ + // TODO: This assumption might not hold true after BitSet intersection or subtraction. It is highly depended on the concrete implementation of BitSet. + require(bitSet.terms.size == 1, "The wrapped BitSet should only have one BitPat") + + val base = bitSet.terms.head.value + val mask = bitSet.terms.head.mask + + def contains(x: BigInt) = bitSet matches x.U + def contains(x: UInt) = bitSet matches x + + // turn x into an address contained in this set + def legalize(x: UInt): UInt = base.U | (mask.U & x) + + // overlap iff bitwise: both care (~mask0 & ~mask1) => both equal (base0=base1) + def overlaps(x: AddressSet) = bitSet overlap x.bitSet + // contains iff bitwise: x.mask => mask && contains(x.base) + def contains(x: AddressSet) = bitSet cover x.bitSet + + // The number of bytes to which the manager must be aligned + def alignment = ((mask + 1) & ~mask) + // Is this a contiguous memory range + def contiguous = alignment == mask+1 + + def finite = mask >= 0 + def max = { require (finite, "Max cannot be calculated on infinite mask"); base | mask } + + // Widen the match function to ignore all bits in imask + def widen(imask: BigInt) = AddressSet(base & ~imask, mask | imask) + + // Return an AddressSet that only contains the addresses both sets contain + def intersect(x: AddressSet): Option[AddressSet] = { + if (!overlaps(x)) { + None + } else { + Some(AddressSet(bitSet intersect x.bitSet)) + } + } + + def subtract(x: AddressSet): Seq[AddressSet] = { + (bitSet intersect x.bitSet).terms.toSeq.map(p => AddressSet(BitSet(p))) + } + + // AddressSets have one natural Ordering (the containment order, if contiguous) + def 
compare(x: AddressSet) = { + val primary = (this.base - x.base).signum // smallest address first + val secondary = (x.mask - this.mask).signum // largest mask first + if (primary != 0) primary else secondary + } + + // We always want to see things in hex + override def toString() = { + if (mask >= 0) { + "AddressSet(0x%x, 0x%x)".format(base, mask) + } else { + "AddressSet(0x%x, ~0x%x)".format(base, ~mask) + } + } + + def toRanges = { + require (finite, "Ranges cannot be calculated on infinite mask") + val size = alignment + val fragments = mask & ~(size-1) + val bits = bitIndexes(fragments) + (BigInt(0) until (BigInt(1) << bits.size)).map { i => + val off = bitIndexes(i).foldLeft(base) { case (a, b) => a.setBit(bits(b)) } + AddressSet(off, size) + } + } +} + +object AddressSet +{ + def apply(base: BigInt, mask: BigInt): AddressSet = { + // Forbid misaligned base address (and empty sets) + require ((base & mask) == 0, s"Mis-aligned AddressSets are forbidden, got: ${this.toString}") + require (base >= 0, s"AddressSet negative base is ambiguous: $base") // TL2 address widths are not fixed => negative is ambiguous + // We do allow negative mask (=> ignore all high bits) + + AddressSet(BitSet(new BitPat(base, mask, base.U.getWidth max mask.U.getWidth))) + } + + val everything = AddressSet(0, -1) + def misaligned(base: BigInt, size: BigInt, tail: Seq[AddressSet] = Seq()): Seq[AddressSet] = { + if (size == 0) tail.reverse else { + val maxBaseAlignment = base & (-base) // 0 for infinite (LSB) + val maxSizeAlignment = BigInt(1) << log2Floor(size) // MSB of size + val step = + if (maxBaseAlignment == 0 || maxBaseAlignment > maxSizeAlignment) + maxSizeAlignment else maxBaseAlignment + misaligned(base+step, size-step, AddressSet(base, step-1) +: tail) + } + } + + def unify(seq: Seq[AddressSet], bit: BigInt): Seq[AddressSet] = { + // Pair terms up by ignoring 'bit' + seq.distinct.groupBy(x => AddressSet(x.base & ~bit, x.mask)).map { case (key, seq) => + if (seq.size == 1) { + 
seq.head // singleton -> unaffected + } else { + AddressSet(key.base, key.mask | bit) // pair - widen mask by bit + } + }.toList + } + + def unify(seq: Seq[AddressSet]): Seq[AddressSet] = { + val bits = seq.map(_.base).foldLeft(BigInt(0))(_ | _) + AddressSet.enumerateBits(bits).foldLeft(seq) { case (acc, bit) => unify(acc, bit) }.sorted + } + + def enumerateMask(mask: BigInt): Seq[BigInt] = { + def helper(id: BigInt, tail: Seq[BigInt]): Seq[BigInt] = + if (id == mask) (id +: tail).reverse else helper(((~mask | id) + 1) & mask, id +: tail) + helper(0, Nil) + } + + def enumerateBits(mask: BigInt): Seq[BigInt] = { + def helper(x: BigInt): Seq[BigInt] = { + if (x == 0) { + Nil + } else { + val bit = x & (-x) + bit +: helper(x & ~bit) + } + } + helper(mask) + } +} + +case class MemSlaveParameters( + val address: Seq[AddressSet], + val regionType: RegionType.T = RegionType.GET_EFFECTS, + + val executable: Boolean = false, + + val supportsAcquireT: TransferSizes = TransferSizes.none, + val supportsAcquireB: TransferSizes = TransferSizes.none, + val supportsArithmetic: TransferSizes = TransferSizes.none, + val supportsLogical: TransferSizes = TransferSizes.none, + val supportsGet: TransferSizes = TransferSizes.none, + val supportsPutFull: TransferSizes = TransferSizes.none, + val supportsPutPartial: TransferSizes = TransferSizes.none, + val supportsHint: TransferSizes = TransferSizes.none, + + val name: String, +) \ No newline at end of file diff --git a/rocket/src/util/Replacement.scala b/rocket/src/util/Replacement.scala new file mode 100644 index 000000000..2d4dbb266 --- /dev/null +++ b/rocket/src/util/Replacement.scala @@ -0,0 +1,325 @@ +// See LICENSE.SiFive for license details. +// See LICENSE.Berkeley for license details. 

// TODO: Should be upstreamed to Chisel

package org.chipsalliance.rocket.util

import chisel3._
import chisel3.util._
import chisel3.util.random._

// NOTE(review): `Random(...)`, `cover(...)` and `.orR` on Seq[Bool] are not
// provided by the imports above; they presumably resolve via enrichments in
// this package's package object — confirm they are in scope here.

/** Interface of a cache way-replacement policy.
  *
  * Two usage styles coexist:
  *  - stateful: `access`/`way`/`miss`/`hit` operate on the policy's own
  *    internal state register;
  *  - stateless: `get_next_state`/`get_replace_way` take and return the state
  *    explicitly, so callers can keep it externally (e.g. one entry per set).
  */
abstract class ReplacementPolicy {
  def nBits: Int      // number of state bits this policy needs
  def perSet: Boolean // true if state is meant to be tracked per cache set
  def way: UInt       // way to victimize next
  def miss: Unit      // notify the policy of a miss
  def hit: Unit       // notify the policy of a hit
  def access(touch_way: UInt): Unit
  def access(touch_ways: Seq[Valid[UInt]]): Unit
  def state_read: UInt
  def get_next_state(state: UInt, touch_way: UInt): UInt
  // Fold several (possibly invalid) simultaneous touches into the state,
  // applying them in list order; invalid entries leave the state unchanged.
  def get_next_state(state: UInt, touch_ways: Seq[Valid[UInt]]): UInt = {
    touch_ways.foldLeft(state)((prev, touch_way) => Mux(touch_way.valid, get_next_state(prev, touch_way.bits), prev))
  }
  def get_replace_way(state: UInt): UInt
}

/** Factory building a policy from its configuration-string name
  * ("random" | "lru" | "plru", case-insensitive).
  */
object ReplacementPolicy {
  def fromString(s: String, n_ways: Int): ReplacementPolicy = s.toLowerCase match {
    case "random" => new RandomReplacement(n_ways)
    case "lru" => new TrueLRU(n_ways)
    case "plru" => new PseudoLRU(n_ways)
    case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
  }
}

/** Random replacement: a 16-bit LFSR that advances only on `miss` selects the
  * victim way.  Hits and `access` calls do not change the state, and the
  * state is global (`perSet = false`), not per set.
  */
class RandomReplacement(n_ways: Int) extends ReplacementPolicy {
  private val replace = Wire(Bool())
  replace := false.B
  def nBits = 16
  def perSet = false
  // The LFSR steps only when `replace` is asserted, i.e. on a miss.
  private val lfsr = LFSR(nBits, replace)
  def state_read = WireDefault(lfsr)

  def way = Random(n_ways, lfsr)
  def miss = replace := true.B
  def hit = {}
  def access(touch_way: UInt) = {}
  def access(touch_ways: Seq[Valid[UInt]]) = {}
  def get_next_state(state: UInt, touch_way: UInt) = 0.U //DontCare
  def get_replace_way(state: UInt) = way
}

/** Sequential-memory interface: per-set state lives in an SRAM, so the state
  * for a set is read via `access(set)` before `update(...)` writes it back.
  */
abstract class SeqReplacementPolicy {
  def access(set: UInt): Unit
  def update(valid: Bool, hit: Bool, set: UInt, way: UInt): Unit
  def way: UInt
}

/** Set-associative interface: per-set state read and updated combinationally. */
abstract class SetAssocReplacementPolicy {
  def access(set: UInt, touch_way: UInt): Unit
  def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]): Unit
  def way(set: UInt): UInt
}

/** Random replacement behind the sequential interface; the set index is
  * ignored because the LFSR state is global.
  */
class SeqRandom(n_ways: Int) extends SeqReplacementPolicy {
  val logic = new RandomReplacement(n_ways)
  def access(set: UInt) = { }
  def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
    // Only a valid miss advances the LFSR.
    when (valid && !hit) { logic.miss }
  }
  def way = logic.way
}

/** True (exact) LRU over `n_ways` ways, with global internal state. */
class TrueLRU(n_ways: Int) extends ReplacementPolicy {
  // True LRU replacement policy, using a triangular matrix to track which sets are more recently used than others.
  // The matrix is packed into a single UInt (or Bits). Example 4-way (6-bits):
  // [5] - 3 more recent than 2
  // [4] - 3 more recent than 1
  // [3] - 2 more recent than 1
  // [2] - 3 more recent than 0
  // [1] - 2 more recent than 0
  // [0] - 1 more recent than 0
  def nBits = (n_ways * (n_ways-1)) / 2
  def perSet = true
  private val state_reg = RegInit(0.U(nBits.W))
  def state_read = WireDefault(state_reg)

  private def extractMRUVec(state: UInt): Seq[UInt] = {
    // Extract per-way information about which higher-indexed ways are more recently used
    // moreRecentVec(i)(j) == 1 means way j (j > i) is more recent than way i;
    // the low i+1 bits of each entry are zero-padded (self/lower comparisons).
    val moreRecentVec = Wire(Vec(n_ways-1, UInt(n_ways.W)))
    var lsb = 0
    for (i <- 0 until n_ways-1) {
      moreRecentVec(i) := Cat(state(lsb+n_ways-i-2,lsb), 0.U((i+1).W))
      lsb = lsb + (n_ways - i - 1)
    }
    moreRecentVec
  }

  def get_next_state(state: UInt, touch_way: UInt): UInt = {
    val nextState = Wire(Vec(n_ways-1, UInt(n_ways.W)))
    val moreRecentVec = extractMRUVec(state) // reconstruct lower triangular matrix
    val wayDec = UIntToOH(touch_way, n_ways)

    // Compute next value of triangular matrix
    // set the touched way as more recent than every other way
    nextState.zipWithIndex.map { case (e, i) =>
      e := Mux(i.U === touch_way, 0.U(n_ways.W), moreRecentVec(i) | wayDec)
    }

    // Re-pack the (padded) rows back into the flat triangular-matrix encoding.
    nextState.zipWithIndex.tail.foldLeft((nextState.head.apply(n_ways-1,1),0)) { case ((pe,pi),(ce,ci)) => (Cat(ce.apply(n_ways-1,ci+1), pe), ci) }._1
  }

  def access(touch_way: UInt): Unit = {
    state_reg := get_next_state(state_reg, touch_way)
  }
  def access(touch_ways: Seq[Valid[UInt]]): Unit = {
    when (touch_ways.map(_.valid).orR) {
      state_reg := get_next_state(state_reg, touch_ways)
    }
    // Coverage: record how many simultaneous valid touches actually occur.
    for (i <- 1 until touch_ways.size) {
      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"LRU_UpdateCount$i; LRU Update $i simultaneous")
    }
  }

  def get_replace_way(state: UInt): UInt = {
    val moreRecentVec = extractMRUVec(state) // reconstruct lower triangular matrix
    // For each way, determine if all other ways are more recent
    val mruWayDec = (0 until n_ways).map { i =>
      val upperMoreRecent = (if (i == n_ways-1) true.B else moreRecentVec(i).apply(n_ways-1,i+1).andR)
      val lowerMoreRecent = (if (i == 0) true.B else moreRecentVec.map(e => !e(i)).reduce(_ && _))
      upperMoreRecent && lowerMoreRecent
    }
    // Exactly one way satisfies "everyone else is more recent": the LRU way.
    OHToUInt(mruWayDec)
  }

  def way = get_replace_way(state_reg)
  def miss = access(way)
  def hit = {}
  @deprecated("replace 'replace' with 'way' from abstract class ReplacementPolicy","Rocket Chip 2020.05")
  def replace: UInt = way
}

/** Tree pseudo-LRU over `n_ways` ways (supports non-power-of-2 way counts via
  * an unbalanced tree whose right sub-tree is always a full power of two).
  */
class PseudoLRU(n_ways: Int) extends ReplacementPolicy {
  // Pseudo-LRU tree algorithm: https://en.wikipedia.org/wiki/Pseudo-LRU#Tree-PLRU
  //
  //
  // - bits storage example for 4-way PLRU binary tree:
  //              bit[2]: ways 3+2 older than ways 1+0
  //              /                                  \
  //    bit[1]: way 3 older than way 2    bit[0]: way 1 older than way 0
  //
  //
  // - bits storage example for 3-way PLRU binary tree:
  //              bit[1]: way 2 older than ways 1+0
  //                                                 \
  //                                      bit[0]: way 1 older than way 0
  //
  //
  // - bits storage example for 8-way PLRU binary tree:
  //                  bit[6]: ways 7-4 older than ways 3-0
  //                  /                                  \
  //      bit[5]: ways 7+6 > 5+4            bit[2]: ways 3+2 > 1+0
  //      /                \                /                \
  //  bit[4]: way 7>6  bit[3]: way 5>4  bit[1]: way 3>2  bit[0]: way 1>0

  def nBits = n_ways - 1
  def perSet = true
  // n_ways == 1 gives nBits == 0; Reg of zero width avoids RegInit(0.U(0.W)).
  private val state_reg = if (nBits == 0) Reg(UInt(0.W)) else RegInit(0.U(nBits.W))
  def state_read = WireDefault(state_reg)

  def access(touch_way: UInt): Unit = {
    state_reg := get_next_state(state_reg, touch_way)
  }
  def access(touch_ways: Seq[Valid[UInt]]): Unit = {
    when (touch_ways.map(_.valid).orR) {
      state_reg := get_next_state(state_reg, touch_ways)
    }
    // Coverage: record how many simultaneous valid touches actually occur.
    for (i <- 1 until touch_ways.size) {
      cover(PopCount(touch_ways.map(_.valid)) === i.U, s"PLRU_UpdateCount$i; PLRU Update $i simultaneous")
    }
  }


  /** Recursively compute the next PLRU state for one sub-tree.
    *
    * @param state state_reg bits for this sub-tree
    * @param touch_way touched way encoded value bits for this sub-tree
    * @param tree_nways number of ways in this sub-tree
    */
  def get_next_state(state: UInt, touch_way: UInt, tree_nways: Int): UInt = {
    require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways")
    require(touch_way.getWidth == (log2Ceil(tree_nways) max 1), s"wrong encoded way width ${touch_way.getWidth} for $tree_nways ways")

    if (tree_nways > 2) {
      // we are at a branching node in the tree, so recurse
      val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree
      val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree
      val set_left_older = !touch_way(log2Ceil(tree_nways)-1)
      val left_subtree_state = state.extract(tree_nways-3, right_nways-1)
      val right_subtree_state = state(right_nways-2, 0)

      if (left_nways > 1) {
        // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees
        Cat(set_left_older,
            Mux(set_left_older,
                left_subtree_state, // if setting left sub-tree as older, do NOT recurse into left sub-tree
                get_next_state(left_subtree_state, touch_way.extract(log2Ceil(left_nways)-1,0), left_nways)), // recurse left if newer
            Mux(set_left_older,
                get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer
                right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree
      } else {
        // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree
        Cat(set_left_older,
            Mux(set_left_older,
                get_next_state(right_subtree_state, touch_way(log2Ceil(right_nways)-1,0), right_nways), // recurse right if newer
                right_subtree_state)) // if setting right sub-tree as older, do NOT recurse into right sub-tree
      }
    } else if (tree_nways == 2) {
      // we are at a leaf node at the end of the tree, so set the single state bit opposite of the lsb of the touched way encoded value
      !touch_way(0)
    } else { // tree_nways <= 1
      // we are at an empty node in an empty tree for 1 way, so return single zero bit for Chisel (no zero-width wires)
      0.U(1.W)
    }
  }

  // Normalize the touched way to exactly log2Ceil(n_ways) bits before the
  // recursive update (pad narrow values, truncate wide ones).
  def get_next_state(state: UInt, touch_way: UInt): UInt = {
    val touch_way_sized = if (touch_way.getWidth < log2Ceil(n_ways)) touch_way.padTo (log2Ceil(n_ways))
                          else touch_way.extract(log2Ceil(n_ways)-1,0)
    get_next_state(state, touch_way_sized, n_ways)
  }


  /** Recursively read the way-to-replace for one sub-tree.
    *
    * @param state state_reg bits for this sub-tree
    * @param tree_nways number of ways in this sub-tree
    */
  def get_replace_way(state: UInt, tree_nways: Int): UInt = {
    require(state.getWidth == (tree_nways-1), s"wrong state bits width ${state.getWidth} for $tree_nways ways")

    // this algorithm recursively descends the binary tree, filling in the way-to-replace encoded value from msb to lsb
    if (tree_nways > 2) {
      // we are at a branching node in the tree, so recurse
      val right_nways: Int = 1 << (log2Ceil(tree_nways) - 1) // number of ways in the right sub-tree
      val left_nways: Int = tree_nways - right_nways // number of ways in the left sub-tree
      val left_subtree_older = state(tree_nways-2)
      val left_subtree_state = state.extract(tree_nways-3, right_nways-1)
      val right_subtree_state = state(right_nways-2, 0)

      if (left_nways > 1) {
        // we are at a branching node in the tree with both left and right sub-trees, so recurse both sub-trees
        Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value
            Mux(left_subtree_older, // if left sub-tree is older, recurse left, else recurse right
                get_replace_way(left_subtree_state, left_nways), // recurse left
                get_replace_way(right_subtree_state, right_nways))) // recurse right
      } else {
        // we are at a branching node in the tree with only a right sub-tree, so recurse only right sub-tree
        Cat(left_subtree_older, // return the top state bit (current tree node) as msb of the way-to-replace encoded value
            Mux(left_subtree_older, // if left sub-tree is older, return and do not recurse right
                0.U(1.W),
                get_replace_way(right_subtree_state, right_nways))) // recurse right
      }
    } else if (tree_nways == 2) {
      // we are at a leaf node at the end of the tree, so just return the single state bit as lsb of the way-to-replace encoded value
      state(0)
    } else { // tree_nways <= 1
      // we are at an empty node in an unbalanced tree for non-power-of-2 ways, so return single zero bit as lsb of the way-to-replace encoded value
      0.U(1.W)
    }
  }

  def get_replace_way(state: UInt): UInt = get_replace_way(state, n_ways)

  def way = get_replace_way(state_reg)
  def miss = access(way)
  def hit = {}
}

/** PLRU with per-set state in a SyncReadMem: `access(set)` must be called the
  * cycle before `update(...)` so `current_state` corresponds to the set being
  * updated.
  * NOTE(review): `current_state`/`next_state` are bare Wires driven across
  * the two calls — confirm every user invokes both, else they are undriven.
  */
class SeqPLRU(n_sets: Int, n_ways: Int) extends SeqReplacementPolicy {
  val logic = new PseudoLRU(n_ways)
  val state = SyncReadMem(n_sets, UInt(logic.nBits.W))
  val current_state = Wire(UInt(logic.nBits.W))
  val next_state = Wire(UInt(logic.nBits.W))
  val plru_way = logic.get_replace_way(current_state)

  def access(set: UInt) = {
    current_state := state.read(set)
  }

  def update(valid: Bool, hit: Bool, set: UInt, way: UInt) = {
    // On a hit, promote the hit way; on a miss, promote the chosen victim.
    val update_way = Mux(hit, way, plru_way)
    next_state := logic.get_next_state(current_state, update_way)
    when (valid) { state.write(set, next_state) }
  }

  def way = plru_way
}


/** LRU or PLRU ("lru"/"plru") with one state register per set, read and
  * updated combinationally through the set-associative interface.
  */
class SetAssocLRU(n_sets: Int, n_ways: Int, policy: String) extends SetAssocReplacementPolicy {
  val logic = policy.toLowerCase match {
    case "plru" => new PseudoLRU(n_ways)
    case "lru" => new TrueLRU(n_ways)
    case t => throw new IllegalArgumentException(s"unknown Replacement Policy type $t")
  }
  val state_vec =
    if (logic.nBits == 0) Reg(Vec(n_sets, UInt(logic.nBits.W))) // Work around elaboration error on following line
    else RegInit(VecInit(Seq.fill(n_sets)(0.U(logic.nBits.W))))

  def access(set: UInt, touch_way: UInt) = {
    state_vec(set) := logic.get_next_state(state_vec(set), touch_way)
  }

  def access(sets: Seq[UInt], touch_ways: Seq[Valid[UInt]]) = {
    require(sets.size == touch_ways.size, "internal consistency check: should be same number of simultaneous updates for sets and touch_ways")
    for (set <- 0 until n_sets) {
      // Qualify each touch with "targets this set"; Pipe with latency 0
      // merely bundles the valid/bits pair combinationally.
      val set_touch_ways = (sets zip touch_ways).map { case (touch_set, touch_way) =>
        Pipe(touch_way.valid && (touch_set === set.U), touch_way.bits, 0)}
      when (set_touch_ways.map(_.valid).orR) {
        state_vec(set) := logic.get_next_state(state_vec(set), set_touch_ways)
      }
    }
  }

  def way(set: UInt) = logic.get_replace_way(state_vec(set))

}