diff --git a/src/sst/elements/carcosa/Makefile.am b/src/sst/elements/carcosa/Makefile.am new file mode 100644 index 0000000000..986cab2899 --- /dev/null +++ b/src/sst/elements/carcosa/Makefile.am @@ -0,0 +1,66 @@ +# -*- Makefile -*- +# +# + +AM_CPPFLAGS += \ + $(MPI_CPPFLAGS) \ + -I$(top_srcdir)/src + +compdir = $(pkglibdir) +comp_LTLIBRARIES = libcarcosa.la +libcarcosa_la_SOURCES = \ + injectors/faultInjectorBase.cc \ + injectors/faultInjectorBase.h \ + injectors/stuckAtFaultInjector.cc \ + injectors/stuckAtFaultInjector.h \ + injectors/corruptMemFaultInjector.cc \ + injectors/corruptMemFaultInjector.h \ + injectors/randomDropFaultInjector.cc \ + injectors/randomDropFaultInjector.h \ + injectors/randomFlipFaultInjector.cc \ + injectors/randomFlipFaultInjector.h \ + injectors/dropFlipFaultInjector.cc \ + injectors/dropFlipFaultInjector.h \ + faultlogic/faultBase.cc \ + faultlogic/faultBase.h \ + faultlogic/stuckAtFault.cc \ + faultlogic/stuckAtFault.h \ + faultlogic/corruptMemFault.cc \ + faultlogic/corruptMemFault.h \ + faultlogic/randomDropFault.cc \ + faultlogic/randomDropFault.h \ + faultlogic/randomFlipFault.cc \ + faultlogic/randomFlipFault.h + +EXTRA_DIST = \ + tests/testCorruptMemBasic.py \ + tests/testCorruptMemDouble.py \ + tests/testCorruptMemDoubleOverlap.py \ + tests/testRandomDrop.py \ + tests/testRandomFlip.py \ + tests/testStuckAtBasic.py \ + tests/testStuckAtMultiple.py \ + tests/testStuckAtOverlap.py \ + tests/testStuckAtSameByte.py + +sstdir = $(includedir)/sst/elements/carcosa +nobase_sst_HEADERS = \ + injectors/faultInjectorBase.h \ + injectors/stuckAtFaultInjector.h \ + injectors/corruptMemFaultInjector.h \ + injectors/randomDropFaultInjector.h \ + injectors/randomFlipFaultInjector.h \ + injectors/dropFlipFaultInjector.h \ + faultlogic/faultBase.h \ + faultlogic/stuckAtFault.h \ + faultlogic/corruptMemFault.h \ + faultlogic/randomDropFault.h \ + faultlogic/randomFlipFault.h + +libcarcosa_la_LDFLAGS = -module -avoid-version 
+libcarcosa_la_LIBADD = + +AM_CPPFLAGS += $(HMC_FLAG) +install-exec-hook: + $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE carcosa=$(abs_srcdir) + $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS carcosa=$(abs_srcdir)/tests diff --git a/src/sst/elements/carcosa/configure.m4 b/src/sst/elements/carcosa/configure.m4 new file mode 100644 index 0000000000..a47dc7ab12 --- /dev/null +++ b/src/sst/elements/carcosa/configure.m4 @@ -0,0 +1,9 @@ +dnl -*- Autoconf -*- +dnl vim:ft=config +dnl + +AC_DEFUN([SST_carcosa_CONFIG], [ + carcosa_happy="yes" + + AS_IF([test "$carcosa_happy" = "yes"], [$1], [$2]) +]) diff --git a/src/sst/elements/carcosa/faultlogic/corruptMemFault.cc b/src/sst/elements/carcosa/faultlogic/corruptMemFault.cc new file mode 100644 index 0000000000..fdabd7deb1 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/corruptMemFault.cc @@ -0,0 +1,125 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#include "sst/elements/carcosa/faultlogic/corruptMemFault.h" + +using namespace SST::Carcosa; + +CorruptMemFault::CorruptMemFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) { +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Fault type: Corrupt Memory Region\n"); +#endif + // read in data regions + std::vector regionVec; + + // parameter format: {"regions": ["start_addr0, end_addr0", "start_addr1, end_addr1",...]} + params.find_array("regions", regionVec); + + // process entries into region + for (std::string region: regionVec) { + std::pair region_pair = convertString(region); + + // check validity + if (region_pair.first > region_pair.second) { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Invalid corruption region: [0x%zx, 0x%zx].\n", + region_pair.first, region_pair.second); + } + + corruptionRegions_.push_back(region_pair); +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Inserted corruption region: [0x%zx, 0x%zx]\n", + region_pair.first, region_pair.second); +#endif + } +} + +bool CorruptMemFault::faultLogic(Event*& ev) { + SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev); + + Addr base_addr = mem_ev->getBaseAddr(); + dataVec original_payload = mem_ev->getPayload(); + dataVec new_payload(original_payload); + for (int r: regionsToUse_) { + auto& region = corruptionRegions_[r]; + size_t payload_sz = mem_ev->getPayloadSize(); + int32_t start = computeStartIndex(base_addr, payload_sz, region.first); + if (start < 0) { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "No valid start index for corruption.\n"); + } + int32_t end = computeEndIndex(base_addr, payload_sz, region.second); + if (end < 0) { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "No valid start index for corruption.\n"); + } + for (int i = start; i < end; i++) { + new_payload[i] = static_cast(injector_->randUInt32(0,255)); + } + } + setMemEventPayload(ev, new_payload); + return 
true; +} + +std::pair CorruptMemFault::convertString(std::string& region) { + std::stringstream ss(region); + uint64_t addr0, addr1; + + ss >> std::hex >> addr0; + if (ss.peek() == ','){ + ss.ignore(); + } + ss >> std::hex >> addr1; + +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Extracted region pair: [0x%zx, 0x%zx]\n", + addr0, addr1); +#endif + return make_pair(addr0, addr1); +} + +int32_t CorruptMemFault::computeStartIndex(Addr base_addr, size_t payload_sz, Addr region_start) { + // start index is always the first byte of this payload in the corruption region + int payload_bytes = payload_sz / 8; + Addr addr = base_addr; + for (int i = 0; i < payload_bytes; i++, addr+=8) { + if (addr >= region_start) { + return addr - base_addr; + } + } + return -1; +} + +int32_t CorruptMemFault::computeEndIndex(Addr base_addr, size_t payload_sz, Addr region_end) { + // end index is either the final addr's final byte, or the region end's addr's final byte + int payload_bytes = payload_sz / 8; + Addr addr = base_addr + ((payload_bytes - 1) * 8); + for (int i = payload_bytes; i >= 0; i--, addr-=8) { + if (addr <= region_end) { + return addr - base_addr + 8; + } + } + return -1; +} + +std::vector* CorruptMemFault::checkAddrUsage(Event*& ev) { + Addr base_addr = convertMemEvent(ev)->getBaseAddr(); + for (int i = 0; i < corruptionRegions_.size(); i++) { + auto& region = corruptionRegions_[i]; + // check if message contains ANY address in this region + int payload_bytes = convertMemEvent(ev)->getPayloadSize() / 8; + Addr addr = base_addr; + for (int j = 0; j < payload_bytes; addr+=8, j++) { + if ((addr >= region.first) && (addr <= region.second)) { + regionsToUse_.push_back(i); + break; + } + } + } + return ®ionsToUse_; +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/corruptMemFault.h b/src/sst/elements/carcosa/faultlogic/corruptMemFault.h new file mode 100644 index 0000000000..5574639bad --- /dev/null +++ 
b/src/sst/elements/carcosa/faultlogic/corruptMemFault.h @@ -0,0 +1,72 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H +#define SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H + +#include "sst/elements/carcosa/faultlogic/faultBase.h" +#include "sst/core/rng/mersenne.h" +#include +#include +#include +#include + +namespace SST::Carcosa { + +typedef std::vector dataVec; +typedef SST::MemHierarchy::Addr Addr; + +/** + * This fault is intended to be placed on the input/output ports + * of memory components such as DRAM or HBM. Events that pass through + * it, and whose data addresses fall within the ranges set in this + * module's parameters, will have their payloads randomly altered + * to simulate corruption in the affected region of memory. + */ +class CorruptMemFault : public FaultBase +{ +public: + + CorruptMemFault(Params& params, FaultInjectorBase* injector); + + CorruptMemFault() = default; + ~CorruptMemFault() {} + + /** + * 1. Read in event + * 2. Test if event is in specified region + * 3. Corrupt event payload if necessary + * 4. 
Replace payload + */ + bool faultLogic(Event*& ev) override; + + std::vector* checkAddrUsage(Event*& ev); +protected: + + std::vector> corruptionRegions_; + + std::vector regionsToUse_; + + std::pair convertString(std::string& region); + + int32_t computeStartIndex(Addr base_addr, size_t payload_sz, Addr region_start); + int32_t computeEndIndex(Addr base_addr, size_t payload_sz, Addr region_end); + + void serialize_order(SST::Core::Serialization::serializer& ser) override { + FaultBase::serialize_order(ser); + SST_SER(corruptionRegions_); + SST_SER(regionsToUse_); + } + ImplementVirtualSerializable(CorruptMemFault) +}; // CorruptMemFault +} // namespace SST::Carcosa + +#endif // SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/faultBase.cc b/src/sst/elements/carcosa/faultlogic/faultBase.cc new file mode 100644 index 0000000000..4a648f6699 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/faultBase.cc @@ -0,0 +1,77 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#include "sst/elements/carcosa/faultlogic/faultBase.h" + +using namespace SST::Carcosa; + +FaultBase::FaultBase(Params& params, FaultInjectorBase* injector) : injector_(injector) { + // +} + +bool FaultBase::faultLogic(Event*& ev) { + return true; +} + +SST::Output* FaultBase::getSimulationOutput() { + return injector_->getOutput(); +} + +SST::Output* FaultBase::getSimulationDebug() { + return injector_->getDebug(); +} + +SST::MemHierarchy::MemEvent* FaultBase::convertMemEvent(Event*& ev) { + SST::MemHierarchy::MemEvent* mem_ev = dynamic_cast(ev); + + if (mem_ev == nullptr) { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Attempting to inject mem fault on a non-MemEvent type.\n"); + } + +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 3, 0, "Intercepted event %zu/%d\n", mem_ev->getID().first, mem_ev->getID().second); +#endif + return mem_ev; +} + +dataVec& FaultBase::getMemEventPayload(Event*& ev) { + return convertMemEvent(ev)->getPayload(); +} + +void FaultBase::setMemEventPayload(Event*& ev, dataVec newPayload) { +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Payload before replacement:\n"); + SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev); + dataVec payload = getMemEventPayload(ev); + for (int i = 0; i < payload.size(); i+=8) { + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "\n0x%zx: [\t", mem_ev->getBaseAddr() + i); + for (int j = i; j < (i+8); j++) { + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "%d\t", payload[j]); + } + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "]\n"); + } +#endif + convertMemEvent(ev)->setPayload(newPayload); + +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Payload after replacement:\n"); + mem_ev = convertMemEvent(ev); + payload = getMemEventPayload(ev); + for (int i = 0; i < payload.size(); i+=8) { + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "\n0x%zx: [\t", mem_ev->getBaseAddr() + i); + for (int j 
= i; j < (i+8); j++) { + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "%d\t", payload[j]); + } + getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "]\n"); + } +#endif +} + diff --git a/src/sst/elements/carcosa/faultlogic/faultBase.h b/src/sst/elements/carcosa/faultlogic/faultBase.h new file mode 100644 index 0000000000..08da7ac773 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/faultBase.h @@ -0,0 +1,56 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_FAULTBASE_H +#define SST_ELEMENTS_CARCOSA_FAULTBASE_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" +#include "sst/core/serialization/serializable.h" +#include +#include +#include + +namespace SST::Carcosa { + +typedef std::vector dataVec; + +class FaultInjectorBase; + +class FaultBase : public SST::Core::Serialization::serializable { +public: + FaultBase(Params& params, FaultInjectorBase* injector); + + FaultBase() = default; + ~FaultBase() {} + + virtual bool faultLogic(Event*& ev); + + SST::Output* getSimulationOutput(); + + SST::Output* getSimulationDebug(); + + SST::MemHierarchy::MemEvent* convertMemEvent(Event*& ev); + + dataVec& getMemEventPayload(Event*& ev); + + void setMemEventPayload(Event*& ev, dataVec newPayload); +protected: + FaultInjectorBase* injector_ = nullptr; + + void serialize_order(SST::Core::Serialization::serializer& ser) override { + FaultBase::serialize_order(ser); + SST_SER(injector_); + } + ImplementVirtualSerializable(FaultBase) +}; // class FaultBase +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/randomDropFault.cc 
b/src/sst/elements/carcosa/faultlogic/randomDropFault.cc new file mode 100644 index 0000000000..b939c88200 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/randomDropFault.cc @@ -0,0 +1,31 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst/elements/carcosa/faultlogic/randomDropFault.h" + +using namespace SST::Carcosa; + +RandomDropFault::RandomDropFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) { + // +} + +bool RandomDropFault::faultLogic(Event*& ev) { + SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev); + + delete mem_ev; + if (injector_->getInstallDirection() == installDirection::Receive) { + injector_->cancelDelivery(); + } +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Event dropped.\n"); +#endif + return true; +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/randomDropFault.h b/src/sst/elements/carcosa/faultlogic/randomDropFault.h new file mode 100644 index 0000000000..5dfa679d4b --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/randomDropFault.h @@ -0,0 +1,41 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#ifndef SST_ELEMENTS_CARCOSA_RANDOMDROPFAULT_H +#define SST_ELEMENTS_CARCOSA_RANDOMDROPFAULT_H + +#include "sst/elements/carcosa/faultlogic/faultBase.h" +#include +#include +#include + +namespace SST::Carcosa { + +typedef std::vector dataVec; + +class RandomDropFault : public FaultBase { +public: + RandomDropFault(Params& params, FaultInjectorBase* injector); + + RandomDropFault() = default; + ~RandomDropFault() {} + + bool faultLogic(Event*& ev) override; +protected: + void serialize_order(SST::Core::Serialization::serializer& ser) override { + FaultBase::serialize_order(ser); + } + ImplementVirtualSerializable(RandomDropFault) +}; // RandomDropFault + +} // namespace SST::Carcosa + +#endif // SST_ELEMENTS_CARCOSA_RANDOMDROPFAULT_H \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/randomFlipFault.cc b/src/sst/elements/carcosa/faultlogic/randomFlipFault.cc new file mode 100644 index 0000000000..48da3c7ea7 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/randomFlipFault.cc @@ -0,0 +1,38 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#include "sst/elements/carcosa/faultlogic/randomFlipFault.h" + +using namespace SST::Carcosa; + +RandomFlipFault::RandomFlipFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) { + // +} + +bool RandomFlipFault::faultLogic(Event*& ev) { + // check if this is the proper event type and get payload if it is + dataVec payload = getMemEventPayload(ev); + std::pair lucky_number = pickByteAndBit(payload.size()); + uint8_t byte = payload[lucky_number.first]; + uint8_t mask = static_cast(1) << (lucky_number.second); + payload[lucky_number.first] = byte ^ mask; + setMemEventPayload(ev, payload); + return true; +} + +inline std::pair RandomFlipFault::pickByteAndBit(size_t payload_sz) { + uint32_t byte = injector_->randUInt32(0, payload_sz); + uint32_t bit = injector_->randUInt32(0, 8); +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Flipping bit %u in byte %u.\n", (uint32_t)bit, (uint32_t)byte); +#endif + return make_pair(byte, bit); +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/randomFlipFault.h b/src/sst/elements/carcosa/faultlogic/randomFlipFault.h new file mode 100644 index 0000000000..0ddd02ee63 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/randomFlipFault.h @@ -0,0 +1,41 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#ifndef SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULT_H +#define SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULT_H + +#include "sst/elements/carcosa/faultlogic/faultBase.h" + +namespace SST::Carcosa { + +class RandomFlipFault : public FaultBase { +public: + RandomFlipFault(Params& params, FaultInjectorBase* injector); + + RandomFlipFault() = default; + ~RandomFlipFault() {} + + bool faultLogic(Event*& ev) override; +protected: + /** + * Randomly choose which bit in which byte to flip + * @return (byte, bit) + */ + inline std::pair pickByteAndBit(size_t payload_sz); +protected: + void serialize_order(SST::Core::Serialization::serializer& ser) override { + FaultBase::serialize_order(ser); + } + ImplementVirtualSerializable(RandomFlipFault) +}; // RandomFlipFault +} + +#endif // SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULT_H \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/stuckAtFault.cc b/src/sst/elements/carcosa/faultlogic/stuckAtFault.cc new file mode 100644 index 0000000000..571d1e45a6 --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/stuckAtFault.cc @@ -0,0 +1,187 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#include "sst/elements/carcosa/faultlogic/stuckAtFault.h" + +using namespace SST::Carcosa; + +/********** StuckAtFault **********/ + +StuckAtFault::StuckAtFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) +{ +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Fault Type: Stuck-At Fault\n"); +#endif + // read in masks + // parameter format: {masks: ["addr, byte, zeroMask, oneMask"]} + std::vector paramVecStr; + params.find_array("masks", paramVecStr); + + std::vector paramVec = convertString(paramVecStr); + // build maps + for (auto param = paramVec.begin(); param != paramVec.end(); param++) { + Addr addr = param->addr; + int byte = param->byte; + uint8_t zeroMask = param->zeroMask; + uint8_t oneMask = param->oneMask; + if ((int)(zeroMask & oneMask) > 0) { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Masks contain overlapping values. Addr: 0x%zx, " + "byte: %d\n", addr, byte); + } + // check for vector in each map before creating it + if (stuckAtZeroMask_.count(addr) == 1) { + stuckAtZeroMask_.at(addr).push_back(make_pair(byte, zeroMask)); + } else { + auto addrVecPair = stuckAtZeroMask_.emplace(make_pair(addr, std::vector>())); + if (addrVecPair.second) { + addrVecPair.first->second.push_back(make_pair(byte, zeroMask)); + } else { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Failed to insert mask.\n"); + } + } +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Finished inserting zero-masks for 0x%zx.\n", addr); +#endif + if (stuckAtOneMask_.count(addr) == 1) { + stuckAtOneMask_.at(addr).push_back(make_pair(byte, oneMask)); + } else { + auto addrVecPair = stuckAtOneMask_.emplace(make_pair(addr, std::vector>())); + if (addrVecPair.second) { + addrVecPair.first->second.push_back(make_pair(byte, oneMask)); + } else { + getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Failed to insert mask.\n"); + } + } +#ifdef __SST_DEBUG_OUTPUT__ + 
getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Finished inserting one-masks for 0x%zx.\n", addr); +#endif + } + + endianness_ = (params.find("endianness", "little") == std::string("little")) ? false : true; +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Endianness set to %s.\n", endianness_ ? "big" : "little"); +#endif +} + +bool StuckAtFault::faultLogic(SST::Event*& ev) { + // Convert to memEvent + SST::MemHierarchy::MemEvent* mem_ev = this->convertMemEvent(ev); + + Addr addr = mem_ev->getAddr(); + std::vector masked_addrs; + for (int i = mem_ev->getBaseAddr(); i < mem_ev->getBaseAddr() + mem_ev->getPayloadSize(); i+=8) { + if (stuckAtZeroMask_.count(i) == 1 || stuckAtOneMask_.count(i) == 1) { + masked_addrs.push_back(i); + } + } + + // check for the addr in question in the fault map + if (masked_addrs.size() > 0) { +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Masked Addr at cache line 0x%zx found in stuck map.\n", addr); +#endif + // replace data if necessary + dataVec payload = this->getMemEventPayload(ev); + + for (int masked_addr: masked_addrs){ + uint8_t mask = 0b00000000; +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Begin zero mask for address: 0x%zx\n", masked_addr); +#endif + // mask tuple is (byte, mask) + if (stuckAtZeroMask_.count(masked_addr) == 1) { + for (auto maskPair: stuckAtZeroMask_.at(masked_addr)) { + mask = maskPair.second; + uint32_t final_byte = computeByte(masked_addr, mem_ev->getBaseAddr(), maskPair.first); +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tbyte %d, value: %d, mask: %d, new value: %d\n", + maskPair.first, (int)payload[final_byte], (int) mask, + (int)(payload[final_byte] & (~mask))); + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tPayload index: %d\n", final_byte); +#endif + payload[final_byte] &= (~mask); + } + } +#ifdef __SST_DEBUG_OUTPUT__ + 
getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "End zero mask for address: 0x%zx\n", masked_addr); + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Begin one mask for address: 0x%zx\n", masked_addr); +#endif + if (stuckAtOneMask_.count(masked_addr) == 1) { + for (auto maskPair: stuckAtOneMask_.at(masked_addr)) { + mask = maskPair.second; + uint32_t final_byte = computeByte(masked_addr, mem_ev->getBaseAddr(), maskPair.first); +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tbyte %d, value: %d, mask: %d, new value: %d\n", + maskPair.first, (int)payload[final_byte], (int) mask, + (int)(payload[final_byte] | mask)); + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "\tPayload index: %d\n", final_byte); +#endif + payload[final_byte] |= mask; + } +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "End one mask for address: 0x%zx\n", masked_addr); +#endif + } + } + + // replace payload + this->setMemEventPayload(ev, payload); + } // if (found) + return true; +} + +std::vector StuckAtFault::convertString(std::vector& paramVecString) { + std::vector paramVec; + + for (auto param = paramVecString.begin(); param != paramVecString.end(); param++) { + // disassemble string + std::stringstream stream; + Addr addr; int byte; std::string zeroMaskStr, oneMaskStr; uint8_t zeroMask, oneMask; + stream.str(*param); + stream >> std::hex >> addr; + if (stream.peek() == ',') { + stream.ignore(); + } + stream >> std::dec >> byte; + if (stream.peek() == ',') { + stream.ignore(); + } + stream >> zeroMaskStr; + zeroMask = static_cast(std::bitset<8>(zeroMaskStr).to_ulong()); + if (stream.peek() == ',') { + stream.ignore(); + } + stream >> oneMaskStr; + oneMask = static_cast(std::bitset<8>(oneMaskStr).to_ulong()); + if (stream.peek() == ',') { + stream.ignore(); + } +#ifdef __SST_DEBUG_OUTPUT__ + getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Masks for addr 0x%zx, byte %d: %d %d\n", addr, byte, (int)zeroMask, 
(int)oneMask); +#endif + // insert maskParam + paramVec.push_back({addr, byte, zeroMask, oneMask}); + } + + return paramVec; +} + +uint32_t StuckAtFault::computeByte(Addr addr, Addr base_addr, uint32_t byte) { + uint32_t base_byte = addr - base_addr; + // vanadis riscv is little endian, so bytes are in reverse order + // Big endian: Addr->(B7|B6|B5|B4|B3|B2|B1|B0); Little endian: Addr->(B0|B1|B2|B3|B4|B5|B6|B7) + // endianness bool -> true = big; false = little + if (endianness_) { + return (base_byte + 7) - byte; + } else { + return base_byte + byte; + } +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/faultlogic/stuckAtFault.h b/src/sst/elements/carcosa/faultlogic/stuckAtFault.h new file mode 100644 index 0000000000..9ec4cdb59a --- /dev/null +++ b/src/sst/elements/carcosa/faultlogic/stuckAtFault.h @@ -0,0 +1,84 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_STUCKATFAULT_H +#define SST_ELEMENTS_CARCOSA_STUCKATFAULT_H + +#include "sst/elements/carcosa/faultlogic/faultBase.h" +#include +#include +#include +#include +#include + +namespace SST::Carcosa { + +typedef std::vector dataVec; +typedef SST::MemHierarchy::Addr Addr; + +/** + * This fault is used to simulate a stuck bit fault. + * To ensure correct operation, make sure that the port module + * using this fault is attached at every point where the data + * for this bit could be read. 
For example, a stuck bit in the L2 + * cache would need a port module with this fault installed on all + * input OR all output ports to the L2; if the simulator has forwarding enabled, + * but the actual system being simulated does not do the forwarding from memory + * directly into the L1 or the core (bypassing L2 ops in simulation), it may be + * advisable to also place these port modules on the ports used to forward these events. + */ +class StuckAtFault : public FaultBase +{ +public: + + StuckAtFault(Params& params, FaultInjectorBase* injector); + + StuckAtFault() = default; + ~StuckAtFault() {} + + /** + * Read event payload and perform the following: + * - If stuckAtMap.at(addr) exists, compare all listed bits with payload value + * - If payload value does not match mapped value, add bit to flip mask + * - Once all stored bit values have been compared, use flip mask to modify address data + */ + bool faultLogic(Event*& ev) override; +protected: + + // map of addr->{byte, mask} for saving stuck bit values + std::map>> stuckAtZeroMask_; + // add stuckAtOneMask + std::map>> stuckAtOneMask_; + // false = little; true = big + bool endianness_ = false; + + typedef struct maskParam { + Addr addr; + int byte; + uint8_t zeroMask; + uint8_t oneMask; + } maskParam_t; + + std::vector convertString(std::vector& paramVecStr); + uint32_t computeByte(Addr addr, Addr base_addr, uint32_t byte); + + void serialize_order(SST::Core::Serialization::serializer& ser) override { + FaultBase::serialize_order(ser); + SST_SER(stuckAtZeroMask_); + SST_SER(stuckAtOneMask_); + SST_SER(endianness_); + } + ImplementVirtualSerializable(StuckAtFault) +}; + +} // namespace SST::Carcosa + +#endif // SST_ELEMENTS_CARCOSA_STUCKATFAULT_H \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc new file mode 100644 index 0000000000..1d744e7abe --- /dev/null +++ 
b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.cc @@ -0,0 +1,40 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst/elements/carcosa/injectors/corruptMemFaultInjector.h" +#include "sst/elements/carcosa/faultlogic/corruptMemFault.h" + +using namespace SST::Carcosa; + +CorruptMemFaultInjector::CorruptMemFaultInjector(Params& params) : FaultInjectorBase(params) { + // create fault + fault.push_back(new CorruptMemFault(params, this)); + setValidInstallation(params, SEND_RECEIVE_VALID); +} + +void CorruptMemFaultInjector::executeFaults(Event*& ev) { + // is this addr in a corrupt region? + std::vector* regionsToUse = dynamic_cast(fault[0])->checkAddrUsage(ev); + // if returned vec is not empty, save to fault-accessible location and execute + if (regionsToUse->size() != 0) { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 2, 0, "Corruption region detected.\n"); +#endif + if (!fault[0]) { + out_->fatal(CALL_INFO_LONG, -1, "No valid fault to execute.\n"); + } + if (!fault[0]->faultLogic(ev)) { + out_->fatal(CALL_INFO_LONG, -1, "Fault somehow returned unsuccessful... How?\n"); + } + // reset vec + regionsToUse->clear(); + } +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h new file mode 100644 index 0000000000..acdcad1e45 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/corruptMemFaultInjector.h @@ -0,0 +1,50 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULTINJECTOR_H +#define SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULTINJECTOR_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" + +namespace SST::Carcosa { + +class CorruptMemFaultInjector : public FaultInjectorBase { +public: + SST_ELI_REGISTER_PORTMODULE( + CorruptMemFaultInjector, + "carcosa", + "CorruptMemFaultInjector", + SST_ELI_ELEMENT_VERSION(0, 1, 0), + "PortModule class used to simulate a whole memory region being corrupted" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"regions", "Formatted as an array of strings: [\"start_addr0, end_addr0\", \"start_addr1, end_addr1\",...,\"start_addrN, end_addrN\"]. Addresses expected in hexadecimal."} + ) + + CorruptMemFaultInjector(Params& params); + + CorruptMemFaultInjector() = default; + ~CorruptMemFaultInjector() {} +protected: + void executeFaults(Event*& ev) override; + + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + // serialize parameters like `SST_SER()` + } + ImplementVirtualSerializable(SST::Carcosa::CorruptMemFaultInjector) +}; // class CorruptMemFaultInjector + +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc new file mode 100644 index 0000000000..1dcbddec05 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.cc @@ -0,0 +1,99 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. 
For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst/elements/carcosa/injectors/dropFlipFaultInjector.h" +#include "sst/elements/carcosa/faultlogic/randomFlipFault.h" +#include "sst/elements/carcosa/faultlogic/randomDropFault.h" + +using namespace SST::Carcosa; + +DropFlipFaultInjector::DropFlipFaultInjector(Params& params) : FaultInjectorBase(params) { + // create fault + fault.resize(2); + fault[0] = new RandomDropFault(params, this); + fault[1] = new RandomFlipFault(params, this); + + // read probability params + drop_probability_ = params.find("drop_probability", 0.0); +#ifdef __SST_DEBUG_OUTPUT__ + if (drop_probability_ > 0.0){ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Drop probability set to %lf.\n", drop_probability_); + } +#endif + + flip_probability_ = params.find("flip_probability", 0.0); +#ifdef __SST_DEBUG_OUTPUT__ + if (flip_probability_ > 0.0){ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Flip probability set to %lf.\n", flip_probability_); + } +#endif + + setValidInstallation(params, SEND_RECEIVE_VALID); +} + +bool DropFlipFaultInjector::doInjection() { + if (this->randFloat(0.0, 1.0) <= this->drop_probability_) { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Drop triggered.\n"); +#endif + this->triggered_injection_[0] = true; + } else { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Drop skipped.\n"); +#endif + this->triggered_injection_[0] = false; + } + + if (this->randFloat(0.0, 1.0) <= this->flip_probability_) { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Flip triggered.\n"); +#endif + this->triggered_injection_[1] = true; + } + else { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Flip skipped.\n"); +#endif + this->triggered_injection_[1] = false; + } + + return this->triggered_injection_[0] || this->triggered_injection_[1]; +} + +/** + * Overridden execution function to cause faults to be chosen at random + * 
from the vector once a fault has been triggered + */ +void DropFlipFaultInjector::executeFaults(Event*& ev) { + if (this->triggered_injection_[0]) { + // do drop + if (fault[0]) { + fault[0]->faultLogic(ev); +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Drop triggered.\n"); +#endif + } else { + out_->fatal(CALL_INFO_LONG, -1, "No valid drop fault object.\n"); + } + return; + } + if (this->triggered_injection_[1]) { + // do flip + if (fault[1]) { + fault[1]->faultLogic(ev); +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Flip triggered.\n"); +#endif + } else { + out_->fatal(CALL_INFO_LONG, -1, "No valid flip fault object.\n"); + } + } +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h new file mode 100644 index 0000000000..a0b165f62c --- /dev/null +++ b/src/sst/elements/carcosa/injectors/dropFlipFaultInjector.h @@ -0,0 +1,61 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_DOUBLEFAULTINJECTOR_H +#define SST_ELEMENTS_CARCOSA_DOUBLEFAULTINJECTOR_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" +#include + +namespace SST::Carcosa { + +class DropFlipFaultInjector : public FaultInjectorBase { +public: + SST_ELI_REGISTER_PORTMODULE( + DropFlipFaultInjector, + "carcosa", + "DropFlipFaultInjector", + SST_ELI_ELEMENT_VERSION(0, 1, 0), + "PortModule class used to simulate a data transfer lost at random and a random bit flip in transit" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"drop_probability", "The probability that a drop will be injected. 
Default = 0.0"}, + {"flip_probability", "The probability that a flip will be injected. Default = 0.0"} + ) + + DropFlipFaultInjector(Params& params); + + DropFlipFaultInjector() = default; + ~DropFlipFaultInjector() {} +protected: + double drop_probability_; + double flip_probability_; + // Byte array representing triggered fault. First is drop, second is flip. + std::array triggered_injection_; + + bool doInjection() override; + void executeFaults(Event*& ev) override; + + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + // serialize parameters like `SST_SER()` + SST_SER(drop_probability_); + SST_SER(flip_probability_); + SST_SER(triggered_injection_); + } + ImplementVirtualSerializable(SST::Carcosa::DoubleFaultInjector) +}; // class DoubleFaultInjector + +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/faultInjectorBase.cc b/src/sst/elements/carcosa/injectors/faultInjectorBase.cc new file mode 100644 index 0000000000..6e760de39f --- /dev/null +++ b/src/sst/elements/carcosa/injectors/faultInjectorBase.cc @@ -0,0 +1,168 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" +#include "sst/core/params.h" + +using namespace SST::Carcosa; + +/********** FaultInjectorBase **********/ + +FaultInjectorBase::FaultInjectorBase(SST::Params& params) : PortModule() +{ + out_ = new Output(); + out_->init("", params.find("verbose", 1), 0, Output::STDOUT); + + dbg_ = new Output(); + dbg_->init("", params.find("debug_level", 1), 0, (Output::output_location_t)params.find("debug", 0)); + +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Initializing FaultInjector:\n"); +#endif + seed_ = params.find("seed", 0); + if (seed_ != 0) { + base_rng_.seed(seed_); +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "\tRNG Seed: %d\n", seed_); +#endif + } +} + +/** + * Default behavior is to delete all fault objects in the order they were + * added to the vector + */ +FaultInjectorBase::~FaultInjectorBase() { + for (int i = 0; i < fault.size(); i++) { + if (fault[i]) { + delete fault[i]; + } + } +} + +void +FaultInjectorBase::eventSent(uintptr_t key, Event*& ev) +{ + if (!valid_installs_set) { + out_->fatal(CALL_INFO_LONG, -1, "Valid installation directions not set -- did you forget to call setValidInstallation() in your constructor?\n"); + } + if (doInjection()){ +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection triggered.\n"); +#endif + this->executeFaults(ev); + } +#ifdef __SST_DEBUG_OUTPUT__ + else { + dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection skipped.\n"); + } +#endif +} + +void +FaultInjectorBase::interceptHandler(uintptr_t key, Event*& ev, bool& cancel) +{ + if (!valid_installs_set) { + out_->fatal(CALL_INFO_LONG, -1, "Valid installation directions not set -- did you forget to call setValidInstallation() in your constructor?\n"); + } + // do not cancel delivery by default + cancel = false; + cancel_ = &cancel; + + if (doInjection()){ +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection triggered.\n"); +#endif + 
this->executeFaults(ev); + } +#ifdef __SST_DEBUG_OUTPUT__ + else { + dbg_->debug(CALL_INFO_LONG, 3, 0, "Injection skipped.\n"); + } +#endif +} + +uint32_t FaultInjectorBase::randUInt32(uint32_t start, uint32_t end) { + uint32_t range = end - start; + return start + (base_rng_.generateNextUInt32() % range); +} + +int32_t FaultInjectorBase::randInt32(int32_t start, int32_t end) { + int32_t range = end - start; + return start + (base_rng_.generateNextInt32() % range); +} + +uint64_t FaultInjectorBase::randUInt64(uint64_t start, uint64_t end) { + uint64_t range = end - start; + return start + (base_rng_.generateNextUInt64() % range); +} + +int64_t FaultInjectorBase::randInt64(int64_t start, int64_t end) { + int64_t range = end - start; + return start + (base_rng_.generateNextInt64() % range); +} + +double FaultInjectorBase::randFloat(double start, double end) { + double range = end - start; + return start + (base_rng_.nextUniform() * range); +} + +bool FaultInjectorBase::doInjection() { + return true; +} + +installDirection FaultInjectorBase::setInstallDirection(std::string param) { + if ( param == "Receive" ) { + if (valid_installation_[0]) { + return installDirection::Receive; + } else { + out_->fatal(CALL_INFO_LONG, 1, 0, "This PortModule Fault Injector cannot intercept Receive events.\n"); + } + } else if ( param == "Send" ) { + if (valid_installation_[1]) { + return installDirection::Send; + } else { + out_->fatal(CALL_INFO_LONG, 1, 0, "This PortModule Fault Injector cannot intercept Send events.\n"); + } + } + return installDirection::Invalid; +} + +void FaultInjectorBase::setValidInstallation(Params& params, std::array valid_install) { + valid_installation_ = valid_install; + std::string install_dir = params.find("install_direction", "Receive"); + install_direction_ = setInstallDirection(install_dir); + + if (install_direction_ == installDirection::Invalid) { + out_->fatal(CALL_INFO_LONG, -1, "Install Direction should never be set to Invalid! 
Did you forget to set which directions are valid?\n"); + } + +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "\tInstall Direction: %s\n", install_dir.c_str()); +#endif + valid_installs_set = true; +} + +/** + * Default behavior is to execute faults in the order they were + * added to the vector + */ +void FaultInjectorBase::executeFaults(Event*& ev) { + bool success = false; + for (int i = 0; i < fault.size(); i++) { + if (fault[i]) { + success = fault[i]->faultLogic(ev); + } + } + if (!success) { + out_->fatal(CALL_INFO_LONG, -1, "No valid fault object, or no fault successfully executed.\n"); + } +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/faultInjectorBase.h b/src/sst/elements/carcosa/injectors/faultInjectorBase.h new file mode 100644 index 0000000000..18fa2dbd22 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/faultInjectorBase.h @@ -0,0 +1,178 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#ifndef SST_ELEMENTS_CARCOSA_FAULTINJECTORBASE_H +#define SST_ELEMENTS_CARCOSA_FAULTINJECTORBASE_H + +#include "sst/core/portModule.h" +#include "sst/core/event.h" +#include "sst/core/output.h" +#include "sst/elements/memHierarchy/memEvent.h" +#include "sst/elements/carcosa/faultlogic/faultBase.h" +#include +#include "sst/core/rng/mersenne.h" +#include +#include +#include +#include + +namespace SST::Carcosa { + +typedef std::vector dataVec; +class FaultBase; + +#define SEND_RECEIVE_VALID {{true, true}} +#define RECEIVE_VALID {{true, false}} +#define SEND_VALID {{false, true}} + +/********** FaultInjectorBase **********/ + +enum installDirection { + Send = 0, + Receive, + Invalid +}; + +/** + * Base class containing required functions and basic data for + * creating fault injection on component ports. + * + * Injectors are used to execute the logic that tests for + * whether or not an injection should occur. Upon triggering + * an injection, a fault object which inherits from the + * FaultBase class but be used to execute the fault logic + * on the triggering message. + */ +class FaultInjectorBase : public SST::PortModule +{ +public: + + SST_ELI_REGISTER_PORTMODULE( + FaultInjectorBase, + "carcosa", + "FaultInjectorBase", + SST_ELI_ELEMENT_VERSION(0, 1, 0), + "Base PortModule class used to connect fault injection logic to components" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"install_direction", "Flag which direction the injector should read from on a port. Valid optins are \'Send\', \'Receive\', and \'Both\'. Default is \'Receive\'."}, + {"seed", "Optional integer seed to give to the random number generator. Default = 0 (0 seed will be assumed to mean NO seed)."}, + {"debug", "Integer determining if debug should be active. 0 disables, 1 sends output to STDOUT, 2 to STDERR. Default = 0"}, + {"debug_level", "Integer determining verbosity of debug output. 
1 enables basic text output, 2 enables signficant activity output."} + ) + + FaultInjectorBase(Params& params); + + FaultInjectorBase() = default; + ~FaultInjectorBase(); + + void virtual eventSent(uintptr_t key, Event*& ev) override; + void virtual interceptHandler(uintptr_t key, Event*& ev, bool& cancel) override; + + bool installOnReceive() override + { + switch (install_direction_) { + case Send: + return false; + case Receive: + default: + return true; + } + } + bool installOnSend() override + { + switch (install_direction_) { + case Send: + return true; + case Receive: + default: + return false; + } + } + + void cancelDelivery() { + *cancel_ = true; + } + + installDirection setInstallDirection(std::string param); + + installDirection getInstallDirection() { + return install_direction_; + } + + enum memEventType { + DataRequest = 0, + Response, + Writeback, + RoutedByAddr, + Invalid + }; + + SST::Output* getOutput() { + return out_; + } + + SST::Output* getDebug() { + return dbg_; + } + + uint32_t randUInt32(uint32_t start, uint32_t end); + int32_t randInt32(int32_t start, int32_t end); + uint64_t randUInt64(uint64_t start, uint64_t end); + int64_t randInt64(int64_t start, int64_t end); + + double randFloat(double start, double end); + +protected: + SST::Output* out_; + SST::Output* dbg_; + std::vector fault; + bool* cancel_; + installDirection install_direction_ = installDirection::Receive; + SST::RNG::MersenneRNG base_rng_; + uint64_t seed_ = 0; +private: + std::array valid_installation_ = {{false, false}}; + bool valid_installs_set = false; +protected: + + virtual bool doInjection(); + virtual void executeFaults(Event*& ev); + + /** + * This function MUST be called by the derived class constructor + * @arg params pass the same params object to this function + * @arg valid_install_ pass either SEND_VALID, RECEIVE_VALID, + * or SEND_RECEIVE_VALID + */ + void setValidInstallation(Params& params, std::array valid_install); + + void 
serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + // serialize parameters like `SST_SER()` + SST_SER(out_); + SST_SER(dbg_); + SST_SER(fault); + SST_SER(cancel_); + SST_SER(install_direction_); + SST_SER(base_rng_); + SST_SER(seed_); + SST_SER(valid_installation_); + SST_SER(valid_installs_set); + } + ImplementVirtualSerializable(SST::Carcosa::FaultInjectorBase) +}; + +} // namespace SST::FaultInjectorBase + +#endif // SST_ELEMENTS_CARCOSA_FAULTINJECTORBASE_H \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc new file mode 100644 index 0000000000..33f9bcc735 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.cc @@ -0,0 +1,58 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#include "sst/elements/carcosa/injectors/randomDropFaultInjector.h" +#include "sst/elements/carcosa/faultlogic/randomDropFault.h" + +using namespace SST::Carcosa; + +RandomDropFaultInjector::RandomDropFaultInjector(Params& params) : FaultInjectorBase(params) { + // read injection probability + injection_probability_ = params.find("injection_probability", 0.0); + // create fault + fault.push_back(new RandomDropFault(params, this)); + setValidInstallation(params, RECEIVE_VALID); +} + +bool RandomDropFaultInjector::doInjection() { + if (this->randFloat(0.0, 1.0) <= this->injection_probability_) { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Injection triggered.\n"); +#endif + return true; + } else { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Injection skipped.\n"); +#endif + return false; + } +} + +/** + * Custom execution is required to ensure delivery is canceled + * + * In the base interceptHandler, a reference to a boolean called + * 'cancel' is accepted as an argument. That function assigns the + * injector's pointer (called 'cancel_') to that reference's address, + * and that reference must be updated here after the event is destroyed + * if the installation direction of this PortModule was set to 'Receive'. + */ +void RandomDropFaultInjector::executeFaults(Event*& ev) { + if (fault[0]) { + if (this->doInjection()) { + if (!fault[0]->faultLogic(ev)) { + out_->fatal(CALL_INFO_LONG, -1, "Fault execution failed.\n"); + } + } + } else { + out_->fatal(CALL_INFO_LONG, -1, "No valid fault object.\n"); + } +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/randomDropFaultInjector.h b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.h new file mode 100644 index 0000000000..e1b3b901e2 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/randomDropFaultInjector.h @@ -0,0 +1,54 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. 
+// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_RANDOMDROPFAULTINJECTOR_H +#define SST_ELEMENTS_CARCOSA_RANDOMDROPFAULTINJECTOR_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" + +namespace SST::Carcosa { + +class RandomDropFaultInjector : public FaultInjectorBase { +public: + SST_ELI_REGISTER_PORTMODULE( + RandomDropFaultInjector, + "carcosa", + "RandomDropFaultInjector", + SST_ELI_ELEMENT_VERSION(0, 2, 0), + "PortModule class used to simulate a data transfer lost at random" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"injection_probability", "Probability for injection to randomly occur. Default = 0.0"} + ) + + RandomDropFaultInjector(Params& params); + + RandomDropFaultInjector() = default; + ~RandomDropFaultInjector() {} +protected: + + double injection_probability_; + bool doInjection() override; + void executeFaults(Event*& ev) override; + + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + SST_SER(injection_probability_); + // serialize parameters like `SST_SER()` + } + ImplementVirtualSerializable(SST::Carcosa::RandomDropFaultInjector) +}; // class RandomDropFaultInjector + +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc new file mode 100644 index 0000000000..999045c316 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.cc @@ -0,0 +1,55 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst/elements/carcosa/injectors/randomFlipFaultInjector.h" +#include "sst/elements/carcosa/faultlogic/randomFlipFault.h" + +using namespace SST::Carcosa; + +RandomFlipFaultInjector::RandomFlipFaultInjector(Params& params) : FaultInjectorBase(params) { + // read injection probability + this->injection_probability_ = params.find("injection_probability", 0.0); +#ifdef __SST_DEBUG_OUTPUT__ + if (injection_probability_ > 0.0){ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Injection probability set to %lf.\n", injection_probability_); + } +#endif + + // create fault + fault.push_back(new RandomFlipFault(params, this)); + setValidInstallation(params, SEND_RECEIVE_VALID); +} + +bool RandomFlipFaultInjector::doInjection() { + if (this->randFloat(0.0, 1.0) <= this->injection_probability_) { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Injection triggered.\n"); +#endif + return true; + } else { +#ifdef __SST_DEBUG_OUTPUT__ + dbg_->debug(CALL_INFO_LONG, 1, 0, "Injection skipped.\n"); +#endif + return false; + } +} + +void RandomFlipFaultInjector::executeFaults(Event*& ev) { + if (fault[0]) { + if (this->doInjection()) { + if (!fault[0]->faultLogic(ev)) { + out_->fatal(CALL_INFO_LONG, -1, "Fault execution failed.\n"); + } + } + } else { + out_->fatal(CALL_INFO_LONG, -1, "No valid fault object.\n"); + } +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h new file mode 100644 index 0000000000..cf79131bef --- /dev/null +++ b/src/sst/elements/carcosa/injectors/randomFlipFaultInjector.h @@ -0,0 +1,55 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. 
+// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULTINJECTOR_H +#define SST_ELEMENTS_CARCOSA_RANDOMFLIPFAULTINJECTOR_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" + +namespace SST::Carcosa { + +class RandomFlipFaultInjector : public FaultInjectorBase { +public: + SST_ELI_REGISTER_PORTMODULE( + RandomFlipFaultInjector, + "carcosa", + "RandomFlipFaultInjector", + SST_ELI_ELEMENT_VERSION(0, 1, 0), + "PortModule class used to simulate a random bit flip when transferring data" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"injection_probability", "Probability for fault injection to trigger. Default = 0.0"} + ) + + RandomFlipFaultInjector(Params& params); + + RandomFlipFaultInjector() = default; + ~RandomFlipFaultInjector() {} +protected: + double injection_probability_; + + + bool doInjection() override; + void executeFaults(Event*& ev) override; + + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + // serialize parameters like `SST_SER()` + SST_SER(injection_probability_); + } + ImplementVirtualSerializable(SST::Carcosa::RandomFlipFaultInjector) +}; // class RandomFlipFaultInjector + +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc new file mode 100644 index 0000000000..f8968a63c4 --- /dev/null +++ b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.cc @@ -0,0 +1,21 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#include "sst/elements/carcosa/injectors/stuckAtFaultInjector.h" +#include "sst/elements/carcosa/faultlogic/stuckAtFault.h" + +using namespace SST::Carcosa; + +StuckAtFaultInjector::StuckAtFaultInjector(Params& params) : FaultInjectorBase(params) { + // create fault + fault.push_back(new StuckAtFault(params, this)); + setValidInstallation(params, SEND_RECEIVE_VALID); +} \ No newline at end of file diff --git a/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h new file mode 100644 index 0000000000..87b1157c3d --- /dev/null +++ b/src/sst/elements/carcosa/injectors/stuckAtFaultInjector.h @@ -0,0 +1,51 @@ +// Copyright 2009-2025 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2025, NTESS +// All rights reserved. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef SST_ELEMENTS_CARCOSA_STUCKATFAULTINJECTOR_H +#define SST_ELEMENTS_CARCOSA_STUCKATFAULTINJECTOR_H + +#include "sst/elements/carcosa/injectors/faultInjectorBase.h" +#include + +namespace SST::Carcosa { + +class StuckAtFaultInjector : public FaultInjectorBase { +public: + SST_ELI_REGISTER_PORTMODULE( + StuckAtFaultInjector, + "carcosa", + "StuckAtFaultInjector", + SST_ELI_ELEMENT_VERSION(0, 1, 0), + "PortModule class used to simulate a stuck bit within a given component" + ) + + SST_ELI_DOCUMENT_PARAMS( + {"masks", "String array formatted as [\"addr0, byte0, zeroMask0, oneMask0\",...,\"addrN, byteN, zeroMaskN, oneMaskN\"]." 
\ + "Addresses are expected to be in hexadecimal, and masks are 8 bit strings."}, + {"endianness", "Byte ordering in memory. Given as a string containing \'little\' or \'big\'. Default: little"} + ) + + StuckAtFaultInjector(Params& params); + + StuckAtFaultInjector() = default; + ~StuckAtFaultInjector() {} +protected: + void serialize_order(SST::Core::Serialization::serializer& ser) override + { + SST::PortModule::serialize_order(ser); + // serialize parameters like `SST_SER()` + } + ImplementVirtualSerializable(SST::Carcosa::StuckAtFaultInjector) +}; // class StuckAtFaultInjector + +} // namespace SST::Carcosa + +#endif \ No newline at end of file diff --git a/src/sst/elements/carcosa/tests/testCorruptMemBasic.py b/src/sst/elements/carcosa/tests/testCorruptMemBasic.py new file mode 100644 index 0000000000..54b8785484 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testCorruptMemBasic.py @@ -0,0 +1,570 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = 
"simpleTLB" +mmuType = "simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(auto_clock_sys)) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decocer.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L2 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 buss + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +memctrl.addPortModule("highlink", "carcosa.CorruptMemFaultInjector", { + "install_direction": "Receive", + "regions": ["4D88, 4DA0"], + "debug" : 1, + "debug_level": 2 +}) + +# node memory 
controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + 
link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testCorruptMemDouble.py b/src/sst/elements/carcosa/tests/testCorruptMemDouble.py new file mode 100644 index 0000000000..d38f90d7ba --- /dev/null +++ b/src/sst/elements/carcosa/tests/testCorruptMemDouble.py @@ -0,0 +1,570 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST core 
# Options: SST core time base and stop-at time.
for _option, _setting in (("timebase", "1ps"), ("stop-at", "0 ns")):
    sst.setProgramOption(_option, _setting)

# Statistics handling: load level 4, reported through the console output.
sst.setStatisticLoadLevel(4)
sst.setStatisticOutput("sst.statOutputConsole")

# Executable to simulate. The path is fixed here; the environment lookup that
# used to follow it is kept below, disabled, exactly as it was:
#   os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe )
full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"
# Last path component, later passed to the process as arg0.
exe_name = full_exe_name.rpartition("/")[2]

# Verbosity and pipeline tracing. os_verbosity falls back to the CPU verbosity.
verbosity = int(os.environ.get("VANADIS_VERBOSE", 0))
os_verbosity = os.environ.get("VANADIS_OS_VERBOSE", verbosity)
pipe_trace_file = os.environ.get("VANADIS_PIPE_TRACE", "")

# Load/store queue depths (left unconverted: str when set, int default otherwise).
lsq_ld_entries, lsq_st_entries = (
    os.environ.get("VANADIS_LSQ_LD_ENTRIES", 16),
    os.environ.get("VANADIS_LSQ_ST_ENTRIES", 8),
)

# Reorder buffer size and per-cycle pipeline widths (also left unconverted).
rob_slots, retires_per_cycle, issues_per_cycle, decodes_per_cycle = (
    os.environ.get("VANADIS_ROB_SLOTS", 64),
    os.environ.get("VANADIS_RETIRES_PER_CYCLE", 4),
    os.environ.get("VANADIS_ISSUES_PER_CYCLE", 4),
    os.environ.get("VANADIS_DECODES_PER_CYCLE", 4),
)

# Functional-unit counts and latencies, always converted to int.
(integer_arith_cycles,
 integer_arith_units,
 fp_arith_cycles,
 fp_arith_units,
 branch_arith_cycles) = (
    int(os.environ.get(_name, _fallback))
    for _name, _fallback in (
        ("VANADIS_INTEGER_ARITH_CYCLES", 2),
        ("VANADIS_INTEGER_ARITH_UNITS", 2),
        ("VANADIS_FP_ARITH_CYCLES", 8),
        ("VANADIS_FP_ARITH_UNITS", 2),
        ("VANADIS_BRANCH_ARITH_CYCLES", 2),
    )
)

cpu_clock = os.environ.get("VANADIS_CPU_CLOCK", "2.3GHz")

# Core and hardware-thread counts.
numCpus = int(os.environ.get("VANADIS_NUM_CORES", 1))
numThreads = int(os.environ.get("VANADIS_NUM_HW_THREADS", 1))

# Element-library prefix; the component name is appended right after this.
vanadis_cpu_type = "vanadis."
vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME", "dbg_VanadisCPU")

if verbosity > 0:
    # A second print used to report `auto_clock_sys` here, but that name is never
    # assigned in this script, so every run with VANADIS_VERBOSE > 0 died with a
    # NameError before the simulation was configured. The print has been dropped.
    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
#    vanadis_cpu_type = "vanadisdbg.VanadisCPU"

# Optional command-line arguments for the simulated executable, space separated.
app_args = os.getenv("VANADIS_EXE_ARGS", "")

app_params = {}
if app_args != "":
    app_args_list = app_args.split(" ")
    # We have a plus 1 because the executable name is arg0
    app_args_count = len(app_args_list) + 1

    app_params["argc"] = app_args_count

    if verbosity > 0:
        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
    # arg0 is the executable name, so user arguments are numbered from 1.
    for arg_start, next_arg in enumerate(app_args_list, start=1):
        if verbosity > 0:
            print("arg" + str(arg_start) + " = " + next_arg)
        app_params["arg" + str(arg_start)] = next_arg
else:
    app_params["argc"] = 1
    if verbosity > 0:
        print("No application arguments found, continuing with argc=1")

# Decoder and OS-handler subcomponent names are derived from the selected ISA.
vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"


protocol = "MESI"

# OS related params
osParams = {
    "processDebugLevel" : 0,
    "dbgLevel" : os_verbosity,
    "dbgMask" : 8,
    "cores" : numCpus,
    "hardwareThreadCount" : numThreads,
    "page_size" : 4096,
    "physMemSize" : physMemSize,
    "useMMU" : True,
    "checkpointDir" : checkpointDir,
    "checkpoint" : checkpoint
}

# Each entry is (number of copies to launch, per-process parameters).
processList = (
    ( 1, {
        "env_count" : 1,
        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
        "exe" : full_exe_name,
        "arg0" : exe_name,
    } ),
)

# Merge argc / argN into the first (only) process description.
processList[0][1].update(app_params)

osl1cacheParams = {
    "access_latency_cycles" : "2",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "8",
    "cache_line_size" : "64",
    "cache_size" : "32 KB",
    "L1" : "1",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

mmuParams = {
    "debug_level": 0,
    "num_cores": numCpus,
    "num_threads": numThreads,
    "page_size": 4096,
}

# Router ports: one per CPU, plus the directory controller and the OS cache.
memRtrParams = {
    "xbar_bw" : "1GB/s",
    "link_bw" : "1GB/s",
    "input_buf_size" : "2KB",
    "num_ports" : str(numCpus+2),
    "flit_size" : "72B",
    "output_buf_size" : "2KB",
    "id" : "0",
    "topology" : "merlin.singlerouter"
}

dirCtrlParams = {
    "coherence_protocol" : protocol,
    "entry_cache_size" : "1024",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
    "addr_range_start" : "0x0",
    "addr_range_end" : "0xFFFFFFFF"
}

dirNicParams = {
    "network_bw" : "25GB/s",
    "group" : 2,
}

memCtrlParams = {
    "clock" : cpu_clock,
    "backend.mem_size" : physMemSize,
    "backing" : "malloc",
    "initBacking": 1,
    "addr_range_start": 0,
    "addr_range_end": 0xffffffff,
    "debug_level" : mh_debug_level,
    "debug" : mh_debug,
    "checkpointDir" : checkpointDir,
    "checkpoint" : checkpoint
}

memParams = {
    "mem_size" : "4GiB",
    "access_time" : "1 ns"
}

# CPU related params
tlbParams = {
    "debug_level": 0,
    "hitLatency": 1,
    "num_hardware_threads": numThreads,
    "num_tlb_entries_per_thread": 64,
    "tlb_set_size": 4,
}

tlbWrapperParams = {
    "debug_level": 0,
}

decoderParams = {
    "loader_mode" : loader_mode,
    "uop_cache_entries" : 1536,
    "predecode_cache_entries" : 4
}

osHdlrParams = { }

branchPredParams = {
    "branch_entries" : 32
}

cpuParams = {
    "clock" : cpu_clock,
    "verbose" : verbosity,
    "hardware_threads": numThreads,
    "physical_fp_registers" : 168 * numThreads,
    "physical_integer_registers" : 180 * numThreads,
    "integer_arith_cycles" : integer_arith_cycles,
    "integer_arith_units" : integer_arith_units,
    "fp_arith_cycles" : fp_arith_cycles,
    "fp_arith_units" : fp_arith_units,
    "branch_unit_cycles" : branch_arith_cycles,
    "print_int_reg" : False,
    "print_fp_reg" : False,
    "pipeline_trace_file" : pipe_trace_file,
    "reorder_slots" : rob_slots,
    "decodes_per_cycle" : decodes_per_cycle,
    "issues_per_cycle" : issues_per_cycle,
    "retires_per_cycle" : retires_per_cycle,
    "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0),
    "start_verbose_when_issue_address": dbgAddr,
    "stop_verbose_when_retire_address": stopDbg,
    "print_rob" : False,
    "checkpointDir" : checkpointDir,
    "checkpoint" : checkpoint
}

lsqParams = {
    "verbose" : verbosity,
    "address_mask" : 0xFFFFFFFF,
    "max_stores" : lsq_st_entries,
    "max_loads" : lsq_ld_entries,
}

l1dcacheParams = {
    "access_latency_cycles" : "2",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "8",
    "cache_line_size" : "64",
    "cache_size" : "32 KB",
    "L1" : "1",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

l1icacheParams = {
    "access_latency_cycles" : "2",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "8",
    "cache_line_size" : "64",
    "cache_size" : "32 KB",
    "prefetcher" : "cassini.NextBlockPrefetcher",
    "prefetcher.reach" : 1,
    "L1" : "1",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

l2cacheParams = {
    "access_latency_cycles" : "14",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "16",
    "cache_line_size" : "64",
    "cache_size" : "1MB",
    "mshr_latency_cycles": 3,
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}
busParams = {
    "bus_frequency" : cpu_clock,
}

l2memLinkParams = {
    "group" : 1,
    "network_bw" : "25GB/s"
}


class CPU_Builder:
    """Creates one Vanadis core together with its private L1/L2 caches, bus and TLBs."""

    def __init__(self):
        pass

    # CPU
    def build( self, prefix, nodeId, cpuId ):
        """Instantiate and wire a single core.

        prefix -- component-name prefix; also used to name the caches, bus,
                  TLB wrappers and links created here.
        nodeId -- accepted for the caller's convenience; not used in this method.
        cpuId  -- value stored in the CPU's "core_id" parameter.

        Returns four (component, port, latency) tuples, in this order:
        the CPU "os_link" port, the L2 MemNIC "port", the data TLB "mmu" port
        and the instruction TLB "mmu" port.
        """

        if pythonDebug:
            print("build {}".format(prefix) )

        # CPU
        cpu = sst.Component(prefix, vanadis_cpu_type)
        cpu.addParams( cpuParams )
        cpu.addParam( "core_id", cpuId )
        cpu.enableAllStatistics()

        # CPU.decoder: one decoder (with OS handler and branch unit) per hardware thread
        for n in range(numThreads):
            decode = cpu.setSubComponent( "decoder"+str(n), vanadis_decoder )
            decode.addParams( decoderParams )

            decode.enableAllStatistics()

            # CPU.decoder.osHandler
            os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr )
            os_hdlr.addParams( osHdlrParams )

            # CPU.decoder.branch_pred
            branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" )
            branch_pred.addParams( branchPredParams )
            branch_pred.enableAllStatistics()

        # CPU.lsq
        cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" )
        cpu_lsq.addParams(lsqParams)
        cpu_lsq.enableAllStatistics()

        # CPU.lsq mem interface which connects to D-cache
        cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" )

        # CPU.mem interface for I-cache
        cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" )

        # L1 D-cache
        cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache")
        cpu_l1dcache.addParams( l1dcacheParams )

        # L1 I-cache
        cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache")
        cpu_l1icache.addParams( l1icacheParams )

        # L2 cache
        cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache")
        cpu_l2cache.addParams( l2cacheParams )

        # L2 cache mem interface
        l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC")
        l2cache_2_mem.addParams( l2memLinkParams )

        # L1 to L2 bus
        cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus")
        cache_bus.addParams(busParams)

        # CPU data TLB
        dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper")
        dtlbWrapper.addParams(tlbWrapperParams)
#        dtlbWrapper.addParam( "debug_level", 0)
        dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." + tlbType )
        dtlb.addParams(tlbParams)

        # CPU instruction TLB
        itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper")
        itlbWrapper.addParams(tlbWrapperParams)
#        itlbWrapper.addParam( "debug_level", 0)
        itlbWrapper.addParam("exe",True)
        itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType )
        itlb.addParams(tlbParams)

        # CPU (data) -> TLB -> Cache
        link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link")
        link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") )
        link_cpu_dtlb_link.setNoCut()

        # data TLB -> data L1
        link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link")
        link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") )
        link_cpu_l1dcache_link.setNoCut()

        # CPU (instruction) -> TLB -> Cache
        link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link")
        link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") )
        link_cpu_itlb_link.setNoCut()

        # instruction TLB -> instruction L1
        link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link")
        link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") )
        link_cpu_l1icache_link.setNoCut()

        # data L1 -> bus
        link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link")
        link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") )
        link_l1dcache_l2cache_link.setNoCut()

        # instruction L1 -> bus
        link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link")
        link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") )
        link_l1icache_l2cache_link.setNoCut()

        # BUS to L2 cache
        link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link")
        link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") )
        link_bus_l2cache_link.setNoCut()

        return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns")


def addParamsPrefix(prefix,params):
    """Return a new dict with every key of `params` renamed to "<prefix>.<key>"."""
    return { prefix + "." + key: value for key, value in params.items() }

# node OS
node_os = sst.Component("os", "vanadis.VanadisNodeOS")
node_os.addParams(osParams)

# Register every requested process copy as process0, process1, ...
num=0
for i,process in processList:
    #print( process )
    for _ in range(i):
        #print( "process", num )
        node_os.addParams( addParamsPrefix( "process" + str(num), process ) )
        num+=1

if pythonDebug:
    print('total hardware threads ' + str(num) )

# node OS MMU
node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType )
node_os_mmu.addParams(mmuParams)

# node OS memory interface to L1 data cache
node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" )

# node OS l1 data cache
os_cache = sst.Component("node_os.cache", "memHierarchy.Cache")
os_cache.addParams(osl1cacheParams)
os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC")
os_cache_2_mem.addParams( l2memLinkParams )

# node memory router
comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router")
comp_chiprtr.addParams(memRtrParams)
comp_chiprtr.setSubComponent("topology","merlin.singlerouter")

# node directory controller
dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController")
dirctrl.addParams(dirCtrlParams)

# node directory controller port to cpu
dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC")
dirNIC.addParams(dirNicParams)

# node memory controller
memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.addParams( memCtrlParams )
# Fault injector under test: attached to the memory controller's "highlink" port
# with two separate regions.
memctrl.addPortModule("highlink", "carcosa.CorruptMemFaultInjector", {
    "install_direction": "Receive",
    "regions": ["4D88, 4D90", "4DA0, 4DA8"],
    "debug" : 1,
    "debug_level": 2
})

# node memory controller backend
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams(memParams)

# node OS data TLB
#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper")
#ostlbWrapper.addParams(tlbWrapperParams)
#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType );
#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" );
#ostlb.addParams(tlbParams)

# OS (data) -> TLB -> Cache
#link_os_ostlb_link = sst.Link("link_os_ostlb_link")
#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") )

# Directory controller to memory router
link_dir_2_rtr = sst.Link("link_dir_2_rtr")
link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") )
link_dir_2_rtr.setNoCut()

# Directory controller to memory controller
link_dir_2_mem = sst.Link("link_dir_2_mem")
link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") )
link_dir_2_mem.setNoCut()

# MMU -> ostlb
# don't need when using pass through TLB
#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link")
#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") )

# OS memory interface -> os l1 cache (the ostlb hop above is disabled)
link_os_cache_link = sst.Link("link_os_cache_link")
#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") )
link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") )
link_os_cache_link.setNoCut()

os_cache_2_rtr = sst.Link("os_cache_2_rtr")
os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") )
os_cache_2_rtr.setNoCut()

cpuBuilder = CPU_Builder()

# build all CPUs
nodeId = 0
for cpu in range(numCpus):

    prefix="node" + str(nodeId) + ".cpu" + str(cpu)
    os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu)

    # MMU -> dtlb
    link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link")
link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py b/src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py new file mode 100644 index 0000000000..c3693a7b21 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testCorruptMemDoubleOverlap.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + 
# Define SST core options: time base and stop-at time.
for _option, _setting in (("timebase", "1ps"), ("stop-at", "0 ns")):
    sst.setProgramOption(_option, _setting)

# Statistics handling: load level 4, reported through the console output.
sst.setStatisticLoadLevel(4)
sst.setStatisticOutput("sst.statOutputConsole")

# Executable to simulate. The path is fixed here; the environment lookup that
# used to follow it is kept below, disabled, exactly as it was:
#   os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe )
full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"
# Last path component, later passed to the process as arg0.
exe_name = full_exe_name.rpartition("/")[2]

# Verbosity and pipeline tracing. os_verbosity falls back to the CPU verbosity.
verbosity = int(os.environ.get("VANADIS_VERBOSE", 0))
os_verbosity = os.environ.get("VANADIS_OS_VERBOSE", verbosity)
pipe_trace_file = os.environ.get("VANADIS_PIPE_TRACE", "")

# Load/store queue depths (left unconverted: str when set, int default otherwise).
lsq_ld_entries, lsq_st_entries = (
    os.environ.get("VANADIS_LSQ_LD_ENTRIES", 16),
    os.environ.get("VANADIS_LSQ_ST_ENTRIES", 8),
)

# Reorder buffer size and per-cycle pipeline widths (also left unconverted).
rob_slots, retires_per_cycle, issues_per_cycle, decodes_per_cycle = (
    os.environ.get("VANADIS_ROB_SLOTS", 64),
    os.environ.get("VANADIS_RETIRES_PER_CYCLE", 4),
    os.environ.get("VANADIS_ISSUES_PER_CYCLE", 4),
    os.environ.get("VANADIS_DECODES_PER_CYCLE", 4),
)

# Functional-unit counts and latencies, always converted to int.
(integer_arith_cycles,
 integer_arith_units,
 fp_arith_cycles,
 fp_arith_units,
 branch_arith_cycles) = (
    int(os.environ.get(_name, _fallback))
    for _name, _fallback in (
        ("VANADIS_INTEGER_ARITH_CYCLES", 2),
        ("VANADIS_INTEGER_ARITH_UNITS", 2),
        ("VANADIS_FP_ARITH_CYCLES", 8),
        ("VANADIS_FP_ARITH_UNITS", 2),
        ("VANADIS_BRANCH_ARITH_CYCLES", 2),
    )
)

cpu_clock = os.environ.get("VANADIS_CPU_CLOCK", "2.3GHz")

# Core and hardware-thread counts.
numCpus = int(os.environ.get("VANADIS_NUM_CORES", 1))
numThreads = int(os.environ.get("VANADIS_NUM_HW_THREADS", 1))

# Element-library prefix; the component name is appended right after this.
vanadis_cpu_type = "vanadis."
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(auto_clock_sys)) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu."
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +#TODO: fix bug here :) +memctrl.addPortModule("highlink", "carcosa.CorruptMemFaultInjector", { + "install_direction": "Receive", + "regions": ["4D88, 4DA0", "4D90, 4DA8"], + "debug" : 1, + 
"debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix 
+ ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testRandomDrop.py b/src/sst/elements/carcosa/tests/testRandomDrop.py new file mode 100644 index 0000000000..716086b8de --- /dev/null +++ b/src/sst/elements/carcosa/tests/testRandomDrop.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST 
core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no"))) # auto_clock_sys is never assigned in this script, so read the setting from the environment +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { +
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu."
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.RandomDropFaultInjector", { + "install_direction": "Receive", + "injection_probability": 0.01, + "debug" : 1, + 
"debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix 
+ ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testRandomFlip.py b/src/sst/elements/carcosa/tests/testRandomFlip.py new file mode 100644 index 0000000000..cd251ef2c8 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testRandomFlip.py @@ -0,0 +1,572 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST 
core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no"))) # auto_clock_sys is never assigned in this script, so read the setting from the environment +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { +
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.RandomFlipFaultInjector", { + "install_direction": "Receive", + "injection_probability": 0.001, + #"seed": 156, + "debug" : 
1, + "debug_level": 1 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = 
sst.Link(prefix + ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtBasic.py b/src/sst/elements/carcosa/tests/testStuckAtBasic.py new file mode 100644 index 0000000000..29ece08d5f --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtBasic.py @@ -0,0 +1,570 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = 
"simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+# The CPU element name appended to "vanadis." can be overridden through
+# VANADIS_CPU_ELEMENT_NAME.
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU")
+
+if (verbosity > 0):
+    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
+    # The former "Auto-clock syscalls" print was removed: it read
+    # auto_clock_sys, which is not assigned anywhere before this point, so a
+    # run with VANADIS_VERBOSE > 0 stopped here with a NameError.
+# vanadis_cpu_type = "vanadisdbg.VanadisCPU"
+
+# Optional argument string from VANADIS_EXE_ARGS; it is turned into the
+# argc/argN entries that are merged into the first process description below.
+app_args = os.getenv("VANADIS_EXE_ARGS", "")
+
+app_params = {}
+if app_args != "":
+    # split() without a separator ignores leading/trailing whitespace and
+    # collapses runs of spaces, so no empty-string arguments are produced.
+    app_args_list = app_args.split()
+    # We have a plus 1 because the executable name is arg0
+    app_args_count = len( app_args_list ) + 1
+
+    app_params["argc"] = app_args_count
+
+    if (verbosity > 0):
+        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
+    arg_start = 1
+    for next_arg in app_args_list:
+        if (verbosity > 0):
+            print("arg" + str(arg_start) + " = " + next_arg)
+        app_params["arg" + str(arg_start)] = next_arg
+        arg_start = arg_start + 1
+else:
+    app_params["argc"] = 1
+    if (verbosity > 0):
+        print("No application arguments found, continuing with argc=1")
+
+# Decoder and OS-handler subcomponent names are derived from the ISA name.
+vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
+vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"
+
+
+protocol="MESI"
+
+# OS related params
+osParams = {
+    "processDebugLevel" : 0,
+    "dbgLevel" : os_verbosity,
+    "dbgMask" : 8,
+    "cores" : numCpus,
+    "hardwareThreadCount" : numThreads,
+    "page_size" : 4096,
+    "physMemSize" : physMemSize,
+    "useMMU" : True,
+    "checkpointDir" : checkpointDir,
+    "checkpoint" : checkpoint
+}
+
+# Process descriptions; the argc/argN values collected above are merged into
+# the parameter dict of the first entry.
+processList = (
+    ( 1, {
+        "env_count" : 1,
+        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
+        "exe" : full_exe_name,
+        "arg0" : exe_name,
+    } ),
+)
+
+processList[0][1].update(app_params)
+
+osl1cacheParams = {
+    "access_latency_cycles" : "2",
+    "cache_frequency" : cpu_clock,
+    "replacement_policy" : "lru",
+    "coherence_protocol" : protocol,
+    "associativity" : "8",
+    "cache_line_size" : "64",
+    "cache_size" : "32 KB",
+    "L1" : "1",
+    "debug" : mh_debug,
+    "debug_level" : mh_debug_level,
+}
+
+mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "install_direction": "Receive", + "masks": ["4D88, 3, 11110000, 00001111"], + "debug" : 1, + "debug_level": 2 +}) + +# node 
memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + 
link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtMultiple.py b/src/sst/elements/carcosa/tests/testStuckAtMultiple.py new file mode 100644 index 0000000000..467a22c30f --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtMultiple.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST core 
options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no")) # auto_clock_sys is never defined in this script; read the env var directly to avoid a NameError +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) # merge argc/argN into the single process entry + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "install_direction": "Receive", # NOTE(review): was "intall_direction"; spelling now matches testStuckAtOverlap.py - confirm the typo was not intentional + "masks": ["4D88, 3, 11110000, 00001111", "4D90, 3, 11110000, 
00001111"], + "debug" : 1, + "debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + 
link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtOverlap.py b/src/sst/elements/carcosa/tests/testStuckAtOverlap.py new file mode 100644 index 0000000000..945e8880b5 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtOverlap.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = 
"simpleTLB" +mmuType = "simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"#os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." 
+vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + os.getenv("VANADIS_AUTO_CLOCK_SYSCALLS", "no")) # auto_clock_sys is never defined in this script; read the env var directly to avoid a NameError +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), +) + +processList[0][1].update(app_params) # merge argc/argN into the single process entry + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + 
"debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + "predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : 
issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = { + "access_latency_cycles" : "14", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "16", + "cache_line_size" : "64", + "cache_size" : "1MB", + "mshr_latency_cycles": 3, + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} +busParams = { + "bus_frequency" : cpu_clock, +} + +l2memLinkParams = { + "group" : 1, + "network_bw" : "25GB/s" +} + +class CPU_Builder: + def __init__(self): + pass + + # CPU + def build( self, prefix, nodeId, cpuId ): + + if pythonDebug: + print("build {}".format(prefix) ) + + # CPU + cpu = sst.Component(prefix, vanadis_cpu_type) + cpu.addParams( cpuParams ) + cpu.addParam( "core_id", cpuId ) + cpu.enableAllStatistics() + + # CPU.decoder + for n in range(numThreads): + decode = cpu.setSubComponent( "decoder"+str(n), 
vanadis_decoder ) + decode.addParams( decoderParams ) + + decode.enableAllStatistics() + + # CPU.decoder.osHandler + os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr ) + os_hdlr.addParams( osHdlrParams ) + + # CPU.decoder.branch_pred + branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" ) + branch_pred.addParams( branchPredParams ) + branch_pred.enableAllStatistics() + + # CPU.lsq + cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" ) + cpu_lsq.addParams(lsqParams) + cpu_lsq.enableAllStatistics() + + # CPU.lsq mem interface which connects to D-cache + cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" ) + + # CPU.mem interface for I-cache + cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" ) + + # L1 D-cache + cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache") + cpu_l1dcache.addParams( l1dcacheParams ) + + # L1 I-cache + cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache") + cpu_l1icache.addParams( l1icacheParams ) + + # L2 cache + cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache") + cpu_l2cache.addParams( l2cacheParams ) + + # L2 cache mem interface + l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC") + l2cache_2_mem.addParams( l2memLinkParams ) + + # L1 to L2 bus + cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus") + cache_bus.addParams(busParams) + + # CPU data TLB + dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper") + dtlbWrapper.addParams(tlbWrapperParams) +# dtlbWrapper.addParam( "debug_level", 0) + dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." 
+ tlbType ); + dtlb.addParams(tlbParams) + + # CPU instruction TLB + itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper") + itlbWrapper.addParams(tlbWrapperParams) +# itlbWrapper.addParam( "debug_level", 0) + itlbWrapper.addParam("exe",True) + itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); + itlb.addParams(tlbParams) + + # CPU (data) -> TLB -> Cache + link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link") + link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") ) + link_cpu_dtlb_link.setNoCut() + + # data TLB -> data L1 + link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link") + link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") ) + link_cpu_l1dcache_link.setNoCut() + + # CPU (instruction) -> TLB -> Cache + link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link") + link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") ) + link_cpu_itlb_link.setNoCut() + + # instruction TLB -> instruction L1 + link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link") + link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") ) + link_cpu_l1icache_link.setNoCut(); + + # data L1 -> bus + link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link") + link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") ) + link_l1dcache_l2cache_link.setNoCut() + + # instruction L1 -> bus + link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link") + link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") ) + link_l1icache_l2cache_link.setNoCut() + + # BUS to L2 cache + link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link") + link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") ) + 
link_bus_l2cache_link.setNoCut() + + return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns") + + +def addParamsPrefix(prefix,params): + #print( prefix ) + ret = {} + for key, value in params.items(): + #print( key, value ) + ret[ prefix + "." + key] = value + + #print( ret ) + return ret + +# node OS +node_os = sst.Component("os", "vanadis.VanadisNodeOS") +node_os.addParams(osParams) + +num=0 +for i,process in processList: + #print( process ) + for y in range(i): + #print( "process", num ) + node_os.addParams( addParamsPrefix( "process" + str(num), process ) ) + num+=1 + +if pythonDebug: + print('total hardware threads ' + str(num) ) + +# node OS MMU +node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType ) +node_os_mmu.addParams(mmuParams) + +# node OS memory interface to L1 data cache +node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" ) + +# node OS l1 data cache +os_cache = sst.Component("node_os.cache", "memHierarchy.Cache") +os_cache.addParams(osl1cacheParams) +os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC") +os_cache_2_mem.addParams( l2memLinkParams ) + +# node memory router +comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router") +comp_chiprtr.addParams(memRtrParams) +comp_chiprtr.setSubComponent("topology","merlin.singlerouter") + +# node directory controller +dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController") +dirctrl.addParams(dirCtrlParams) + +# node directory controller port to cpu +dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC") +dirNIC.addParams(dirNicParams) + +# node memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams( memCtrlParams ) +# SHOULD FAIL TO INITIALIZE +memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", { + "install_direction": "Receive", + "masks": ["4D88, 3, 11111111, 11111111"], + "debug" : 1, + 
"debug_level": 2 +}) + +# node memory controller backend +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams(memParams) + +# node OS data TLB +#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper") +#ostlbWrapper.addParams(tlbWrapperParams) +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType ); +#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" ); +#ostlb.addParams(tlbParams) + +# OS (data) -> TLB -> Cache +#link_os_ostlb_link = sst.Link("link_os_ostlb_link") +#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") ) + +# Directory controller to memory router +link_dir_2_rtr = sst.Link("link_dir_2_rtr") +link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") ) +link_dir_2_rtr.setNoCut() + +# Directory controller to memory controller +link_dir_2_mem = sst.Link("link_dir_2_mem") +link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") ) +link_dir_2_mem.setNoCut() + +# MMU -> ostlb +# don't need when using pass through TLB +#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link") +#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") ) + +# ostlb -> os l1 cache +link_os_cache_link = sst.Link("link_os_cache_link") +#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") ) +link_os_cache_link.setNoCut() + +os_cache_2_rtr = sst.Link("os_cache_2_rtr") +os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") ) +os_cache_2_rtr.setNoCut() + +cpuBuilder = CPU_Builder() + +# build all CPUs +nodeId = 0 +for cpu in range(numCpus): + + prefix="node" + str(nodeId) + ".cpu" + str(cpu) + os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu) + + # MMU -> dtlb + link_mmu_dtlb_link = sst.Link(prefix 
+ ".link_mmu_dtlb_link") + link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb ) + + # MMU -> itlb + link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link") + link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/carcosa/tests/testStuckAtSameByte.py b/src/sst/elements/carcosa/tests/testStuckAtSameByte.py new file mode 100644 index 0000000000..11b7b86987 --- /dev/null +++ b/src/sst/elements/carcosa/tests/testStuckAtSameByte.py @@ -0,0 +1,571 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint = "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" 

# ---------------------------------------------------------------------------
# Global simulator options
# ---------------------------------------------------------------------------
sst.setProgramOption("timebase", "1ps")
sst.setProgramOption("stop-at", "0 ns")

# Statistics: load level 4, reported on the console
sst.setStatisticLoadLevel(4)
sst.setStatisticOutput("sst.statOutputConsole")

# Binary executed by the simulated core. The path is fixed for this test;
# the environment-driven variant is kept below for reference:
#   os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe )
full_exe_name = "../../vanadis/tests/small/basic-math/sqrt-double/riscv64/sqrt-double"
# Last path component, used as the program's arg0
exe_name = full_exe_name.rsplit("/", 1)[-1]

# Verbosity / tracing knobs (overridable from the environment)
verbosity = int(os.environ.get("VANADIS_VERBOSE", 0))
os_verbosity = os.environ.get("VANADIS_OS_VERBOSE", verbosity)
pipe_trace_file = os.environ.get("VANADIS_PIPE_TRACE", "")

# Load/store queue capacities
lsq_ld_entries = os.environ.get("VANADIS_LSQ_LD_ENTRIES", 16)
lsq_st_entries = os.environ.get("VANADIS_LSQ_ST_ENTRIES", 8)

# Reorder buffer size and per-cycle pipeline widths
rob_slots = os.environ.get("VANADIS_ROB_SLOTS", 64)
retires_per_cycle = os.environ.get("VANADIS_RETIRES_PER_CYCLE", 4)
issues_per_cycle = os.environ.get("VANADIS_ISSUES_PER_CYCLE", 4)
decodes_per_cycle = os.environ.get("VANADIS_DECODES_PER_CYCLE", 4)

# Functional unit counts and latencies (always converted to int)
integer_arith_cycles = int(os.environ.get("VANADIS_INTEGER_ARITH_CYCLES", 2))
integer_arith_units = int(os.environ.get("VANADIS_INTEGER_ARITH_UNITS", 2))
fp_arith_cycles = int(os.environ.get("VANADIS_FP_ARITH_CYCLES", 8))
fp_arith_units = int(os.environ.get("VANADIS_FP_ARITH_UNITS", 2))
branch_arith_cycles = int(os.environ.get("VANADIS_BRANCH_ARITH_CYCLES", 2))

# Core clock, shared by the CPU, caches, bus and memory controller below
cpu_clock = os.environ.get("VANADIS_CPU_CLOCK", "2.3GHz")

# Topology: number of cores and hardware threads per core
numCpus = int(os.environ.get("VANADIS_NUM_CORES", 1))
numThreads = int(os.environ.get("VANADIS_NUM_HW_THREADS", 1))

# Element-library prefix; the component name is appended right after this
vanadis_cpu_type = "vanadis."
vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME", "dbg_VanadisCPU")

if verbosity > 0:
    # Only values that are actually defined in this script are reported here;
    # printing an undefined name would abort the run with a NameError.
    print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type)
#    vanadis_cpu_type = "vanadisdbg.VanadisCPU"

# ---------------------------------------------------------------------------
# Application command-line arguments (space separated, from the environment)
# ---------------------------------------------------------------------------
app_args = os.getenv("VANADIS_EXE_ARGS", "")

app_params = {}
if app_args != "":
    app_args_list = app_args.split(" ")
    # We have a plus 1 because the executable name is arg0
    app_args_count = len(app_args_list) + 1

    app_params["argc"] = app_args_count

    if verbosity > 0:
        print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.")
    # arg0 is the executable name, so user arguments are numbered from 1
    for arg_index, next_arg in enumerate(app_args_list, start=1):
        if verbosity > 0:
            print("arg" + str(arg_index) + " = " + next_arg)
        app_params["arg" + str(arg_index)] = next_arg
else:
    app_params["argc"] = 1
    if verbosity > 0:
        print("No application arguments found, continuing with argc=1")

# ISA-specific decoder and OS-handler subcomponent names
vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder"
vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler"


protocol = "MESI"

# OS related params
osParams = {
    "processDebugLevel" : 0,
    "dbgLevel" : os_verbosity,
    "dbgMask" : 8,
    "cores" : numCpus,
    "hardwareThreadCount" : numThreads,
    "page_size" : 4096,
    "physMemSize" : physMemSize,
    "useMMU" : True,
    "checkpointDir" : checkpointDir,
    "checkpoint" : checkpoint
}

# Each entry is (number of copies, per-process parameter dict)
processList = (
    ( 1, {
        "env_count" : 1,
        "env0" : "OMP_NUM_THREADS={}".format(numCpus*numThreads),
        "exe" : full_exe_name,
        "arg0" : exe_name,
    } ),
)

# Merge argc/argN into the first (and only) process description
processList[0][1].update(app_params)

osl1cacheParams = {
    "access_latency_cycles" : "2",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "8",
    "cache_line_size" : "64",
    "cache_size" : "32 KB",
    "L1" : "1",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

mmuParams = {
    "debug_level": 0,
    "num_cores": numCpus,
    "num_threads": numThreads,
    "page_size": 4096,
}

# One router port per CPU, plus one for the directory and one for the OS cache
memRtrParams = {
    "xbar_bw" : "1GB/s",
    "link_bw" : "1GB/s",
    "input_buf_size" : "2KB",
    "num_ports" : str(numCpus+2),
    "flit_size" : "72B",
    "output_buf_size" : "2KB",
    "id" : "0",
    "topology" : "merlin.singlerouter"
}

dirCtrlParams = {
    "coherence_protocol" : protocol,
    "entry_cache_size" : "1024",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
    "addr_range_start" : "0x0",
    "addr_range_end" : "0xFFFFFFFF"
}

dirNicParams = {
    "network_bw" : "25GB/s",
    "group" : 2,
}

memCtrlParams = {
    "clock" : cpu_clock,
    "backend.mem_size" : physMemSize,
    "backing" : "malloc",
    "initBacking": 1,
    "addr_range_start": 0,
    "addr_range_end": 0xffffffff,
    "debug_level" : mh_debug_level,
    "debug" : mh_debug,
    "checkpointDir" : checkpointDir,
    "checkpoint" : checkpoint
}

memParams = {
    "mem_size" : "4GiB",
    "access_time" : "1 ns"
}

# CPU related params
tlbParams = {
    "debug_level": 0,
    "hitLatency": 1,
    "num_hardware_threads": numThreads,
    "num_tlb_entries_per_thread": 64,
    "tlb_set_size": 4,
}

tlbWrapperParams = {
    "debug_level": 0,
}

decoderParams = {
    "loader_mode" : loader_mode,
    "uop_cache_entries" : 1536,
    "predecode_cache_entries" : 4
}

osHdlrParams = { }

branchPredParams = {
    "branch_entries" : 32
}

cpuParams = {
    "clock" : cpu_clock,
    "verbose" : verbosity,
    "hardware_threads": numThreads,
    "physical_fp_registers" : 168 * numThreads,
    "physical_integer_registers" : 180 * numThreads,
    "integer_arith_cycles" : integer_arith_cycles,
    "integer_arith_units" : integer_arith_units,
    "fp_arith_cycles" : fp_arith_cycles,
    "fp_arith_units" : fp_arith_units,
    "branch_unit_cycles" : branch_arith_cycles,
    "print_int_reg" : False,
    "print_fp_reg" : False,
    "pipeline_trace_file" : pipe_trace_file,
    "reorder_slots" : rob_slots,
    "decodes_per_cycle" : decodes_per_cycle,
    "issues_per_cycle" : issues_per_cycle,
    "retires_per_cycle" : retires_per_cycle,
    "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0),
    "start_verbose_when_issue_address": dbgAddr,
    "stop_verbose_when_retire_address": stopDbg,
    "print_rob" : False,
    "checkpointDir" : checkpointDir,
    "checkpoint" : checkpoint
}

lsqParams = {
    "verbose" : verbosity,
    "address_mask" : 0xFFFFFFFF,
    "max_stores" : lsq_st_entries,
    "max_loads" : lsq_ld_entries,
}

l1dcacheParams = {
    "access_latency_cycles" : "2",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "8",
    "cache_line_size" : "64",
    "cache_size" : "32 KB",
    "L1" : "1",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

l1icacheParams = {
    "access_latency_cycles" : "2",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "8",
    "cache_line_size" : "64",
    "cache_size" : "32 KB",
    "prefetcher" : "cassini.NextBlockPrefetcher",
    "prefetcher.reach" : 1,
    "L1" : "1",
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

l2cacheParams = {
    "access_latency_cycles" : "14",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "16",
    "cache_line_size" : "64",
    "cache_size" : "1MB",
    "mshr_latency_cycles": 3,
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}

busParams = {
    "bus_frequency" : cpu_clock,
}

l2memLinkParams = {
    "group" : 1,
    "network_bw" : "25GB/s"
}


class CPU_Builder:
    """Builds one Vanadis core together with its private TLBs, caches and bus."""

    def __init__(self):
        pass

    def build( self, prefix, nodeId, cpuId ):
        """Instantiate and wire up a single CPU and its private memory hierarchy.

        Parameters:
            prefix -- name of the CPU component; also the prefix for the names
                      of every component and link created here.
            nodeId -- node index; accepted for interface compatibility, not
                      used inside this method.
            cpuId  -- value assigned to the CPU's "core_id" parameter.

        Returns a 4-tuple of link endpoints, each (object, port name, latency):
            (cpu "os_link"), (L2 MemNIC "port"), (data TLB "mmu"),
            (instruction TLB "mmu").
        """

        if pythonDebug:
            print("build {}".format(prefix) )

        # CPU
        cpu = sst.Component(prefix, vanadis_cpu_type)
        cpu.addParams( cpuParams )
        cpu.addParam( "core_id", cpuId )
        cpu.enableAllStatistics()

        # CPU.decoder -- one decoder (with OS handler and branch unit) per
        # hardware thread
        for n in range(numThreads):
            decode = cpu.setSubComponent( "decoder"+str(n), vanadis_decoder )
            decode.addParams( decoderParams )

            decode.enableAllStatistics()

            # CPU.decoder.osHandler
            os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr )
            os_hdlr.addParams( osHdlrParams )

            # CPU.decoder.branch_pred
            branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" )
            branch_pred.addParams( branchPredParams )
            branch_pred.enableAllStatistics()

        # CPU.lsq
        cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" )
        cpu_lsq.addParams(lsqParams)
        cpu_lsq.enableAllStatistics()

        # CPU.lsq mem interface which connects to D-cache
        cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" )

        # CPU.mem interface for I-cache
        cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" )

        # L1 D-cache
        cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache")
        cpu_l1dcache.addParams( l1dcacheParams )

        # L1 I-cache
        cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache")
        cpu_l1icache.addParams( l1icacheParams )

        # L2 cache
        cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache")
        cpu_l2cache.addParams( l2cacheParams )

        # L2 cache mem interface
        l2cache_2_mem = cpu_l2cache.setSubComponent("lowlink", "memHierarchy.MemNIC")
        l2cache_2_mem.addParams( l2memLinkParams )

        # L1 to L2 bus
        cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus")
        cache_bus.addParams(busParams)

        # CPU data TLB
        dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper")
        dtlbWrapper.addParams(tlbWrapperParams)
#        dtlbWrapper.addParam( "debug_level", 0)
        dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." + tlbType )
        dtlb.addParams(tlbParams)

        # CPU instruction TLB
        itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper")
        itlbWrapper.addParams(tlbWrapperParams)
#        itlbWrapper.addParam( "debug_level", 0)
        itlbWrapper.addParam("exe",True)
        itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType )
        itlb.addParams(tlbParams)

        # CPU (data) -> TLB -> Cache
        link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link")
        link_cpu_dtlb_link.connect( (cpuDcacheIf, "lowlink", "1ns"), (dtlbWrapper, "cpu_if", "1ns") )
        link_cpu_dtlb_link.setNoCut()

        # data TLB -> data L1
        link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link")
        link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (cpu_l1dcache, "highlink", "1ns") )
        link_cpu_l1dcache_link.setNoCut()

        # CPU (instruction) -> TLB -> Cache
        link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link")
        link_cpu_itlb_link.connect( (cpuIcacheIf, "lowlink", "1ns"), (itlbWrapper, "cpu_if", "1ns") )
        link_cpu_itlb_link.setNoCut()

        # instruction TLB -> instruction L1
        link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link")
        link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (cpu_l1icache, "highlink", "1ns") )
        link_cpu_l1icache_link.setNoCut()

        # data L1 -> bus
        link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link")
        link_l1dcache_l2cache_link.connect( (cpu_l1dcache, "lowlink", "1ns"), (cache_bus, "highlink0", "1ns") )
        link_l1dcache_l2cache_link.setNoCut()

        # instruction L1 -> bus
        link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link")
        link_l1icache_l2cache_link.connect( (cpu_l1icache, "lowlink", "1ns"), (cache_bus, "highlink1", "1ns") )
        link_l1icache_l2cache_link.setNoCut()

        # BUS to L2 cache
        link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link")
        link_bus_l2cache_link.connect( (cache_bus, "lowlink0", "1ns"), (cpu_l2cache, "highlink", "1ns") )
        link_bus_l2cache_link.setNoCut()

        return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns")


def addParamsPrefix(prefix,params):
    """Return a copy of `params` with every key rewritten to "<prefix>.<key>"."""
    #print( prefix )
    ret = {}
    for key, value in params.items():
        #print( key, value )
        ret[ prefix + "." + key] = value

    #print( ret )
    return ret

# node OS
node_os = sst.Component("os", "vanadis.VanadisNodeOS")
node_os.addParams(osParams)

# Register every process with the node OS as "process<N>.<key>" parameters;
# an entry (count, params) in processList is registered `count` times.
num=0
for i,process in processList:
    #print( process )
    for y in range(i):
        #print( "process", num )
        node_os.addParams( addParamsPrefix( "process" + str(num), process ) )
        num+=1

if pythonDebug:
    print('total hardware threads ' + str(num) )

# node OS MMU
node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType )
node_os_mmu.addParams(mmuParams)

# node OS memory interface to L1 data cache
node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" )

# node OS l1 data cache
os_cache = sst.Component("node_os.cache", "memHierarchy.Cache")
os_cache.addParams(osl1cacheParams)
os_cache_2_mem = os_cache.setSubComponent("lowlink", "memHierarchy.MemNIC")
os_cache_2_mem.addParams( l2memLinkParams )

# node memory router
comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router")
comp_chiprtr.addParams(memRtrParams)
comp_chiprtr.setSubComponent("topology","merlin.singlerouter")

# node directory controller
dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController")
dirctrl.addParams(dirCtrlParams)

# node directory controller port to cpu
dirNIC = dirctrl.setSubComponent("highlink", "memHierarchy.MemNIC")
dirNIC.addParams(dirNicParams)

# node memory controller
memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.addParams( memCtrlParams )
# Fault injector under test, installed on the memory controller's "highlink"
# port in the receive direction with two mask entries.
# SHOULD FAIL TO INITIALIZE
memctrl.addPortModule("highlink", "carcosa.StuckAtFaultInjector", {
    "install_direction": "Receive",
    "masks": ["4D88, 3, 11110000, 00001111", "4D88, 5, 11110000, 00001111"],
    "debug" : 1,
    "debug_level": 2
})

# node memory controller backend
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams(memParams)

# node OS data TLB
#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper")
#ostlbWrapper.addParams(tlbWrapperParams)
#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType );
#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" );
#ostlb.addParams(tlbParams)

# OS (data) -> TLB -> Cache
#link_os_ostlb_link = sst.Link("link_os_ostlb_link")
#link_os_ostlb_link.connect( (node_os_mem_if, "lowlink", "1ns"), (ostlbWrapper, "cpu_if", "1ns") )

# Directory controller to memory router
link_dir_2_rtr = sst.Link("link_dir_2_rtr")
link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") )
link_dir_2_rtr.setNoCut()

# Directory controller to memory controller
link_dir_2_mem = sst.Link("link_dir_2_mem")
link_dir_2_mem.connect( (dirctrl, "lowlink", "1ns"), (memctrl, "highlink", "1ns") )
link_dir_2_mem.setNoCut()

# MMU -> ostlb
# don't need when using pass through TLB
#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link")
#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") )

# node OS memory interface -> os l1 cache (no OS TLB in between)
link_os_cache_link = sst.Link("link_os_cache_link")
#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache, "highlink", "1ns") )
link_os_cache_link.connect( (node_os_mem_if, "lowlink", "1ns"), (os_cache, "highlink", "1ns") )
link_os_cache_link.setNoCut()

# OS cache -> router, on the port after the directory controller's
os_cache_2_rtr = sst.Link("os_cache_2_rtr")
os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") )
os_cache_2_rtr.setNoCut()

cpuBuilder = CPU_Builder()

# build all CPUs
nodeId = 0
for cpu in range(numCpus):

    prefix="node" + str(nodeId) + ".cpu" + str(cpu)
    os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu)

    # MMU -> dtlb
    link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link")
    link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb )

    # MMU -> itlb
    link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link")
    link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb )

    # CPU os handler -> node OS
    link_core_os_link = sst.Link(prefix + ".link_core_os_link")
    link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") )

    # connect cpu L2 to router
    link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr")
    link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") )