Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
549eb05
initial commit with barebones header on new element
mgoldstein322 May 8, 2025
62f6f4d
add files necessary for compilation
mgoldstein322 May 8, 2025
71be5f9
fix some basic syntax issues
mgoldstein322 May 8, 2025
2a156b3
create impl file for baseline injector
mgoldstein322 May 8, 2025
84f2e18
add enum for basic logic selection
mgoldstein322 May 8, 2025
d0fef34
block out empty basic fault functions
mgoldstein322 May 22, 2025
c307c1f
split fault logic code off into subclasses -- only stuck-at is curren…
mgoldstein322 Jul 3, 2025
aad5d2a
add another layer of abstraction so that the fault itself is its own …
mgoldstein322 Jul 3, 2025
491038a
add todo note to faultBase.h
mgoldstein322 Jul 3, 2025
2aa8bba
fix compile errors NOT arising from serialization
mgoldstein322 Sep 4, 2025
9e763fb
sidestep serialization compile issue
mgoldstein322 Sep 4, 2025
43184d4
I have defeated the compiler
mgoldstein322 Sep 5, 2025
8aae820
use correct memEvent and add some base logic -- need to figure out ho…
mgoldstein322 Sep 5, 2025
1f03f65
prototype stuckAtFault written--currently untested (and very ugly)
mgoldstein322 Sep 11, 2025
a55a254
something is DEFINITELY broken here
mgoldstein322 Sep 15, 2025
ddcfc2e
stuckAtFault appears functional -- testing likely required, and there…
mgoldstein322 Sep 16, 2025
c7e44a2
corruptMemRegion written
mgoldstein322 Sep 18, 2025
072837f
COMPILER DEFEATED
mgoldstein322 Sep 18, 2025
7338bb6
randomFlip files added
mgoldstein322 Sep 29, 2025
fe572d7
fix faultlogic parameters on randomFlip
mgoldstein322 Sep 29, 2025
7616b28
all basic faults built
mgoldstein322 Oct 2, 2025
a04017a
add missed file
mgoldstein322 Oct 2, 2025
e3527e3
remove inlining
mgoldstein322 Oct 2, 2025
af1f5dd
major refactor skeletonized--time to make it work
mgoldstein322 Oct 3, 2025
1c89ea4
large number of compile errors fixed--many more to fix on monday :)
mgoldstein322 Oct 3, 2025
84acc39
most compilation issues fixed
mgoldstein322 Oct 6, 2025
55a6c06
refactor nearly complete
mgoldstein322 Oct 7, 2025
f072173
refactor should be complete
mgoldstein322 Oct 8, 2025
8bdc46d
add todos to codebase
mgoldstein322 Oct 8, 2025
50c11cc
fix some ELI weirdness
mgoldstein322 Oct 17, 2025
06612b0
95% sure stuckAtFault logic is working as intended
mgoldstein322 Oct 20, 2025
87293b4
remove todos from stuckAtFault and make endianness member consistent …
mgoldstein322 Oct 20, 2025
b96ebfb
95% certain that corruptMemFault is working as intended
mgoldstein322 Oct 21, 2025
29f7035
add test suite for corruptMemFault (overlap fails currently)
mgoldstein322 Oct 22, 2025
107945a
remove dummy test
mgoldstein322 Oct 22, 2025
be41c66
randomFlip and randomDrop tests added
mgoldstein322 Oct 23, 2025
272f66d
randomDrop ELI fixed
mgoldstein322 Oct 23, 2025
6062ef5
corruptMem index computation fixed and corruption slightly optimized
mgoldstein322 Oct 23, 2025
1bcfa27
tests added to EXTRA_DIST in makefile
mgoldstein322 Oct 23, 2025
f76e70a
add manual seed argument and identify seed which causes crash in rand…
mgoldstein322 Oct 24, 2025
62eb7c0
remove trailing white space
mgoldstein322 Oct 24, 2025
336815c
Merge branch 'sstsimulator:devel' into devel
mgoldstein322 Jan 29, 2026
5436f8b
move faultInjectorBase into injectors dir
mgoldstein322 Jan 29, 2026
acfda65
rename doubleFaultInjector to dropFlipFaultInjector
mgoldstein322 Jan 29, 2026
8e3c56e
All PR changes made -- thorough test needed
mgoldstein322 Jan 29, 2026
0fe6ca7
hopefully fixed trailing white space
mgoldstein322 Jan 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions src/sst/elements/carcosa/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# -*- Makefile -*-
#
# Automake rules for the carcosa fault-injection element library (libcarcosa.la).
#

# Preprocessor flags: MPI include paths plus the top-level src directory so that
# includes of the form "sst/elements/carcosa/..." resolve.
AM_CPPFLAGS += \
$(MPI_CPPFLAGS) \
-I$(top_srcdir)/src

# The element is built as a libtool module installed into $(pkglibdir).
compdir = $(pkglibdir)
comp_LTLIBRARIES = libcarcosa.la
# Every injector and fault-logic source/header pair compiled into the library.
libcarcosa_la_SOURCES = \
injectors/faultInjectorBase.cc \
injectors/faultInjectorBase.h \
injectors/stuckAtFaultInjector.cc \
injectors/stuckAtFaultInjector.h \
injectors/corruptMemFaultInjector.cc \
injectors/corruptMemFaultInjector.h \
injectors/randomDropFaultInjector.cc \
injectors/randomDropFaultInjector.h \
injectors/randomFlipFaultInjector.cc \
injectors/randomFlipFaultInjector.h \
injectors/dropFlipFaultInjector.cc \
injectors/dropFlipFaultInjector.h \
faultlogic/faultBase.cc \
faultlogic/faultBase.h \
faultlogic/stuckAtFault.cc \
faultlogic/stuckAtFault.h \
faultlogic/corruptMemFault.cc \
faultlogic/corruptMemFault.h \
faultlogic/randomDropFault.cc \
faultlogic/randomDropFault.h \
faultlogic/randomFlipFault.cc \
faultlogic/randomFlipFault.h

# Python test scripts that are not built but must ship in the distribution tarball.
EXTRA_DIST = \
tests/testCorruptMemBasic.py \
tests/testCorruptMemDouble.py \
tests/testCorruptMemDoubleOverlap.py \
tests/testRandomDrop.py \
tests/testRandomFlip.py \
tests/testStuckAtBasic.py \
tests/testStuckAtMultiple.py \
tests/testStuckAtOverlap.py \
tests/testStuckAtSameByte.py

# Public headers, installed under $(includedir)/sst/elements/carcosa with their
# injectors/ and faultlogic/ sub-directories preserved (nobase_).
sstdir = $(includedir)/sst/elements/carcosa
nobase_sst_HEADERS = \
injectors/faultInjectorBase.h \
injectors/stuckAtFaultInjector.h \
injectors/corruptMemFaultInjector.h \
injectors/randomDropFaultInjector.h \
injectors/randomFlipFaultInjector.h \
injectors/dropFlipFaultInjector.h \
faultlogic/faultBase.h \
faultlogic/stuckAtFault.h \
faultlogic/corruptMemFault.h \
faultlogic/randomDropFault.h \
faultlogic/randomFlipFault.h

# Build a loadable module without a version suffix; no extra libraries are linked.
libcarcosa_la_LDFLAGS = -module -avoid-version
libcarcosa_la_LIBADD =

# NOTE(review): HMC_FLAG is not defined in this file and nothing listed above looks
# HMC-specific -- confirm whether this line is actually required.
AM_CPPFLAGS += $(HMC_FLAG)
# After installation, record this element's source and tests directories via
# $(SST_REGISTER_TOOL).
install-exec-hook:
$(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE carcosa=$(abs_srcdir)
$(SST_REGISTER_TOOL) SST_ELEMENT_TESTS carcosa=$(abs_srcdir)/tests
9 changes: 9 additions & 0 deletions src/sst/elements/carcosa/configure.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
dnl -*- Autoconf -*-
dnl vim:ft=config
dnl
dnl SST_carcosa_CONFIG([action-if-enabled], [action-if-disabled])
dnl
dnl Configuration check for the carcosa element. No external dependencies are
dnl probed: carcosa_happy is set to "yes" unconditionally, so the first
dnl argument ($1) is always the one expanded.

AC_DEFUN([SST_carcosa_CONFIG], [
carcosa_happy="yes"

AS_IF([test "$carcosa_happy" = "yes"], [$1], [$2])
])
125 changes: 125 additions & 0 deletions src/sst/elements/carcosa/faultlogic/corruptMemFault.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2009-2025 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2025, NTESS
// All rights reserved.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#include "sst/elements/carcosa/faultlogic/corruptMemFault.h"

using namespace SST::Carcosa;

/**
 * Builds the fault and loads its list of corruption regions.
 *
 * The "regions" parameter is read as an array of strings, each of the form
 * "start_addr, end_addr" (parsed as hexadecimal by convertString()). Every
 * entry is converted to an address pair and appended to corruptionRegions_.
 * A region whose start is greater than its end aborts the simulation through
 * the injector's fatal output.
 *
 * @param params   element parameters; only "regions" is read here
 * @param injector owning injector, forwarded to FaultBase
 */
CorruptMemFault::CorruptMemFault(Params& params, FaultInjectorBase* injector) : FaultBase(params, injector) {
#ifdef __SST_DEBUG_OUTPUT__
    getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Fault type: Corrupt Memory Region\n");
#endif
    // parameter format: {"regions": ["start_addr0, end_addr0", "start_addr1, end_addr1",...]}
    std::vector<std::string> regionVec;
    params.find_array<std::string>("regions", regionVec);

    corruptionRegions_.reserve(regionVec.size());

    // Iterate by reference: convertString() takes a non-const reference, and
    // copying every entry just to parse it is wasted work.
    for (std::string& region : regionVec) {
        const std::pair<uint64_t,uint64_t> region_pair = convertString(region);

        // A region must not end before it starts.
        if (region_pair.first > region_pair.second) {
            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Invalid corruption region: [0x%zx, 0x%zx].\n",
                                            region_pair.first, region_pair.second);
        }

        corruptionRegions_.push_back(region_pair);
#ifdef __SST_DEBUG_OUTPUT__
        getSimulationDebug()->debug(CALL_INFO_LONG, 1, 0, "Inserted corruption region: [0x%zx, 0x%zx]\n",
                                        region_pair.first, region_pair.second);
#endif
    }
}

/**
 * Replaces the event's payload with a copy in which every byte range selected
 * by regionsToUse_ has been overwritten with random byte values.
 *
 * For each region index in regionsToUse_, the start and end byte offsets
 * inside the payload are obtained from computeStartIndex()/computeEndIndex();
 * bytes in [start, end) are then replaced with values drawn from the
 * injector's randUInt32(0,255). If either offset cannot be computed the
 * simulation is aborted through the fatal output.
 *
 * @param ev event to corrupt; must be a MemEvent (checked by convertMemEvent())
 * @return always true
 */
bool CorruptMemFault::faultLogic(Event*& ev) {
    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);

    const Addr base_addr = mem_ev->getBaseAddr();
    // The payload size does not change while regions are processed, so read it once.
    const size_t payload_sz = mem_ev->getPayloadSize();
    // Single working copy of the payload; the event is only updated at the end.
    dataVec new_payload(mem_ev->getPayload());

    for (uint32_t r : regionsToUse_) {
        const auto& region = corruptionRegions_[r];

        const int32_t start = computeStartIndex(base_addr, payload_sz, region.first);
        if (start < 0) {
            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "No valid start index for corruption.\n");
        }

        const int32_t end = computeEndIndex(base_addr, payload_sz, region.second);
        if (end < 0) {
            // Previously this reported "start index", which misdirected debugging.
            getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "No valid end index for corruption.\n");
        }

        for (int32_t i = start; i < end; i++) {
            new_payload[i] = static_cast<uint8_t>(injector_->randUInt32(0,255));
        }
    }

    setMemEventPayload(ev, new_payload);
    return true;
}

std::pair<uint64_t,uint64_t> CorruptMemFault::convertString(std::string& region) {
std::stringstream ss(region);
uint64_t addr0, addr1;

ss >> std::hex >> addr0;
if (ss.peek() == ','){
ss.ignore();
}
ss >> std::hex >> addr1;

#ifdef __SST_DEBUG_OUTPUT__
getSimulationDebug()->debug(CALL_INFO_LONG, 2, 0, "Extracted region pair: [0x%zx, 0x%zx]\n",
addr0, addr1);
#endif
return make_pair(addr0, addr1);
}

/**
 * Finds the payload byte offset at which corruption should begin.
 *
 * The payload is walked in 8-byte steps starting at base_addr; the offset of
 * the first step whose address is at or beyond region_start is returned.
 *
 * @param base_addr    address of the first payload byte
 * @param payload_sz   payload size in bytes (only whole 8-byte steps are examined)
 * @param region_start first address of the corruption region
 * @return byte offset relative to base_addr, or -1 if no step qualifies
 */
int32_t CorruptMemFault::computeStartIndex(Addr base_addr, size_t payload_sz, Addr region_start) {
    const int step_count = payload_sz / 8;

    Addr cursor = base_addr;
    int step = 0;
    while (step < step_count) {
        if (cursor >= region_start) {
            return cursor - base_addr;
        }
        cursor += 8;
        ++step;
    }

    // Every examined address lies before the region start.
    return -1;
}

/**
 * Finds the payload byte offset (exclusive) at which corruption should stop.
 *
 * The payload is walked backwards in 8-byte steps from its last full step;
 * for the first step whose address is at or before region_end, the offset
 * one past that step's final byte is returned.
 *
 * @param base_addr  address of the first payload byte
 * @param payload_sz payload size in bytes (only whole 8-byte steps are examined)
 * @param region_end last address of the corruption region
 * @return exclusive byte offset relative to base_addr, or -1 if the region
 *         ends before the payload begins (or the payload has no full step)
 */
int32_t CorruptMemFault::computeEndIndex(Addr base_addr, size_t payload_sz, Addr region_end) {
    const int step_count = payload_sz / 8;
    if (step_count <= 0) {
        return -1;
    }

    // Examine exactly step_count steps. The former bound (i >= 0) made one
    // extra pass that probed base_addr - 8, i.e. outside the payload, which
    // could yield 0 instead of -1 and wraps around for small base addresses.
    Addr addr = base_addr + ((step_count - 1) * 8);
    for (int i = step_count; i > 0; i--, addr -= 8) {
        if (addr <= region_end) {
            return addr - base_addr + 8;
        }
    }
    return -1;
}

/**
 * Determines which configured corruption regions the given event touches.
 *
 * The event's payload is walked in 8-byte steps from its base address; a
 * region is selected as soon as one of those step addresses lies inside
 * [region.first, region.second]. The indices of the selected regions (into
 * corruptionRegions_) are stored in regionsToUse_.
 *
 * @param ev event to inspect; must be a MemEvent (checked by convertMemEvent())
 * @return pointer to regionsToUse_, holding only the regions hit by this event
 */
std::vector<uint32_t>* CorruptMemFault::checkAddrUsage(Event*& ev) {
    // Convert once instead of repeating the dynamic_cast for every region.
    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);
    const Addr base_addr = mem_ev->getBaseAddr();
    const int step_count = mem_ev->getPayloadSize() / 8;

    // Start from an empty list so that indices recorded for a previously
    // inspected event are never reported (and later corrupted) for this one.
    regionsToUse_.clear();

    for (size_t i = 0; i < corruptionRegions_.size(); i++) {
        const auto& region = corruptionRegions_[i];
        // check if message contains ANY address in this region
        Addr addr = base_addr;
        for (int j = 0; j < step_count; addr += 8, j++) {
            if ((addr >= region.first) && (addr <= region.second)) {
                regionsToUse_.push_back(static_cast<uint32_t>(i));
                break;
            }
        }
    }
    return &regionsToUse_;
}
72 changes: 72 additions & 0 deletions src/sst/elements/carcosa/faultlogic/corruptMemFault.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright 2009-2025 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2025, NTESS
// All rights reserved.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#ifndef SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H
#define SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H

#include "sst/elements/carcosa/faultlogic/faultBase.h"
#include "sst/core/rng/mersenne.h"
#include <vector>
#include <utility>
#include <string>
#include <sstream>

namespace SST::Carcosa {

typedef std::vector<uint8_t> dataVec;
typedef SST::MemHierarchy::Addr Addr;

/**
* This fault is intended to be placed on the input/output ports
* of memory components such as DRAM or HBM. Events that pass through
* it, and whose data addresses fall within the ranges set in this
* module's parameters, will have their payloads randomly altered
* to simulate corruption in the affected region of memory.
*
* Regions are supplied through the "regions" parameter as an array of
* "start_addr, end_addr" strings, parsed as hexadecimal.
*/
class CorruptMemFault : public FaultBase
{
public:

// Reads the "regions" parameter and stores each entry as an address pair;
// a region whose start exceeds its end is reported as fatal.
CorruptMemFault(Params& params, FaultInjectorBase* injector);

// Default constructor; presumably required by the serialization framework -- TODO confirm.
CorruptMemFault() = default;
~CorruptMemFault() {}

/**
* 1. Read in event
* 2. Test if event is in specified region
* 3. Corrupt event payload if necessary
* 4. Replace payload
*
* The regions that are corrupted are those currently listed in
* regionsToUse_, which is filled by checkAddrUsage().
*/
bool faultLogic(Event*& ev) override;

// Records in regionsToUse_ the index of every configured region that the
// event's payload touches (examined in 8-byte steps from the base address)
// and returns a pointer to that member vector.
std::vector<uint32_t>* checkAddrUsage(Event*& ev);
protected:

// (start, end) address pairs parsed from the "regions" parameter.
std::vector<std::pair<uint64_t, uint64_t>> corruptionRegions_;

// Indices into corruptionRegions_; written by checkAddrUsage(), read by faultLogic().
std::vector<uint32_t> regionsToUse_;

// Parses one "start_addr, end_addr" string (hexadecimal) into an address pair.
std::pair<uint64_t,uint64_t> convertString(std::string& region);

// Byte offsets into the payload delimiting the part to corrupt: the start
// offset is inclusive, the end offset exclusive. Both return -1 when no
// suitable offset exists.
int32_t computeStartIndex(Addr base_addr, size_t payload_sz, Addr region_start);
int32_t computeEndIndex(Addr base_addr, size_t payload_sz, Addr region_end);

// Serializes the base-class state followed by both region vectors.
void serialize_order(SST::Core::Serialization::serializer& ser) override {
FaultBase::serialize_order(ser);
SST_SER(corruptionRegions_);
SST_SER(regionsToUse_);
}
ImplementVirtualSerializable(CorruptMemFault)
}; // CorruptMemFault
} // namespace SST::Carcosa

#endif // SST_ELEMENTS_CARCOSA_CORRUPTMEMFAULT_H
77 changes: 77 additions & 0 deletions src/sst/elements/carcosa/faultlogic/faultBase.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Copyright 2009-2025 NTESS. Under the terms
// of Contract DE-NA0003525 with NTESS, the U.S.
// Government retains certain rights in this software.
//
// Copyright (c) 2009-2025, NTESS
// All rights reserved.
//
// This file is part of the SST software package. For license
// information, see the LICENSE file in the top level directory of the
// distribution.

#include "sst/elements/carcosa/faultlogic/faultBase.h"

using namespace SST::Carcosa;

/**
 * Stores the owning injector. The parameter set is accepted for the benefit
 * of derived classes but is not read at this level.
 */
FaultBase::FaultBase(Params& params, FaultInjectorBase* injector) :
    injector_(injector)
{
    // Nothing further to initialise in the base class.
}

/**
 * Base implementation: leaves the event untouched and reports true.
 */
bool FaultBase::faultLogic(Event*& ev)
{
    const bool result = true;
    return result;
}

/**
 * @return the Output object obtained from the owning injector's getOutput()
 */
SST::Output* FaultBase::getSimulationOutput()
{
    SST::Output* out = injector_->getOutput();
    return out;
}

/**
 * @return the Output object obtained from the owning injector's getDebug()
 */
SST::Output* FaultBase::getSimulationDebug()
{
    SST::Output* dbg = injector_->getDebug();
    return dbg;
}

/**
 * Downcasts a generic event to a MemEvent.
 *
 * If the event is not a MemEvent, fatal() is called on the simulation output
 * with exit code -1. In debug builds the ID of the intercepted event is
 * logged at level 3.
 *
 * @param ev event to convert
 * @return the same event viewed as a MemEvent
 */
SST::MemHierarchy::MemEvent* FaultBase::convertMemEvent(Event*& ev) {
    auto* const as_mem_event = dynamic_cast<SST::MemHierarchy::MemEvent*>(ev);

    if (!as_mem_event) {
        getSimulationOutput()->fatal(CALL_INFO_LONG, -1, "Attempting to inject mem fault on a non-MemEvent type.\n");
    }

#ifdef __SST_DEBUG_OUTPUT__
    getSimulationDebug()->debug(CALL_INFO_LONG, 3, 0, "Intercepted event %zu/%d\n", as_mem_event->getID().first, as_mem_event->getID().second);
#endif
    return as_mem_event;
}

/**
 * @param ev event to inspect; must be a MemEvent (checked by convertMemEvent())
 * @return reference to the payload held by the event
 */
dataVec& FaultBase::getMemEventPayload(Event*& ev)
{
    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);
    return mem_ev->getPayload();
}

/**
 * Replaces the payload of a memory event.
 *
 * In debug builds the payload is printed (eight bytes per row, each row
 * prefixed with its address) both before and after the replacement.
 *
 * @param ev         event to modify; must be a MemEvent (checked by convertMemEvent())
 * @param newPayload bytes that become the event's payload
 */
void FaultBase::setMemEventPayload(Event*& ev, dataVec newPayload) {
    SST::MemHierarchy::MemEvent* mem_ev = convertMemEvent(ev);

#ifdef __SST_DEBUG_OUTPUT__
    SST::Output* dbg = getSimulationDebug();

    // Prints the event's current payload. The inner bound also checks the
    // payload size so that a payload whose length is not a multiple of eight
    // is never read past its end; the payload is viewed by reference rather
    // than copied just for printing.
    auto dump_payload = [dbg, mem_ev]() {
        const dataVec& payload = mem_ev->getPayload();
        for (size_t i = 0; i < payload.size(); i += 8) {
            dbg->debug(CALL_INFO_LONG, 2, 0, "\n0x%zx: [\t", mem_ev->getBaseAddr() + i);
            for (size_t j = i; j < (i + 8) && j < payload.size(); j++) {
                dbg->debug(CALL_INFO_LONG, 2, 0, "%d\t", payload[j]);
            }
            dbg->debug(CALL_INFO_LONG, 2, 0, "]\n");
        }
    };

    dbg->debug(CALL_INFO_LONG, 2, 0, "Payload before replacement:\n");
    dump_payload();
#endif

    mem_ev->setPayload(newPayload);

#ifdef __SST_DEBUG_OUTPUT__
    dbg->debug(CALL_INFO_LONG, 2, 0, "Payload after replacement:\n");
    dump_payload();
#endif
}

Loading
Loading