From 56c2f2a2ad2cc9e9bdb5e892edfc2027f840f773 Mon Sep 17 00:00:00 2001 From: Jacob Michael Fustos Date: Fri, 20 Sep 2024 13:21:08 -0600 Subject: [PATCH] First attempt at implementing gdb for Vanadis. Testing has been limited, only integer registers supported --- src/sst/elements/vanadis/decoder/vdecoder.h | 18 +- .../elements/vanadis/decoder/vmipsdecoder.h | 5 +- .../vanadis/decoder/vriscv64decoder.cc | 1 + .../vanadis/decoder/vriscv64decoder.h | 117 +- .../elements/vanadis/decoder/vriscv64gdb.h | 1483 +++++++++++++++++ .../elements/vanadis/inst/vgetregcallable.h | 74 + src/sst/elements/vanadis/inst/vinstall.h | 1 + .../elements/vanadis/inst/vsetregcallable.h | 11 +- .../vanadis/tests/basic_vanadis_gdb.py | 600 +++++++ src/sst/elements/vanadis/vanadis.cc | 10 +- 10 files changed, 2264 insertions(+), 56 deletions(-) create mode 100644 src/sst/elements/vanadis/decoder/vriscv64gdb.h create mode 100644 src/sst/elements/vanadis/inst/vgetregcallable.h create mode 100644 src/sst/elements/vanadis/tests/basic_vanadis_gdb.py diff --git a/src/sst/elements/vanadis/decoder/vdecoder.h b/src/sst/elements/vanadis/decoder/vdecoder.h index e88228b3f3..188e75d570 100644 --- a/src/sst/elements/vanadis/decoder/vdecoder.h +++ b/src/sst/elements/vanadis/decoder/vdecoder.h @@ -214,8 +214,18 @@ class VanadisDecoder : public SST::SubComponent void setThreadLocalStoragePointer(uint64_t new_tls) { tls_ptr = new_tls; } - uint64_t getThreadLocalStoragePointer() const { return tls_ptr; } - uint64_t getCycleCount() const { return cycle_count; } + static void * getThreadLocalStoragePointer_stub( void * arg_input ) { + VanadisDecoder * me = (VanadisDecoder *)arg_input; + return me->getThreadLocalStoragePointer( ); + } + + void * getThreadLocalStoragePointer() { return (void *)&tls_ptr; } + static void * getCycleCount_stub( void * arg_input ) { + VanadisDecoder * me = (VanadisDecoder *)arg_input; + return me->getCycleCount( ); + } + + void * getCycleCount() { return (void *)&cycle_count; } // 
VanadisCircularQueue* getDecodedQueue() { return // decoded_q; } @@ -232,6 +242,10 @@ class VanadisDecoder : public SST::SubComponent VanadisBranchUnit* getBranchPredictor() { return branch_predictor; } virtual VanadisCPUOSHandler* getOSHandler() { return os_handler; } + virtual void gdb_tick() {} + virtual void register_pipeline( SST::Output* ) {} + virtual bool is_rob_empty( ) { return false; } + virtual void instruction_to_rob( SST::Output*, VanadisInstruction*, bool *, VanadisInstructionBundle* ) {} protected: virtual void clearDecoderAfterMisspeculate(SST::Output* output) {}; diff --git a/src/sst/elements/vanadis/decoder/vmipsdecoder.h b/src/sst/elements/vanadis/decoder/vmipsdecoder.h index 749e6608e5..e7d5e59948 100644 --- a/src/sst/elements/vanadis/decoder/vmipsdecoder.h +++ b/src/sst/elements/vanadis/decoder/vmipsdecoder.h @@ -2068,11 +2068,10 @@ class VanadisMIPSDecoder : public VanadisDecoder switch ( rd ) { case 29: - auto thread_call = std::bind(&VanadisMIPSDecoder::getThreadLocalStoragePointer, this); - bundle->addInstruction( new VanadisSetRegisterByCallInstruction( - ins_addr, hw_thr, options, target_reg, thread_call)); + ins_addr, hw_thr, options, target_reg, + &VanadisMIPSDecoder::getThreadLocalStoragePointer_stub, (void *)this)); insertDecodeFault = false; break; } diff --git a/src/sst/elements/vanadis/decoder/vriscv64decoder.cc b/src/sst/elements/vanadis/decoder/vriscv64decoder.cc index ecd1b55f50..152a86b345 100644 --- a/src/sst/elements/vanadis/decoder/vriscv64decoder.cc +++ b/src/sst/elements/vanadis/decoder/vriscv64decoder.cc @@ -16,5 +16,6 @@ #include +#include "decoder/vriscv64gdb.h" #include "decoder/vriscv64decoder.h" diff --git a/src/sst/elements/vanadis/decoder/vriscv64decoder.h b/src/sst/elements/vanadis/decoder/vriscv64decoder.h index 670bb2a5ef..3cece5196f 100644 --- a/src/sst/elements/vanadis/decoder/vriscv64decoder.h +++ b/src/sst/elements/vanadis/decoder/vriscv64decoder.h @@ -49,7 +49,7 @@ class VanadisRISCV64Decoder : public 
VanadisDecoder SST_ELI_ELEMENT_VERSION(1, 0, 0), "Implements a RISCV64-compatible decoder for Vanadis CPU processing.", SST::Vanadis::VanadisDecoder) - + SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS({ "gdb_unit", "GDB Server for Vanadis", "SST::Vanadis::VanadisRISCV64GDB" }) SST_ELI_DOCUMENT_PARAMS( {"decode_max_ins_per_cycle", "Maximum number of instructions that can be " "decoded and issued per cycle", "2"}, @@ -71,6 +71,7 @@ class VanadisRISCV64Decoder : public VanadisDecoder fatal_decode_fault = params.find("halt_on_decode_fault", false); + gdb = loadUserSubComponent("gdb_unit"); } ~VanadisRISCV64Decoder() {} @@ -127,6 +128,66 @@ class VanadisRISCV64Decoder : public VanadisDecoder regFile->setIntReg(sp_phys_reg, value); } + void gdb_tick() { + if ( gdb != nullptr ) { + gdb->tick(); + } + } + + void register_pipeline( SST::Output* output_p ) { + if ( gdb != nullptr ) { + gdb->register_pipeline( output_p, this ); + } + } + + bool is_rob_empty() { + return thread_rob->size() == 0; + } + + void instruction_to_rob( SST::Output* output, VanadisInstruction* next_ins, bool * bundle_has_branch, VanadisInstructionBundle* bundle ) { + // Note: Caller must confirm there is space in the rob before calling + // Note: last 2 arguments not used if instruction is not a branch + if ( next_ins->getInstFuncType() == INST_BRANCH ) { + VanadisSpeculatedInstruction* next_spec_ins = + dynamic_cast(next_ins); + + if ( branch_predictor->contains(ip) ) { + // We have an address predicton from the branching unit + const uint64_t predicted_address = branch_predictor->predictAddress(ip); + next_spec_ins->setSpeculatedAddress(predicted_address); + + if(output->getVerboseLevel() >= 16) { + output->verbose( + CALL_INFO, 16, 0, + "----> contains a branch: 0x%" PRI_ADDR " / predicted " + "(found in predictor): 0x%" PRI_ADDR "\n", + ip, predicted_address); + } + + ip = predicted_address; + *bundle_has_branch = true; + } + else { + // We don't have an address prediction + // so just speculate that we are 
going to drop through to the + // next instruction as we aren't sure where this will go yet + + if(output->getVerboseLevel() >= 16) { + output->verbose( + CALL_INFO, 16, 0, + "----> contains a branch: 0x%" PRI_ADDR " / predicted " + "(not-found in predictor): 0x%" PRI_ADDR ", pc-increment: %" PRIu64 "\n", + ip, ip + 4, bundle->pcIncrement()); + } + + ip += bundle->pcIncrement(); + next_spec_ins->setSpeculatedAddress(ip); + *bundle_has_branch = true; + } + } + + thread_rob->push(next_ins->clone()); + } void tick(SST::Output* output, uint64_t cycle) override { @@ -145,6 +206,12 @@ class VanadisRISCV64Decoder : public VanadisDecoder output->verbose( CALL_INFO, 16, 0, "---> Found uop bundle for ip=0x%" PRI_ADDR ", loading from cache...\n", ip); } + + if ( gdb != nullptr && gdb->should_stall(ip) ) { + // We are stopped at a breakpoint, so stall this cycle + break; + } + stat_uop_hit->addData(1); VanadisInstructionBundle* bundle = ins_loader->getBundleAt(ip); @@ -163,46 +230,7 @@ class VanadisRISCV64Decoder : public VanadisDecoder for ( uint32_t i = 0; i < bundle->getInstructionCount(); ++i ) { VanadisInstruction* next_ins = bundle->getInstructionByIndex(i); - if ( next_ins->getInstFuncType() == INST_BRANCH ) { - VanadisSpeculatedInstruction* next_spec_ins = - dynamic_cast(next_ins); - - if ( branch_predictor->contains(ip) ) { - // We have an address predicton from the branching unit - const uint64_t predicted_address = branch_predictor->predictAddress(ip); - next_spec_ins->setSpeculatedAddress(predicted_address); - - if(output->getVerboseLevel() >= 16) { - output->verbose( - CALL_INFO, 16, 0, - "----> contains a branch: 0x%" PRI_ADDR " / predicted " - "(found in predictor): 0x%" PRI_ADDR "\n", - ip, predicted_address); - } - - ip = predicted_address; - bundle_has_branch = true; - } - else { - // We don't have an address prediction - // so just speculate that we are going to drop through to the - // next instruction as we aren't sure where this will go yet - - 
if(output->getVerboseLevel() >= 16) { - output->verbose( - CALL_INFO, 16, 0, - "----> contains a branch: 0x%" PRI_ADDR " / predicted " - "(not-found in predictor): 0x%" PRI_ADDR ", pc-increment: %" PRIu64 "\n", - ip, ip + 4, bundle->pcIncrement()); - } - - ip += bundle->pcIncrement(); - next_spec_ins->setSpeculatedAddress(ip); - bundle_has_branch = true; - } - } - - thread_rob->push(next_ins->clone()); + instruction_to_rob( output, next_ins, &bundle_has_branch, bundle ); } // Move to the next address, if we had a branch we should have @@ -304,6 +332,7 @@ class VanadisRISCV64Decoder : public VanadisDecoder uint16_t icache_max_bytes_per_cycle; uint16_t max_decodes_per_cycle; uint16_t decode_buffer_max_entries; + VanadisRISCV64GDB* gdb; void decode(SST::Output* output, const uint64_t ins_address, const uint32_t ins, VanadisInstructionBundle* bundle) { @@ -1112,8 +1141,10 @@ class VanadisRISCV64Decoder : public VanadisDecoder case 0xc00: { if ( 0 == rs1 ) { - auto thread_call = std::bind(&VanadisRISCV64Decoder::getCycleCount, this); - bundle->addInstruction( new VanadisSetRegisterByCallInstruction( ins_address, hw_thr, options, rd, thread_call)); + bundle->addInstruction( + new VanadisSetRegisterByCallInstruction( + ins_address, hw_thr, options, rd, + &VanadisRISCV64Decoder::getCycleCount_stub, (void *)this)); decode_fault = false; } } break; diff --git a/src/sst/elements/vanadis/decoder/vriscv64gdb.h b/src/sst/elements/vanadis/decoder/vriscv64gdb.h new file mode 100644 index 0000000000..ca0c063977 --- /dev/null +++ b/src/sst/elements/vanadis/decoder/vriscv64gdb.h @@ -0,0 +1,1483 @@ +// Copyright 2009-2024 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2024, NTESS +// All rights reserved. +// +// Portions are copyright of other developers: +// See the file CONTRIBUTORS.TXT in the top level directory +// of the distribution for more information. 
+// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. + +#ifndef _H_VANADIS_RISCV64_GDB +#define _H_VANADIS_RISCV64_GDB + +#include "decoder/vdecoder.h" +#include "inst/vinstall.h" +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +} + +/* arbitrary buffer length of 32KB */ +#define BUFFER_LEN 0x8000 + +namespace SST { +namespace Vanadis { + +/* + * This Class implements a GDB server for an individual vanadis hardware thread. + * + * This implementation is considered to be in extreme beta. Something is probably + * going to be wrong. + * + * We currently implement this all for RISCV 64 bit, but most of the code can + * probably be moved to a generic file for both MIPS and RISCV when we get time. + * + * We do not use GDB's internal threads, each hardware thread must set its own + * unique local port, and connect to the server with its own GDB client. + * + * We do not halt the simulation on breakpoints. If SST is slowing down your + * system, it will continue to slow down your system at the breakpoint. + * This is so other cores and the memory system can continue to make progress. + * + * In the same vein, certain gdb commands may take longer than expected as vanadis + * may be waiting for memory accesses that miss in the caches, try to give commands + * 20-30 seconds before you assume something went wrong. + * + * This is only meant for architectural debugging. You need to use gdb on SST + * or print statements to debug the micro-arch. + * + * You CANNOT load binaries or change code through the GDB client!!!!! + * Vanadis currently has no working FENCE.I instruction. 
+ * + * Tested on gdb-multiarch for ubuntu 20.04 and + * riscv-none-embed-gdb from xpack-riscv-none-embed-gcc-10.2.0-1.2-linux-x64 + * + * Example: May differ based on different gdb clients + * gdb-multiarch + * set architecture riscv:rv64 + * file path_to_elf_file_with_debug_symbols + * ### Do this to allow commands extra time to return + * set remotetimeout 1000 + * ### change 0x10000000 to be the low memory boundary your program uses + * ### change 0x20000000 to be the high memory boundary your program uses + * mem 0x10000000 0x20000000 rw + * target remote 127.0.0.1:port_number_you_configured + * You should now see the GDB interface that you know and love + * */ +class VanadisRISCV64GDB : public SST::SubComponent +{ +public: + SST_ELI_REGISTER_SUBCOMPONENT_API(SST::Vanadis::VanadisRISCV64GDB) + SST_ELI_REGISTER_SUBCOMPONENT( + VanadisRISCV64GDB, "vanadis", "VanadisRISCV64GDB", + SST_ELI_ELEMENT_VERSION(1, 0, 0), + "Implements a RISCV64-compatible GDB server compatible for running GDB client on a single Vanadis hardware thread.", + SST::Vanadis::VanadisRISCV64GDB) + + SST_ELI_DOCUMENT_PARAMS({ "gdb_active", "Should this hardware thread use GDB" }, + { "gdb_port", "What local host port number should this thread's GDB server listen on." }, + { "gdb_break_at_start", "Should GDB server break on the first instruction of the decoder." }, + { "gdb_initial_break_addr", "Address of extra breakpoint to add at start. 0x0 == no break." }, + { "gdb_debug_comms", "Print input and output traffic between client and server." }, + { "gdb_break_limit", "How many total breakpoints are supported for this thread." 
}) + + VanadisRISCV64GDB(ComponentId_t id, Params& params) : SubComponent(id) { + active = params.find("gdb_active", false); + port = params.find("gdb_port", 0); + break_at_start = params.find("gdb_break_at_start", false); + break_limit = params.find("gdb_break_limit", 50); + initial_break_addr = params.find("gdb_initial_break_addr", 0); + debug_comms = params.find("gdb_debug_comms", false); + + if ( initial_break_addr != 0 ) { + insert_breakpoint( initial_break_addr ); + } + + if ( break_at_start ) { + interrupt = true; + } + + state_m = &poll_for_packet_stub; + } + + virtual ~VanadisRISCV64GDB() { + breakpoints.clear(); + } + + bool is_bp_at( uint64_t address ) { + return (breakpoints.find(address) != breakpoints.end()); + } + + void remove_breakpoint( uint64_t address ) { + if ( is_bp_at(address) ) { + breakpoints.erase(address); + } + } + + void insert_breakpoint( uint64_t address ) { + if ( is_bp_at(address) ) { + return; + } + + if ( breakpoints.size() >= break_limit ) { + output->fatal(CALL_INFO, -1, "Error - Trying to insert breakpoint at: 0x%llx exceeds limit of %d.\n", address, break_limit); + } + + breakpoints.insert(std::pair(address, true)); + } + + /* + * Called by the decoder before it inserts an instruction into the back-end + * + * Detects breakpoints that are reached in running software + * */ + bool should_stall( uint64_t address ) { + if ( active ) { + if ( in_break && !pass_1 ) { + // Always stall inside a breakpoint until pass_1 is set + return true; + } + + bool wait_for_empty = false; + + if ( pass_1 && current_break_addr != address ) { + // In a breakpoint and we want to step to the next instruction + // and we have found a new address + wait_for_empty = true; + } + + if ( !in_break && is_bp_at(address) ) { + // We are not in a breakpoint and the next instruction + // matches a breakpoint address + wait_for_empty = true; + } + + if ( interrupt ) { + // We got an interrupt from the client + // flush the pipeline so we can break somewhere + 
wait_for_empty = true; + } + + if ( wait_for_empty ) { + if ( !decoder->is_rob_empty() ) { + // Hold back instructions until pipeline is empty + // There could be a jump in the pipeline + wait_for_squash_count = 0; + return true; + } + + if ( wait_for_squash_count < 20 ) { + // The ROB is empty + // The last instruction to retire could have been a jump + // Wait for the decoder to have a chance to internally + // redirect to the correct IP + wait_for_squash_count++; + return true; + } + + // Done waiting, ip should be real next instruction address + wait_for_squash_count = 0; + + if ( is_bp_at(address) || pass_from_step || interrupt ) { + pass_1 = false; + in_break = true; + current_break_addr = address; + // tell the client we found the next breakpoint + send_packet("S05"); + output->verbose(CALL_INFO, 0, 0, "Stopping at address |0x%llx| for breakpoint.\n", current_break_addr); + if ( interrupt ) { + output->verbose(CALL_INFO, 0, 0, "Stop caused by interrupt.\n"); + interrupt = false; + } + if ( pass_from_step ) { + output->verbose(CALL_INFO, 0, 0, "Stop caused by stepping.\n"); + pass_from_step = false; + } + return true; + } + } + } + + // Keep inserting instructions into the pipeline + return false; + } + + /* + * Called from pipeline every cycle to tick our time + * */ + void tick() { + if ( active ) { + if ( in_break ) { + // In a breakpoint, talk to the client + (*state_m)(this); + } + else { + // Not in a breakpoint, but check if the client + // wants us to stop somewhere + if ( poll_for_interrupt() ) { + interrupt = true; + } + } + } + } + + /* + * We got a valid packet from the client + * service the request + * */ + void do_packet() { + char type = pkt_buf[0]; + char * payload = (char *)&pkt_buf[1]; + + switch (type) { + case 'c': { + // Continue command + if ( strlen(payload) != 0 ) { + // We do not support fancy continues + send_packet("E95"); + } + else { + pass_1 = true; + pass_from_step = false; + } + state_m = &poll_for_packet_stub; + } + break; + 
case 'g': { + // Dump all GP registers + read_reg_index = -1; + + state_m = &read_all_reg_stub; + + data_buf_len = 0; + + read_all_reg(); + } + break; + case 'm': { + // Read memory + data_buf_len = 0; + + int args_found = sscanf(payload, "%lx,%lx", &mem_addr, &mem_len); + + if ( args_found == 2 ) { + if ((mem_len * 2) + 1 <= BUFFER_LEN && mem_len >= 1 ) { + state_m = &read_mem_bytes_stub; + mem_first_byte = true; + + read_mem_bytes(); + break; + } + } + + output->verbose(CALL_INFO, 0, 0, "Invalid memory packet from client\n"); + send_packet("E98"); + state_m = &poll_for_packet_stub; + break; + } + break; + case 'M': { + // Write memory + data_buf_len = 0; + + int args_found = sscanf(payload, "%lx,%lx", &mem_addr, &mem_len); + char * hex_coded = strchr(payload, ':') + 1; + + if ( args_found == 2 && hex_coded != NULL ) { + if ((mem_len * 2) + 1 <= BUFFER_LEN && mem_len >= 1 ) { + state_m = &write_mem_bytes_stub; + mem_first_byte = true; + mem_index = 0; + + read_hex_to_databuff( (uint8_t *)hex_coded, mem_len ); + break; + } + } + + output->verbose(CALL_INFO, 0, 0, "Invalid memory write packet from client\n"); + send_packet("E94"); + state_m = &poll_for_packet_stub; + break; + } + break; + case 'p': { + // Read register, register #32 is the program counter + int i = strtol(payload, NULL, 16); + if ( i >= 32 && i != 32 ) + { + send_packet("E01"); + state_m = &poll_for_packet_stub; + break; + } + + read_reg_index = i; + + state_m = &read_one_reg_stub; + + data_buf_len = 0; + + read_one_reg(); + } + break; + case 'P': { + // Write register, program counter is not allowed, sorry + char * equals = strchr(payload, '='); + *equals = '\0'; + int i = strtol(payload, NULL, 16); + char * hex_coded = equals + 1; + if ( (i >= 32 ) || hex_coded == NULL) + { + send_packet("E01"); + state_m = &poll_for_packet_stub; + break; + } + + data_buf_len = 0; + + read_hex_to_databuff( (uint8_t *)hex_coded, 8 ); + + if ( state_m == &poll_for_packet_stub ) { + break; + } + + write_reg_value = 
*(uint64_t *)data_buf; + + write_reg_index = i; + + write_reg_ret = &poll_for_packet_stub; + + state_m = &write_reg_A_stub; + + send_packet("OK"); + } + break; + case 'q': { + // Query packet, we don't support much + // While we technically support software breakpoints we prefer hardware + // Tell the client we don't do software, but they don't seem to care + if ( !strncmp( payload, "Supported:", strlen("Supported:") ) ) { + send_packet("PacketSize=8000;qXfer:features:read-;multiprocess-;swbreak-;hwbreak+;vContSupported-"); + } + else { + send_packet(""); + } + state_m = &poll_for_packet_stub; + } + break; + case 's': { + // Step command, no fancy arguments allowed + if ( strlen(payload) != 0 ) { + send_packet("E99"); + } + else { + pass_1 = true; + pass_from_step = true; + } + state_m = &poll_for_packet_stub; + } + break; + case 'X': { + // Write memory in binary + data_buf_len = 0; + + int args_found = sscanf(payload, "%lx,%lx", &mem_addr, &mem_len); + char * bin_coded = strchr(payload, ':') + 1; + + if ( args_found == 2 && bin_coded != NULL ) { + if (mem_len < BUFFER_LEN && mem_len >= 1 ) { + state_m = &write_mem_bytes_stub; + mem_first_byte = true; + mem_index = 0; + + int skipped_bytes = (bin_coded - payload) + 1; + + read_bin_to_databuff( (uint8_t *)bin_coded, pkt_buf_len - skipped_bytes ); + if ( mem_len != data_buf_len ) { + output->verbose(CALL_INFO, 0, 0, "Invalid memory write packet length after filter from client\n"); + send_packet("E91"); + state_m = &poll_for_packet_stub; + break; + } + break; + } + } + + output->verbose(CALL_INFO, 0, 0, "Invalid memory write packet from client\n"); + send_packet("E92"); + state_m = &poll_for_packet_stub; + break; + } + break; + case 'Z': { + // Insert breakpoint + // We accept both software and hardware, but we treat them the same + uint64_t type, addr, length; + int args_found = sscanf(payload, "%lx,%lx,%lx", &type, &addr, &length); + + if ( args_found == 3 ) { + if ( (type == 1 || type == 0) && (length == 2 || 
length == 4) ) { + insert_breakpoint( addr ); + state_m = &poll_for_packet_stub; + send_packet("OK"); + break; + } + } + + output->verbose(CALL_INFO, 0, 0, "Invalid insert breakpoint packet\n"); + send_packet("E97"); + state_m = &poll_for_packet_stub; + } + break; + case 'z': { + // Remove breakpoint + // We accept both software and hardware, but we treat them the same + uint64_t type, addr, length; + int args_found = sscanf(payload, "%lx,%lx,%lx", &type, &addr, &length); + + if ( args_found == 3 ) { + if ( (type == 0 || type == 1) && (length == 2 || length == 4) ) { + remove_breakpoint( addr ); + state_m = &poll_for_packet_stub; + send_packet("OK"); + break; + } + } + + output->verbose(CALL_INFO, 0, 0, "Invalid remove breakpoint packet\n"); + send_packet("E96"); + state_m = &poll_for_packet_stub; + } + break; + case '?': + // Get break reason, always send back we were interrupted + send_packet("S05"); + state_m = &poll_for_packet_stub; + break; + default: + // We don't support the packet, respond empty + send_packet(""); + state_m = &poll_for_packet_stub; + break; + } + } + + /* + * Process for writing a memory byte + * ## Save user value of register x1 + * write_mem_byte_A => read_reg_A => read_reg_B => write_mem_byte_B + * + * ## Write input value into register x1 + * write_mem_byte_B => write_reg_A => write_reg_B => write_mem_byte_C + * + * ## Inject sb x1 (x0 + addr) into the pipeline + * write_mem_byte_C => write_mem_byte_D + * + * ## Re-write saved user x1 value to x1 + * write_mem_byte_D => write_reg_A => write_reg_B => return + * */ + static void write_mem_byte_A_stub( VanadisRISCV64GDB * me ) { + me->write_mem_byte_A(); + } + + /* + * Save user value of register x1 + * */ + void write_mem_byte_A() { + read_reg_index = 1; + + read_reg_ret = &write_mem_byte_B_stub; + + state_m = &read_reg_A_stub; + + return; + } + + static void write_mem_byte_B_stub( VanadisRISCV64GDB * me ) { + me->write_mem_byte_B(); + } + + /* + * Write input value into register x1 + * 
*/ + void write_mem_byte_B() { + mem_saved_reg = read_reg_value; + + write_reg_value = data_buf[mem_index]; + + write_reg_index = 1; + + write_reg_ret = write_mem_byte_C_stub; + + state_m = &write_reg_A_stub; + + return; + } + + static void write_mem_byte_C_stub( VanadisRISCV64GDB * me ) { + me->write_mem_byte_C(); + } + + /* + * Inject sb x1 (x0 + addr) into the pipeline + * */ + void write_mem_byte_C() { + if ( decoder->is_rob_empty() ) { + VanadisInstruction* next_ins = new VanadisStoreInstruction( + current_break_addr, decoder->getHardwareThread(), + decoder->getDecoderOptions(), 0, mem_addr, 1, 1, + MEM_TRANSACTION_NONE, STORE_INT_REGISTER); + + decoder->instruction_to_rob( output, next_ins, NULL, NULL ); + + state_m = &write_mem_byte_D_stub; + } + + return; + } + + static void write_mem_byte_D_stub( VanadisRISCV64GDB * me ) { + me->write_mem_byte_D(); + } + + /* + * Re-write saved user x1 value to x1 + * */ + void write_mem_byte_D() { + write_reg_value = mem_saved_reg; + + write_reg_index = 1; + + write_reg_ret = write_mem_byte_ret; + + state_m = &write_reg_A_stub; + + return; + } + + static void write_mem_bytes_stub( VanadisRISCV64GDB * me ) { + me->write_mem_bytes(); + } + + /* + * Write to memory 1 byte at a time + * + * Respond with the OK packet once done + * + * Vanadis will just die if something goes wrong + * */ + void write_mem_bytes() { + if ( !mem_first_byte ) { + mem_len--; + mem_addr++; + mem_index++; + } + + if ( mem_len <= 0 ) { + send_packet("OK");; + + state_m = &poll_for_packet_stub; + } + else { + mem_first_byte = false; + write_mem_byte_ret = &write_mem_bytes_stub; + state_m = &write_mem_byte_A_stub; + } + + return; + } + + /* + * Process for reading a memory byte + * ## Save user value of register x1 + * read_mem_byte_A => read_reg_A => read_reg_B => read_mem_byte_B + * + * ## Inject lb x1 (x0 + addr) into the pipeline + * read_mem_byte_B => read_mem_byte_C + * + * ## Get the read in byte from x1 + * read_mem_byte_C => read_reg_A => 
read_reg_B => read_mem_byte_D + * + * ## Re-write saved user x1 value to x1 + * read_mem_byte_D => write_reg_A => write_reg_B => return + * */ + static void read_mem_byte_A_stub( VanadisRISCV64GDB * me ) { + me->read_mem_byte_A(); + } + + /* + * Save user value of register x1 + * */ + void read_mem_byte_A() { + read_reg_index = 1; + + read_reg_ret = &read_mem_byte_B_stub; + + state_m = &read_reg_A_stub; + + return; + } + + static void read_mem_byte_B_stub( VanadisRISCV64GDB * me ) { + me->read_mem_byte_B(); + } + + /* + * Inject lb x1 (x0 + addr) into the pipeline + * */ + void read_mem_byte_B() { + mem_saved_reg = read_reg_value; + + if ( decoder->is_rob_empty() ) { + VanadisInstruction* next_ins = new VanadisLoadInstruction( + current_break_addr, decoder->getHardwareThread(), + decoder->getDecoderOptions(), 0, mem_addr, 1, 1, + false, MEM_TRANSACTION_NONE, LOAD_INT_REGISTER); + + decoder->instruction_to_rob( output, next_ins, NULL, NULL ); + + state_m = &read_mem_byte_C_stub; + } + + return; + } + + static void read_mem_byte_C_stub( VanadisRISCV64GDB * me ) { + me->read_mem_byte_C(); + } + + /* + * Get the read in byte from x1 + * */ + void read_mem_byte_C() { + if ( decoder->is_rob_empty() ) { + read_reg_index = 1; + + read_reg_ret = &read_mem_byte_D_stub; + + state_m = &read_reg_A_stub; + } + + return; + } + + static void read_mem_byte_D_stub( VanadisRISCV64GDB * me ) { + me->read_mem_byte_D(); + } + + /* + * Re-write saved user x1 value to x1 + * */ + void read_mem_byte_D() { + mem_data = read_reg_value & 0xff; + + write_reg_value = mem_saved_reg; + + write_reg_index = 1; + + write_reg_ret = read_mem_byte_ret; + + state_m = &write_reg_A_stub; + + return; + } + + static void read_mem_bytes_stub( VanadisRISCV64GDB * me ) { + me->read_mem_bytes(); + } + + /* + * Read memory 1 byte at a time into the data_buf + * In hex format + * + * Respond with the hex data as a packet once done + * + * Vanadis will just die if something goes wrong + * */ + void 
read_mem_bytes() { + if ( !mem_first_byte ) { + mem_len--; + mem_addr++; + write_to_databuff_as_hex((uint8_t *)&mem_data, 1); + } + + if ( mem_len <= 0 ) { + send_packet((const char*)data_buf); + + state_m = &poll_for_packet_stub; + } + else { + mem_first_byte = false; + read_mem_byte_ret = &read_mem_bytes_stub; + state_m = &read_mem_byte_A_stub; + } + + return; + } + + static void write_reg_A_stub( VanadisRISCV64GDB * me ) { + me->write_reg_A(); + } + + /* + * Insert instruction into pipeline + * write_reg_call() will be called when instruction executes + * */ + void write_reg_A() { + if ( decoder->is_rob_empty() ) { + VanadisInstruction* next_ins = new VanadisSetRegisterByCallInstruction( + current_break_addr, decoder->getHardwareThread(), + decoder->getDecoderOptions(), write_reg_index, + &write_reg_call_stub, (void *)this ); + + decoder->instruction_to_rob( output, next_ins, NULL, NULL ); + + write_reg_done = false; + + state_m = &write_reg_B_stub; + } + + return; + } + + static void * write_reg_call_stub( void * arg_input ) { + VanadisRISCV64GDB * me = (VanadisRISCV64GDB *)arg_input; + return me->write_reg_call( ); + } + + /* + * Called by executing instruction + * The value we return is written to the architectural register + * */ + void * write_reg_call( ) { + write_reg_done = true; + + return (void *)&write_reg_value; + } + + static void write_reg_B_stub( VanadisRISCV64GDB * me ) { + me->write_reg_B(); + } + + /* + * We hold until the value actually wrote to the register + * */ + void write_reg_B() { + if ( write_reg_done ) { + state_m = write_reg_ret; + } + + return; + } + + static void read_reg_A_stub( VanadisRISCV64GDB * me ) { + me->read_reg_A(); + } + + /* + * Insert instruction into pipeline + * read_reg_call() will be called when instruction executes + * */ + void read_reg_A() { + if ( decoder->is_rob_empty() ) { + VanadisInstruction* next_ins = new VanadisGetRegisterByCallInstruction( + current_break_addr, decoder->getHardwareThread(), + 
decoder->getDecoderOptions(), read_reg_index, + &read_reg_call_stub, (void *)this ); + + decoder->instruction_to_rob( output, next_ins, NULL, NULL ); + + read_reg_done = false; + + state_m = &read_reg_B_stub; + } + + return; + } + + static void read_reg_call_stub( void * arg_input, void * arg_ret ) { + VanadisRISCV64GDB * me = (VanadisRISCV64GDB *)arg_input; + int64_t value = *(int64_t *)(arg_ret); + me->read_reg_call( value ); + } + + /* + * Called by executing instruction + * The value we get came from the read register + * */ + void read_reg_call( int64_t value ) { + read_reg_value = value; + + read_reg_done = true; + } + + static void read_reg_B_stub( VanadisRISCV64GDB * me ) { + me->read_reg_B(); + } + + /* + * We hold until the value actually read from the register + * */ + void read_reg_B() { + if ( read_reg_done ) { + state_m = read_reg_ret; + } + + return; + } + + static void read_all_reg_stub( VanadisRISCV64GDB * me ) { + me->read_all_reg(); + } + + /* + * Read all integer registers 1 register at a time + * + * Write the outputs as hex to the data_buf + * + * Send out the hex data as a packet when done + * */ + void read_all_reg() { + if ( read_reg_index >= 0 ) { + write_to_databuff_as_hex((uint8_t *)&read_reg_value, 8); + } + + if ( read_reg_index >= 31 ) { + send_packet((const char*)data_buf); + + state_m = &poll_for_packet_stub; + } + else { + read_reg_index++; + + read_reg_ret = &read_all_reg_stub; + + state_m = &read_reg_A_stub; + } + + return; + } + + static void read_one_reg_stub( VanadisRISCV64GDB * me ) { + me->read_one_reg(); + } + + /* + * Read 1 integer register + * + * Register index 32 is the program counter + * + * Process if normal integer register + * + * read_one_reg => read_reg_A => read_reg_B => read_one_reg_end => done + * + * */ + void read_one_reg() { + if ( read_reg_index == 32 ) { + write_to_databuff_as_hex((uint8_t *)¤t_break_addr, 8); + + send_packet((const char*)data_buf); + + state_m = &poll_for_packet_stub; + } + else { + 
read_reg_ret = &read_one_reg_end_stub; + + state_m = &read_reg_A_stub; + } + + return; + } + + static void read_one_reg_end_stub( VanadisRISCV64GDB * me ) { + me->read_one_reg_end(); + } + + void read_one_reg_end() { + write_to_databuff_as_hex((uint8_t *)&read_reg_value, 8); + + send_packet((const char*)data_buf); + + state_m = &poll_for_packet_stub; + + return; + } + + /* + * If we are not getting data we don't want to poll every + * cycle of the simulation, as this will slow down the simulation + * considerably. We poll once every 1000 cycles instead. + * */ + bool poll_delay_done() { + return poll_delay_cnt++ < 1000; + } + + static void poll_for_packet_stub( VanadisRISCV64GDB * me ) { + me->poll_for_packet(); + } + + /* + * Read in bytes until we find the '$' start of packet + * */ + void poll_for_packet() { + if ( poll_delay_done() ) { + return; + } + + char buffer; + + while ( get_char_gdb(&buffer) ) { + if ( buffer == '$' ) { + sum = 0; + state_m = &read_packet_stub; + pkt_buf_len = 0; + read_packet(); + return; + } + if ( buffer == '#' ) { + // We might have started reading mid packet + // See if they can resend + send_nack(); + } + } + + poll_delay_cnt = 0; + + return; + } + + static void read_packet_stub( VanadisRISCV64GDB * me ) { + me->read_packet(); + } + + /* + * Read in the packet and calculate the checksum until + * we get the '#' end of packet character + * + * Once we find the end, the next 2 characters are hex + * encoded checksum + * */ + void read_packet( ) { + if ( poll_delay_done() ) { + return; + } + + char buffer; + + while ( get_char_gdb(&buffer) ) { + if ( buffer == '#' ) { + pkt_buf[pkt_buf_len] = '\0'; + hex_builder_ret = &check_packet_stub; + state_m = &read_hex_stub; + hex_index = 0; + hex_len = 2; + read_hex(); + return; + } + + if ( pkt_buf_len >= sizeof(pkt_buf) ) { + output->fatal(CALL_INFO, -1, "Error - GDB rec buff overflow.\n"); + } + + pkt_buf[pkt_buf_len++] = buffer; + sum += (uint8_t)buffer; + } + + poll_delay_cnt = 0; + + 
return; + } + + static void check_packet_stub( VanadisRISCV64GDB * me ) { + me->check_packet(); + } + + /* + * Ensure the checksum matched + * if it did + * Ack back with a '+' and service the packet + * if id didn't + * Nack back with a '-' + * */ + void check_packet( ) { + if ( (uint8_t)hex_builder == sum ) { + send_ack(); + do_packet(); + } + else { + send_nack(); + state_m = &poll_for_packet_stub; + } + + return; + } + + /* + * We need to check if the client wants us to stop + * everyonce in a while 0x3 is the classic stop byte + * but if the client is trying to send us packets, lets stop anyway + * */ + bool poll_for_interrupt() { + if ( poll_delay_done() ) { + return false; + } + + char buffer; + + while ( get_char_gdb(&buffer) ) { + if ( buffer == 0x3 || buffer == '$' ) { + if ( buffer == 0x3 ) { + send_ack(); + } + return true; + } + } + + poll_delay_cnt = 0; + + return false; + } + + /* + * Ack '+' and Nack '-' + * */ + void send_ack() { + write_to_buff_raw((uint8_t *)"+", 1); + flush_buff_to_socket(); + } + + void send_nack() { + write_to_buff_raw((uint8_t *)"-", 1); + flush_buff_to_socket(); + } + + /* + * Append raw bytes to the send buffer + * */ + void write_to_buff_raw( uint8_t *buf, ssize_t len ) { + if (send_buf_len + len >= sizeof(send_buf)) { + output->fatal(CALL_INFO, -1, "Error - GDB send buff overflow.\n"); + } + memcpy(&send_buf[send_buf_len], buf, len); + send_buf_len += len; + } + + /* + * Send the send_buf out over the socket until it all transmits + * + * If we are debugging the comms, write bytes directly to standard out + * Most of the bytes are displayable characters, but some are binary. + * Will need to pipe output to a file and use hexdump to see these, sorry + * + * If we are not connected to the client, just drop the packet so we don't hang. 
+ * */ + void flush_buff_to_socket() { + int bytes_left = send_buf_len; + int offset = 0; + + if ( !connected ) { + return; + } + + while ( 1 ) { + int wrote = write(sock_fd, send_buf + offset, bytes_left); + + if ( wrote == 0 ) { + output->verbose(CALL_INFO, 0, 0, "GDB client closed connection while server trying to respond\n"); + connected = false; + return; + } + + if ( wrote > 0 ) { + if ( debug_comms ) { + std::cout << "\nOUT :: "; + for ( int i = 0; i < wrote; i++ ) { + std::cout << send_buf[offset + i]; + } + std::cout << "\n"; + } + bytes_left -= wrote; + + if ( bytes_left == 0 ) { + break; + } + } + } + + send_buf_len = 0; + } + + /* + * Try to open up our port for listening + * */ + void try_listen() { + const int one = 1; + struct sockaddr_in addr; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, IPPROTO_TCP); + if (listen_fd < 0) { + output->fatal(CALL_INFO, -1, "Error - Failure trying to open socket for GDB.\n"); + } + + ret = setsockopt(listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)); + if (ret < 0) { + output->fatal(CALL_INFO, -1, "Error - Could not set socket option for GDB socket SO_REUSEADDR.\n"); + } + + output->verbose(CALL_INFO, 0, 0, "GDB listening on port |%d|\n", port); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + addr.sin_port = htons(port); + + if (addr.sin_addr.s_addr == INADDR_NONE) { + output->fatal(CALL_INFO, -1, "Error - Failure converting IP address for |127.0.0.1|\n"); + } + + ret = bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + output->fatal(CALL_INFO, -1, "Error - bind for GDB failed on |127.0.0.1:%d|\n", port); + } + + ret = listen(listen_fd, 1); + if (ret < 0) { + output->fatal(CALL_INFO, -1, "Error - listen for GDB failed on |127.0.0.1:%d|\n", port); + } + + listening = true; + } + + /* + * See if we can connect to the client + * */ + void try_connect() { + if ( !listening ) { + try_listen(); + } + + fd_set rfds; + struct timeval tv; + 
int ret; + const int one = 1; + + FD_ZERO(&rfds); + FD_SET(listen_fd, &rfds); + + tv.tv_sec = 0; + tv.tv_usec = 0; + + ret = select(listen_fd + 1, &rfds, NULL, NULL, &tv); + + if (ret < 1) { + return; + } + + sock_fd = accept(listen_fd, NULL, NULL); + if (sock_fd < 0) { + output->fatal(CALL_INFO, -1, "Error - GDB socket accept failed with data available\n"); + } + + ret = setsockopt(sock_fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)); + if (ret < 0) { + output->fatal(CALL_INFO, -1, "Error - Could not set socket option for GDB socket SO_KEEPALIVE.\n"); + } + + ret = setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)); + if (ret < 0) { + output->fatal(CALL_INFO, -1, "Error - Could not set socket option for GDB socket TCP_NODELAY.\n"); + } + + output->verbose(CALL_INFO, 0, 0, "GDB server is connected to client\n"); + connected = true; + + return; + } + + /* + * Read a single character from the socket in a non-blocking way + * I realize this is slow, but there won't be much traffic + * and it won't have crazy latency requirements + * */ + bool get_char_gdb( char * buffer ) { + if ( !connected ) { + try_connect(); + + if ( !connected ) { + return false; + } + } + + fd_set rfds; + struct timeval tv; + int ret; + + FD_ZERO(&rfds); + FD_SET(sock_fd, &rfds); + + tv.tv_sec = 0; + tv.tv_usec = 0; + + ret = select(sock_fd + 1, &rfds, NULL, NULL, &tv); + + if (ret < 1) { + return false; + } + + ret = read(sock_fd, buffer, 1); + if (ret == 0) { + output->verbose(CALL_INFO, 0, 0, "GDB client closed connection, still listening\n"); + connected = false; + return false; + } + else if ( ret == 1 ) { + if ( debug_comms ) { + std::cout << buffer[0] << std::flush; + } + return true; + } + + return false; + } + + /* + * Decoder calls this to pass us our needed functions + * */ + void register_pipeline( SST::Output* output_p, VanadisDecoder * dec ) { + output = output_p; + decoder = dec; + } + + /* + * Write out a proper GDB server packet + * Starts with '$' and ends with 
'#' followed by 2 byte hex checksum + * */ + void send_packet_w_size(const uint8_t *pkt, size_t size) { + uint8_t checksum = 0; + size_t i; + write_to_buff_raw((uint8_t *)"$", 1); + for (i = 0; i < size; i++) { + checksum += pkt[i]; + } + write_to_buff_raw((uint8_t *)pkt, size); + write_to_buff_raw((uint8_t *)"#", 1); + + char buf[3]; + + snprintf(buf, 3, "%02x", checksum); + write_to_buff_raw((uint8_t *)buf, 2); + + flush_buff_to_socket(); + } + + void send_packet(const char *pkt) { + send_packet_w_size((const uint8_t *)pkt, strlen(pkt)); + } + + /* + * Convert a single hex character to a 4 bit integer + * */ + bool hex_to_int(char hex, uint8_t * result) { + if ((hex >= 'a') && (hex <= 'f')) { + *result = ((hex - 'a') + 10); + return true; + } + else if ((hex >= '0') && (hex <= '9')) { + *result = (hex - '0'); + return true; + } + else if ((hex >= 'A') && (hex <= 'F')) { + *result = ((hex - 'A') + 10); + return true; + } + return false; + } + + static void read_hex_stub( VanadisRISCV64GDB * me ) { + me->read_hex(); + } + + /* + * Create a number from an N character hex string coming from the socket + * */ + void read_hex( ) { + if ( poll_delay_done() ) { + return; + } + + uint8_t buffer; + uint64_t number; + + if ( hex_index == 0 ) { + hex_builder = 0; + } + + if ( hex_index >= hex_len ) { + state_m = hex_builder_ret; + (*hex_builder_ret)(this); + return; + } + + while ( get_char_gdb((char *)&buffer) ) { + if ( hex_to_int( buffer, (uint8_t *)&number) ) { + hex_builder += (number << (((hex_len - hex_index) - 1) * 4)); + hex_index++; + if ( hex_index >= hex_len ) { + state_m = hex_builder_ret; + (*hex_builder_ret)(this); + return; + } + } + else { + send_nack(); + state_m = &poll_for_packet_stub; + return; + } + } + + poll_delay_cnt = 0; + + return; + } + + /* + * Write 2 hex characters for each passed in char to + * the data buff and NUL terminate the string + * */ + void write_to_databuff_as_hex(uint8_t * data, int count) { + uint8_t byte; + for (int i = 0; i < 
count; i++) { + byte = data[i]; + data_buf[data_buf_len++] = int_to_hex[byte >> 4]; + data_buf[data_buf_len++] = int_to_hex[byte & 0xf]; + } + data_buf[data_buf_len] = '\0'; + } + + /* + * Read in X bytes from hex stream into data buff + * hex stream will be twice as long as data buff + * */ + void read_hex_to_databuff(uint8_t * hex, int count) { + uint8_t high; + uint8_t low; + uint8_t byte; + int i; + for ( i = 0; i < count; i++ ) { + hex_index = i * 2; + if ( !hex_to_int( hex[hex_index], &high) ) { + send_packet("E93"); + state_m = &poll_for_packet_stub; + return; + } + if ( !hex_to_int( hex[hex_index + 1], &low) ) { + send_packet("E93"); + state_m = &poll_for_packet_stub; + return; + } + byte = (high << 4) + low; + + data_buf[i] = byte; + } + + data_buf_len = i; + data_buf[data_buf_len] = '\0'; + } + + /* + * Copy binary data to data buff + * '}' is used as an escape character + * skip this character and do the next character xored with 0x20 + * */ + void read_bin_to_databuff(uint8_t * bin, int count) { + int i; + int k; + for ( i = 0, k = 0; i < count; i++ ) { + uint8_t byte = bin[i]; + if ( byte == '}') { + i++; + byte = bin[i] ^ 0x20; + } + + data_buf[k++] = byte; + data_buf_len++; + } + + data_buf[data_buf_len] = '\0'; + } + + // Convert 4 bit integer to hex character + const char * int_to_hex = "0123456789abcdef"; + + bool active; + int port; + bool break_at_start; + int break_limit; + uint64_t initial_break_addr; + bool debug_comms; + SST::Output* output; + std::unordered_map breakpoints; + + uint64_t current_break_addr; + bool in_break; + bool pass_1; + bool pass_from_step; + bool interrupt; + int wait_for_squash_count; + int poll_delay_cnt; + + void (* state_m)( VanadisRISCV64GDB * ); + + int listen_fd; + int sock_fd; + bool connected; + bool listening; + + uint8_t pkt_buf[BUFFER_LEN]; + int pkt_buf_len; + uint8_t send_buf[BUFFER_LEN]; + int send_buf_len; + uint8_t data_buf[BUFFER_LEN]; + int data_buf_len; + uint8_t sum; + + VanadisDecoder * decoder; + 
+ uint64_t hex_builder; + int hex_index; + int hex_len; + void (* hex_builder_ret)( VanadisRISCV64GDB * ); + + int read_reg_index; + int64_t read_reg_value; + bool read_reg_done; + void (* read_reg_ret)( VanadisRISCV64GDB * ); + + int write_reg_index; + int64_t write_reg_value; + bool write_reg_done; + void (* write_reg_ret)( VanadisRISCV64GDB * ); + + uint64_t mem_addr; + uint64_t mem_len; + uint64_t mem_index; + int64_t mem_data; + int64_t mem_saved_reg; + bool mem_first_byte; + void (* read_mem_byte_ret)( VanadisRISCV64GDB * ); + void (* write_mem_byte_ret)( VanadisRISCV64GDB * ); +}; +} // namespace Vanadis +} // namespace SST + +#endif diff --git a/src/sst/elements/vanadis/inst/vgetregcallable.h b/src/sst/elements/vanadis/inst/vgetregcallable.h new file mode 100644 index 0000000000..32c7097399 --- /dev/null +++ b/src/sst/elements/vanadis/inst/vgetregcallable.h @@ -0,0 +1,74 @@ +// Copyright 2009-2024 NTESS. Under the terms +// of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Copyright (c) 2009-2024, NTESS +// All rights reserved. +// +// Portions are copyright of other developers: +// See the file CONTRIBUTORS.TXT in the top level directory +// of the distribution for more information. +// +// This file is part of the SST software package. For license +// information, see the LICENSE file in the top level directory of the +// distribution. 
+ +#ifndef _H_VANADIS_GETREG_BY_CALL +#define _H_VANADIS_GETREG_BY_CALL + +#include "inst/vinst.h" + +#include + +namespace SST { +namespace Vanadis { + +template +class VanadisGetRegisterByCallInstruction : public VanadisInstruction +{ +public: + VanadisGetRegisterByCallInstruction( + const uint64_t addr, const uint32_t hw_thr, const VanadisDecoderOptions* isa_opts, const uint16_t src_1, + void (* call)(void *, void *), void * arg_input) : + VanadisInstruction(addr, hw_thr, isa_opts, 1, 0, 1, 0, 0, 0, 0, 0), call_func(call), arg_in(arg_input) + { + isa_int_regs_in[0] = src_1; + } + + VanadisGetRegisterByCallInstruction* clone() override { return new VanadisGetRegisterByCallInstruction(*this); } + VanadisFunctionalUnitType getInstFuncType() const override { return INST_INT_ARITH; } + const char* getInstCode() const override { return "GETREG"; } + + void printToBuffer(char* buffer, size_t buffer_size) override + { + snprintf( + buffer, buffer_size, "GETREG %5" PRIu16 " <- imm=function() (phys: %5" PRIu16 " <- function())", + isa_int_regs_in[0], phys_int_regs_in[0]); + } + + void execute(SST::Output* output, VanadisRegisterFile* regFile) override + { +#ifdef VANADIS_BUILD_DEBUG + if(output->getVerboseLevel() >= 16) { + std::ostringstream ss; + ss << "Execute: 0x" << std::hex << getInstructionAddress() << std::dec << " " << getInstCode(); + ss << " phys: in= " << phys_int_regs_in[0] << ", isa: in=" << isa_int_regs_in[0]; + output->verbose( CALL_INFO, 16, 0, "%s\n", ss.str().c_str()); + } +#endif + + uint64_t src_1 = regFile->getIntReg(phys_int_regs_in[0]); + (*call_func)( arg_in, &src_1 ); + + markExecuted(); + } + +private: + void (* call_func)(void *, void *); + void * arg_in; +}; + +} // namespace Vanadis +} // namespace SST + +#endif diff --git a/src/sst/elements/vanadis/inst/vinstall.h b/src/sst/elements/vanadis/inst/vinstall.h index 736f5385c4..8acd664972 100644 --- a/src/sst/elements/vanadis/inst/vinstall.h +++ b/src/sst/elements/vanadis/inst/vinstall.h @@ 
-85,6 +85,7 @@ #include "inst/vnop.h" #include "inst/vsetreg.h" #include "inst/vsetregcallable.h" +#include "inst/vgetregcallable.h" #include "inst/vsyscall.h" // int-reg move diff --git a/src/sst/elements/vanadis/inst/vsetregcallable.h b/src/sst/elements/vanadis/inst/vsetregcallable.h index 725f4155dc..f9a6bcdc68 100644 --- a/src/sst/elements/vanadis/inst/vsetregcallable.h +++ b/src/sst/elements/vanadis/inst/vsetregcallable.h @@ -26,13 +26,11 @@ namespace Vanadis { template class VanadisSetRegisterByCallInstruction : public VanadisInstruction { - typedef std::function SetRegisterCallable; - public: VanadisSetRegisterByCallInstruction( const uint64_t addr, const uint32_t hw_thr, const VanadisDecoderOptions* isa_opts, const uint16_t dest, - SetRegisterCallable call) : - VanadisInstruction(addr, hw_thr, isa_opts, 0, 1, 0, 1, 0, 0, 0, 0), call_func(call) + void * (* call)(void *), void * arg_input) : + VanadisInstruction(addr, hw_thr, isa_opts, 0, 1, 0, 1, 0, 0, 0, 0), call_func(call), arg_in(arg_input) { isa_int_regs_out[0] = dest; } @@ -50,7 +48,7 @@ class VanadisSetRegisterByCallInstruction : public VanadisInstruction void execute(SST::Output* output, VanadisRegisterFile* regFile) override { - const reg_format reg_value = call_func(); + const reg_format reg_value = *(reg_format *)((*call_func)( arg_in )); #ifdef VANADIS_BUILD_DEBUG if(output->getVerboseLevel() >= 16) { @@ -75,7 +73,8 @@ class VanadisSetRegisterByCallInstruction : public VanadisInstruction } private: - SetRegisterCallable call_func; + void * (* call_func)(void *); + void * arg_in; }; } // namespace Vanadis diff --git a/src/sst/elements/vanadis/tests/basic_vanadis_gdb.py b/src/sst/elements/vanadis/tests/basic_vanadis_gdb.py new file mode 100644 index 0000000000..461d315530 --- /dev/null +++ b/src/sst/elements/vanadis/tests/basic_vanadis_gdb.py @@ -0,0 +1,600 @@ +import os +import sst +mh_debug_level=10 +mh_debug=0 +# this has to be a string +dbgAddr="0" +stopDbg="0" + +checkpointDir = "" +checkpoint 
= "" + +#checkpointDir = "checkpoint0" +#checkpoint = "load" +#checkpoint = "save" + +pythonDebug=False + +vanadis_isa = os.getenv("VANADIS_ISA", "MIPS") +isa="mipsel" +vanadis_isa = os.getenv("VANADIS_ISA", "RISCV64") +isa="riscv64" + +loader_mode = os.getenv("VANADIS_LOADER_MODE", "0") + +testDir="basic-io" +exe = "hello-world" +#exe = "hello-world-cpp" +#exe = "openat" +#exe = "printf-check" +#exe = "read-write" +#exe = "fread-fwrite" +#exe = "unlink" +#exe = "unlinkat" +#exe = "lseek" + +#testDir = "basic-math" +#exe = "sqrt-double" +#exe = "sqrt-float" + +#testDir = "basic-ops" +#exe = "test-branch" +#exe = "test-shift" + +#testDir = "misc" +#exe = "mt-dgemm" +#exe = "stream" +#exe = "stream-fortran" +#exe = "gettime" +#exe = "splitLoad" +#exe = "fork" +#exe = "clone" +#exe = "pthread" +#exe = "openmp" +#exe = "openmp2" +#exe = "uname" +#exe = "mem-test" +#exe = "checkpoint" + +physMemSize = "4GiB" + +tlbType = "simpleTLB" +mmuType = "simpleMMU" + +# Define SST core options +sst.setProgramOption("timebase", "1ps") +sst.setProgramOption("stop-at", "0 ns") + +# Tell SST what statistics handling we want +sst.setStatisticLoadLevel(4) +sst.setStatisticOutput("sst.statOutputConsole") + +full_exe_name = os.getenv("VANADIS_EXE", "./small/" + testDir + "/" + exe + "/" + isa + "/" + exe ) +exe_name= full_exe_name.split("/")[-1] + +verbosity = int(os.getenv("VANADIS_VERBOSE", 0)) +os_verbosity = os.getenv("VANADIS_OS_VERBOSE", verbosity) +pipe_trace_file = os.getenv("VANADIS_PIPE_TRACE", "") +lsq_ld_entries = os.getenv("VANADIS_LSQ_LD_ENTRIES", 16) +lsq_st_entries = os.getenv("VANADIS_LSQ_ST_ENTRIES", 8) + +rob_slots = os.getenv("VANADIS_ROB_SLOTS", 64) +retires_per_cycle = os.getenv("VANADIS_RETIRES_PER_CYCLE", 4) +issues_per_cycle = os.getenv("VANADIS_ISSUES_PER_CYCLE", 4) +decodes_per_cycle = os.getenv("VANADIS_DECODES_PER_CYCLE", 4) + +integer_arith_cycles = int(os.getenv("VANADIS_INTEGER_ARITH_CYCLES", 2)) +integer_arith_units = 
int(os.getenv("VANADIS_INTEGER_ARITH_UNITS", 2)) +fp_arith_cycles = int(os.getenv("VANADIS_FP_ARITH_CYCLES", 8)) +fp_arith_units = int(os.getenv("VANADIS_FP_ARITH_UNITS", 2)) +branch_arith_cycles = int(os.getenv("VANADIS_BRANCH_ARITH_CYCLES", 2)) + +cpu_clock = os.getenv("VANADIS_CPU_CLOCK", "2.3GHz") + +numCpus = int(os.getenv("VANADIS_NUM_CORES", 1)) +numThreads = int(os.getenv("VANADIS_NUM_HW_THREADS", 1)) + +vanadis_cpu_type = "vanadis." +vanadis_cpu_type += os.getenv("VANADIS_CPU_ELEMENT_NAME","dbg_VanadisCPU") + +if (verbosity > 0): + print("Verbosity: " + str(verbosity) + " -> loading Vanadis CPU type: " + vanadis_cpu_type) + print("Auto-clock syscalls: " + str(auto_clock_sys)) +# vanadis_cpu_type = "vanadisdbg.VanadisCPU" + +app_args = os.getenv("VANADIS_EXE_ARGS", "") + +app_params = {} +if app_args != "": + app_args_list = app_args.split(" ") + # We have a plus 1 because the executable name is arg0 + app_args_count = len( app_args_list ) + 1 + + app_params["argc"] = app_args_count + + if (verbosity > 0): + print("Identified " + str(app_args_count) + " application arguments, adding to input parameters.") + arg_start = 1 + for next_arg in app_args_list: + if (verbosity > 0): + print("arg" + str(arg_start) + " = " + next_arg) + app_params["arg" + str(arg_start)] = next_arg + arg_start = arg_start + 1 +else: + app_params["argc"] = 1 + if (verbosity > 0): + print("No application arguments found, continuing with argc=1") + +vanadis_decoder = "vanadis.Vanadis" + vanadis_isa + "Decoder" +vanadis_os_hdlr = "vanadis.Vanadis" + vanadis_isa + "OSHandler" + + +protocol="MESI" + +# OS related params +osParams = { + "processDebugLevel" : 0, + "dbgLevel" : os_verbosity, + "dbgMask" : 8, + "cores" : numCpus, + "hardwareThreadCount" : numThreads, + "page_size" : 4096, + "physMemSize" : physMemSize, + "useMMU" : True, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + + +processList = ( + ( 1, { + "env_count" : 1, + "env0" : 
"OMP_NUM_THREADS={}".format(numCpus*numThreads), + "exe" : full_exe_name, + "arg0" : exe_name, + } ), + #( 1, { + # "env_count" : 2, "env0" : "HOME=/home/sdhammo", "env1" : "NEWHOME=/home/sdhammo2", "argc" : 1, "exe" : "./tests/small/basic-io/hello-world/mipsel/hello-world", + #"exe" : "./tests/small/basic-io/read-write/mipsel/read-write", + #} ), +) + +processList[0][1].update(app_params) + +osl1cacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +mmuParams = { + "debug_level": 0, + "num_cores": numCpus, + "num_threads": numThreads, + "page_size": 4096, +} + +memRtrParams ={ + "xbar_bw" : "1GB/s", + "link_bw" : "1GB/s", + "input_buf_size" : "2KB", + "num_ports" : str(numCpus+2), + "flit_size" : "72B", + "output_buf_size" : "2KB", + "id" : "0", + "topology" : "merlin.singlerouter" +} + +dirCtrlParams = { + "coherence_protocol" : protocol, + "entry_cache_size" : "1024", + "debug" : mh_debug, + "debug_level" : mh_debug_level, + "addr_range_start" : "0x0", + "addr_range_end" : "0xFFFFFFFF" +} + +dirNicParams = { + "network_bw" : "25GB/s", + "group" : 2, +} + +memCtrlParams = { + "clock" : cpu_clock, + "backend.mem_size" : physMemSize, + "backing" : "malloc", + "initBacking": 1, + "addr_range_start": 0, + "addr_range_end": 0xffffffff, + "debug_level" : mh_debug_level, + "debug" : mh_debug, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +memParams = { + "mem_size" : "4GiB", + "access_time" : "1 ns" +} + +# CPU related params +tlbParams = { + "debug_level": 0, + "hitLatency": 1, + "num_hardware_threads": numThreads, + "num_tlb_entries_per_thread": 64, + "tlb_set_size": 4, +} + +tlbWrapperParams = { + "debug_level": 0, +} + +decoderParams = { + "loader_mode" : loader_mode, + "uop_cache_entries" : 1536, + 
"predecode_cache_entries" : 4 +} + +osHdlrParams = { } + +branchPredParams = { + "branch_entries" : 32 +} + +gdbParams = { + "gdb_active" : True, + "gdb_port" : 1234, + "gdb_break_at_start" : True, + "gdb_debug_comms" : True +} + +cpuParams = { + "clock" : cpu_clock, + "verbose" : verbosity, + "hardware_threads": numThreads, + "physical_fp_registers" : 168 * numThreads, + "physical_integer_registers" : 180 * numThreads, + "integer_arith_cycles" : integer_arith_cycles, + "integer_arith_units" : integer_arith_units, + "fp_arith_cycles" : fp_arith_cycles, + "fp_arith_units" : fp_arith_units, + "branch_unit_cycles" : branch_arith_cycles, + "print_int_reg" : False, + "print_fp_reg" : False, + "pipeline_trace_file" : pipe_trace_file, + "reorder_slots" : rob_slots, + "decodes_per_cycle" : decodes_per_cycle, + "issues_per_cycle" : issues_per_cycle, + "retires_per_cycle" : retires_per_cycle, + "pause_when_retire_address" : os.getenv("VANADIS_HALT_AT_ADDRESS", 0), + "start_verbose_when_issue_address": dbgAddr, + "stop_verbose_when_retire_address": stopDbg, + "print_rob" : False, + "checkpointDir" : checkpointDir, + "checkpoint" : checkpoint +} + +lsqParams = { + "verbose" : verbosity, + "address_mask" : 0xFFFFFFFF, + "max_stores" : lsq_st_entries, + "max_loads" : lsq_ld_entries, +} + +l1dcacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l1icacheParams = { + "access_latency_cycles" : "2", + "cache_frequency" : cpu_clock, + "replacement_policy" : "lru", + "coherence_protocol" : protocol, + "associativity" : "8", + "cache_line_size" : "64", + "cache_size" : "32 KB", + "prefetcher" : "cassini.NextBlockPrefetcher", + "prefetcher.reach" : 1, + "L1" : "1", + "debug" : mh_debug, + "debug_level" : mh_debug_level, +} + +l2cacheParams = 
{
    "access_latency_cycles" : "14",
    "cache_frequency" : cpu_clock,
    "replacement_policy" : "lru",
    "coherence_protocol" : protocol,
    "associativity" : "16",
    "cache_line_size" : "64",
    "cache_size" : "1MB",
    "mshr_latency_cycles": 3,
    "debug" : mh_debug,
    "debug_level" : mh_debug_level,
}
busParams = {
    "bus_frequency" : cpu_clock,
}

l2memLinkParams = {
    "group" : 1,
    "network_bw" : "25GB/s"
}

class CPU_Builder:
    """Builds one Vanadis core together with its private L1/L2 caches and TLBs."""

    def __init__(self):
        pass

    def build( self, prefix, nodeId, cpuId ):
        """Create the core named `prefix` and wire up its private memory system.

        Returns four link endpoints, in this order: the core's OS link,
        the L2 network port, the data-TLB MMU port and the
        instruction-TLB MMU port.
        """

        if pythonDebug:
            print("build {}".format(prefix) )

        # CPU
        cpu = sst.Component(prefix, vanadis_cpu_type)
        cpu.addParams( cpuParams )
        cpu.addParam( "core_id", cpuId )
        cpu.enableAllStatistics()

        # CPU.decoder, one per hardware thread
        for n in range(numThreads):
            decode = cpu.setSubComponent( "decoder"+str(n), vanadis_decoder )
            decode.addParams( decoderParams )

            decode.enableAllStatistics()

            # CPU.decoder.osHandler
            os_hdlr = decode.setSubComponent( "os_handler", vanadis_os_hdlr )
            os_hdlr.addParams( osHdlrParams )

            # CPU.decoder.branch_pred
            branch_pred = decode.setSubComponent( "branch_unit", "vanadis.VanadisBasicBranchUnit" )
            branch_pred.addParams( branchPredParams )
            branch_pred.enableAllStatistics()

            # CPU.decoder.gdb
            # NOTE(review): kept inside the per-thread loop next to the
            # other decoder subcomponents - confirm against the patch.
            gdb = decode.setSubComponent( "gdb_unit", "vanadis.VanadisRISCV64GDB" )
            gdb.addParams( gdbParams )
            gdb.enableAllStatistics()

        # CPU.lsq
        cpu_lsq = cpu.setSubComponent( "lsq", "vanadis.VanadisBasicLoadStoreQueue" )
        cpu_lsq.addParams(lsqParams)
        cpu_lsq.enableAllStatistics()

        # CPU.lsq mem interface which connects to D-cache
        cpuDcacheIf = cpu_lsq.setSubComponent( "memory_interface", "memHierarchy.standardInterface" )

        # CPU.mem interface for I-cache
        cpuIcacheIf = cpu.setSubComponent( "mem_interface_inst", "memHierarchy.standardInterface" )

        # L1 D-cache
        cpu_l1dcache = sst.Component(prefix + ".l1dcache", "memHierarchy.Cache")
        cpu_l1dcache.addParams( l1dcacheParams )

        # L1 D-cache to cpu interface
        l1dcache_2_cpu = cpu_l1dcache.setSubComponent("cpulink", "memHierarchy.MemLink")
        # L1 D-cache to L2 interface
        l1dcache_2_l2cache = cpu_l1dcache.setSubComponent("memlink", "memHierarchy.MemLink")

        # L1 I-cache
        cpu_l1icache = sst.Component( prefix + ".l1icache", "memHierarchy.Cache")
        cpu_l1icache.addParams( l1icacheParams )

        # L1 I-cache to cpu interface
        l1icache_2_cpu = cpu_l1icache.setSubComponent("cpulink", "memHierarchy.MemLink")
        # L1 I-cache to L2 interface
        l1icache_2_l2cache = cpu_l1icache.setSubComponent("memlink", "memHierarchy.MemLink")

        # L2 cache
        cpu_l2cache = sst.Component(prefix+".l2cache", "memHierarchy.Cache")
        cpu_l2cache.addParams( l2cacheParams )

        # L2 cache cpu interface
        l2cache_2_l1caches = cpu_l2cache.setSubComponent("cpulink", "memHierarchy.MemLink")

        # L2 cache mem interface
        l2cache_2_mem = cpu_l2cache.setSubComponent("memlink", "memHierarchy.MemNIC")
        l2cache_2_mem.addParams( l2memLinkParams )

        # L1 to L2 bus
        cache_bus = sst.Component(prefix+".bus", "memHierarchy.Bus")
        cache_bus.addParams(busParams)

        # CPU data TLB
        dtlbWrapper = sst.Component(prefix+".dtlb", "mmu.tlb_wrapper")
        dtlbWrapper.addParams(tlbWrapperParams)
#        dtlbWrapper.addParam( "debug_level", 0)
        dtlb = dtlbWrapper.setSubComponent("tlb", "mmu." + tlbType )
        dtlb.addParams(tlbParams)

        # CPU instruction TLB
        itlbWrapper = sst.Component(prefix+".itlb", "mmu.tlb_wrapper")
        itlbWrapper.addParams(tlbWrapperParams)
#        itlbWrapper.addParam( "debug_level", 0)
        itlbWrapper.addParam("exe",True)
        itlb = itlbWrapper.setSubComponent("tlb", "mmu." + tlbType )
        itlb.addParams(tlbParams)

        # CPU (data) -> TLB -> Cache
        link_cpu_dtlb_link = sst.Link(prefix+".link_cpu_dtlb_link")
        link_cpu_dtlb_link.connect( (cpuDcacheIf, "port", "1ns"), (dtlbWrapper, "cpu_if", "1ns") )
        link_cpu_dtlb_link.setNoCut()

        # data TLB -> data L1
        link_cpu_l1dcache_link = sst.Link(prefix+".link_cpu_l1dcache_link")
        link_cpu_l1dcache_link.connect( (dtlbWrapper, "cache_if", "1ns"), (l1dcache_2_cpu, "port", "1ns") )
        link_cpu_l1dcache_link.setNoCut()

        # CPU (instruction) -> TLB -> Cache
        link_cpu_itlb_link = sst.Link(prefix+".link_cpu_itlb_link")
        link_cpu_itlb_link.connect( (cpuIcacheIf, "port", "1ns"), (itlbWrapper, "cpu_if", "1ns") )
        link_cpu_itlb_link.setNoCut()

        # instruction TLB -> instruction L1
        link_cpu_l1icache_link = sst.Link(prefix+".link_cpu_l1icache_link")
        link_cpu_l1icache_link.connect( (itlbWrapper, "cache_if", "1ns"), (l1icache_2_cpu, "port", "1ns") )
        link_cpu_l1icache_link.setNoCut()

        # data L1 -> bus
        link_l1dcache_l2cache_link = sst.Link(prefix+".link_l1dcache_l2cache_link")
        link_l1dcache_l2cache_link.connect( (l1dcache_2_l2cache, "port", "1ns"), (cache_bus, "high_network_0", "1ns") )
        link_l1dcache_l2cache_link.setNoCut()

        # instruction L1 -> bus
        link_l1icache_l2cache_link = sst.Link(prefix+".link_l1icache_l2cache_link")
        link_l1icache_l2cache_link.connect( (l1icache_2_l2cache, "port", "1ns"), (cache_bus, "high_network_1", "1ns") )
        link_l1icache_l2cache_link.setNoCut()

        # BUS to L2 cache
        link_bus_l2cache_link = sst.Link(prefix+".link_bus_l2cache_link")
        link_bus_l2cache_link.connect( (cache_bus, "low_network_0", "1ns"), (l2cache_2_l1caches, "port", "1ns") )
        link_bus_l2cache_link.setNoCut()

        return (cpu, "os_link", "5ns"), (l2cache_2_mem, "port", "1ns") , (dtlb, "mmu", "1ns"), (itlb, "mmu", "1ns")


def addParamsPrefix(prefix,params):
    """Return a copy of `params` with every key rewritten to "<prefix>.<key>"."""
    #print( prefix )
    ret = {}
    for key, value in params.items():
        #print( key, value )
        ret[ prefix + "." + key] = value

    #print( ret )
    return ret

# node OS
node_os = sst.Component("os", "vanadis.VanadisNodeOS")
node_os.addParams(osParams)

# Register every process instance as process0, process1, ...
num=0
for i,process in processList:
    #print( process )
    for y in range(i):
        #print( "process", num )
        node_os.addParams( addParamsPrefix( "process" + str(num), process ) )
        num+=1

if pythonDebug:
    print('total hardware threads ' + str(num) )

# node OS MMU
node_os_mmu = node_os.setSubComponent( "mmu", "mmu." + mmuType )
node_os_mmu.addParams(mmuParams)

# node OS memory interface to L1 data cache
node_os_mem_if = node_os.setSubComponent( "mem_interface", "memHierarchy.standardInterface" )

# node OS l1 data cache
os_cache = sst.Component("node_os.cache", "memHierarchy.Cache")
os_cache.addParams(osl1cacheParams)
os_cache_2_cpu = os_cache.setSubComponent("cpulink", "memHierarchy.MemLink")
os_cache_2_mem = os_cache.setSubComponent("memlink", "memHierarchy.MemNIC")
os_cache_2_mem.addParams( l2memLinkParams )

# node memory router
comp_chiprtr = sst.Component("chiprtr", "merlin.hr_router")
comp_chiprtr.addParams(memRtrParams)
comp_chiprtr.setSubComponent("topology","merlin.singlerouter")

# node directory controller
dirctrl = sst.Component("dirctrl", "memHierarchy.DirectoryController")
dirctrl.addParams(dirCtrlParams)

# node directory controller port to memory
dirtoM = dirctrl.setSubComponent("memlink", "memHierarchy.MemLink")
# node directory controller port to cpu
dirNIC = dirctrl.setSubComponent("cpulink", "memHierarchy.MemNIC")
dirNIC.addParams(dirNicParams)

# node memory controller
memctrl = sst.Component("memory", "memHierarchy.MemController")
memctrl.addParams( memCtrlParams )

# node memory controller port to directory controller
memToDir = memctrl.setSubComponent("cpulink", "memHierarchy.MemLink")

# node memory controller backend
memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem")
memory.addParams(memParams)

# node OS data TLB
#ostlbWrapper = sst.Component("ostlb", "mmu.tlb_wrapper")
#ostlbWrapper.addParams(tlbWrapperParams)
#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu." + tlbType );
#ostlb = ostlbWrapper.setSubComponent("tlb", "mmu.passThroughTLB" );
#ostlb.addParams(tlbParams)

# OS (data) -> TLB -> Cache
#link_os_ostlb_link = sst.Link("link_os_ostlb_link")
#link_os_ostlb_link.connect( (node_os_mem_if, "port", "1ns"), (ostlbWrapper, "cpu_if", "1ns") )

# Directory controller to memory router
link_dir_2_rtr = sst.Link("link_dir_2_rtr")
link_dir_2_rtr.connect( (comp_chiprtr, "port"+str(numCpus), "1ns"), (dirNIC, "port", "1ns") )
link_dir_2_rtr.setNoCut()

# Directory controller to memory controller
link_dir_2_mem = sst.Link("link_dir_2_mem")
link_dir_2_mem.connect( (dirtoM, "port", "1ns"), (memToDir, "port", "1ns") )
link_dir_2_mem.setNoCut()

# MMU -> ostlb
# don't need when using pass through TLB
#link_mmu_ostlb_link = sst.Link("link_mmu_ostlb_link")
#link_mmu_ostlb_link.connect( (node_os_mmu, "ostlb", "1ns"), (ostlb, "mmu", "1ns") )

# node OS memory interface -> os l1 cache (no OS TLB in between)
link_os_cache_link = sst.Link("link_os_cache_link")
#link_os_cache_link.connect( (ostlbWrapper, "cache_if", "1ns"), (os_cache_2_cpu, "port", "1ns") )
link_os_cache_link.connect( (node_os_mem_if, "port", "1ns"), (os_cache_2_cpu, "port", "1ns") )
link_os_cache_link.setNoCut()

os_cache_2_rtr = sst.Link("os_cache_2_rtr")
os_cache_2_rtr.connect( (os_cache_2_mem, "port", "1ns"), (comp_chiprtr, "port"+str(numCpus+1), "1ns") )
os_cache_2_rtr.setNoCut()

cpuBuilder = CPU_Builder()

# build all CPUs
nodeId = 0
for cpu in range(numCpus):

    prefix="node" + str(nodeId) + ".cpu" + str(cpu)
    os_hdlr, l2cache, dtlb, itlb = cpuBuilder.build(prefix, nodeId, cpu)

    # MMU -> dtlb
    link_mmu_dtlb_link = sst.Link(prefix + ".link_mmu_dtlb_link")
    link_mmu_dtlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".dtlb", "1ns"), dtlb )

    # MMU -> itlb
    link_mmu_itlb_link = sst.Link(prefix + ".link_mmu_itlb_link")
link_mmu_itlb_link.connect( (node_os_mmu, "core"+ str(cpu) +".itlb", "1ns"), itlb ) + + # CPU os handler -> node OS + link_core_os_link = sst.Link(prefix + ".link_core_os_link") + link_core_os_link.connect( os_hdlr, (node_os, "core" + str(cpu), "5ns") ) + + # connect cpu L2 to router + link_l2cache_2_rtr = sst.Link(prefix + ".link_l2cache_2_rtr") + link_l2cache_2_rtr.connect( l2cache, (comp_chiprtr, "port" + str(cpu), "1ns") ) + diff --git a/src/sst/elements/vanadis/vanadis.cc b/src/sst/elements/vanadis/vanadis.cc index 7e970d33e3..01b4a6d6bc 100644 --- a/src/sst/elements/vanadis/vanadis.cc +++ b/src/sst/elements/vanadis/vanadis.cc @@ -207,6 +207,8 @@ VANADIS_COMPONENT::VANADIS_COMPONENT(SST::ComponentId_t id, SST::Params& params) retire_isa_tables[i]->reset(issue_isa_tables[i]); halted_masks[i] = true; + + thread_decoders[i]->register_pipeline( output ); } delete[] decoder_name; @@ -1211,6 +1213,10 @@ VANADIS_COMPONENT::tick(SST::Cycle_t cycle) return true; } + for ( uint32_t i = 0; i < hw_threads; ++i ) { + thread_decoders[i]->gdb_tick(); + } + #ifdef VANADIS_BUILD_DEBUG const auto output_verbosity = output->getVerboseLevel(); #endif @@ -2180,7 +2186,7 @@ VANADIS_COMPONENT::checkpoint(FILE* fp ) auto reg_file = register_files[i]; auto thr_decoder = thread_decoders[i]; - uint64_t tlsPtr = thread_decoders[i]->getThreadLocalStoragePointer(); + uint64_t tlsPtr = *(uint64_t *)thread_decoders[i]->getThreadLocalStoragePointer(); fprintf(fp,"tlsPtr: %#" PRIx64 "\n",tlsPtr); for ( int i = 0; i < isa_table->getNumIntRegs(); i++ ) { uint64_t val = reg_file->getIntReg( isa_table->getIntPhysReg( i ) ); @@ -2263,7 +2269,7 @@ void VANADIS_COMPONENT::getThreadState( VanadisGetThreadStateReq* req ) auto isa_table = retire_isa_tables[hw_thr]; auto reg_file = register_files[hw_thr]; uint64_t instPtr = rob[hw_thr]->peek()->getInstructionAddress(); - uint64_t tlsPtr = thread_decoders[hw_thr]->getThreadLocalStoragePointer(); + uint64_t tlsPtr = *(uint64_t 
*)thread_decoders[hw_thr]->getThreadLocalStoragePointer(); output->verbose(CALL_INFO, 8, 0,"get thread state, hw_th=%d instPtr=%#" PRIx64 " tlsPtr=%#" PRIx64 "\n",hw_thr,instPtr,tlsPtr);