From 72c09d489471f57dd080225c73f8948c76e55447 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 1 Oct 2024 22:35:30 -0400 Subject: [PATCH 1/6] Revamp command line options, added new options to set various thresholds. VBits are now set for GPU use case and based on MEMORY_ACCESS_SIZE making it consistent with other bit based values. Fixed some formatting, including spaces instead of tabs. --- gs_patterns.h | 38 +++--- gs_patterns_core.cpp | 157 ++----------------------- gs_patterns_core.h | 253 +++++++++++++++++++++++++++++++--------- gs_patterns_main.cpp | 169 +++++++++++++++++++++++---- gsnv_patterns.cpp | 16 ++- gsnv_patterns.h | 8 ++ gspin_patterns.cpp | 15 ++- gspin_patterns.h | 4 + nvbit_tracing/README.md | 21 ++-- 9 files changed, 418 insertions(+), 263 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index f74b67c..3f151bd 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -23,20 +23,16 @@ #define OBOUNDS_ALLOC (2*OBOUNDS + 3) //patterns -#define USTRIDES 1024 //Threshold for number of accesses -#define NSTRIDES 15 //Threshold for number of unique distances -#define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram +#define DEFAULT_THRESHOLD_USTRIDES 1024 //Default Threshold for number of accesses +#define DEFAULT_THRESHOLD_NSTRIDES 15 //Default Threshold for number of unique distances +#define DEFAULT_THRESHOLD_OUT_DIST_PERCENT (0.5) //Default Threshold for percentage of distances at boundaries of histogram + #define NTOP (10) //Final gather / scatters to keep #define INITIAL_PSIZE (1<<15) #define MAX_PSIZE (1<<30) //Max number of indices recorded per gather/scatter #define MAX_LINE_LENGTH 1024 -#if !defined(VBITS) -# define VBITS (512L) -# define VBYTES (VBITS/8) -#endif - namespace gs_patterns { typedef uintptr_t addr_t; @@ -163,21 +159,21 @@ namespace gs_patterns Metrics(const Metrics &) = delete; Metrics & operator=(const Metrics & right) = delete; - std::string type_as_string() { return !_mType ? 
"GATHER" : "SCATTER"; } - std::string getName() { return !_mType ? "Gather" : "Scatter"; } - std::string getShortName() { return !_mType ? "G" : "S"; } - std::string getShortNameLower() { return !_mType ? "g" : "s"; } + std::string type_as_string() { return !_mType ? "GATHER" : "SCATTER"; } + std::string getName() { return !_mType ? "Gather" : "Scatter"; } + std::string getShortName() { return !_mType ? "G" : "S"; } + std::string getShortNameLower() { return !_mType ? "g" : "s"; } auto get_srcline() { return srcline[_mType]; } int ntop = 0; - int64_t iaddrs_nosym = 0; + int64_t iaddrs_nosym = 0; int64_t indices_nosym = 0; - int64_t iaddrs_sym = 0; - int64_t indices_sym = 0; + int64_t iaddrs_sym = 0; + int64_t indices_sym = 0; double cnt = 0.0; int offset[NTOP] = {0}; - int size[NTOP] = {0}; + int size[NTOP] = {0}; addr_t tot[NTOP] = {0}; addr_t top[NTOP] = {0}; @@ -314,6 +310,14 @@ namespace gs_patterns int64_t maddr; }; + struct Thresholds + { + // Defaults set here, but potentially overrided + int num_accesses = DEFAULT_THRESHOLD_USTRIDES; + int num_strides = DEFAULT_THRESHOLD_NSTRIDES; + float out_dist_percent = DEFAULT_THRESHOLD_OUT_DIST_PERCENT; + }; + template class MemPatterns { @@ -339,6 +343,8 @@ namespace gs_patterns get_instr_window() = 0; virtual void set_log_level(int8_t ll) = 0; virtual int8_t get_log_level() = 0; + + virtual Thresholds & get_thresholds() = 0; }; } // namespace gs_patterns diff --git a/gs_patterns_core.cpp b/gs_patterns_core.cpp index b9b0354..ba26509 100644 --- a/gs_patterns_core.cpp +++ b/gs_patterns_core.cpp @@ -39,150 +39,7 @@ namespace gs_patterns_core return; } - - void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, bool & first_spatter) - { - int i = 0; - int j = 0; - - //Create stride histogram and create spatter - int sidx; - int firstgs = 1; - int unique_strides; - int64_t hbin = 0; - int64_t n_stride[OBOUNDS_ALLOC]; - double outbounds; - - if (file_prefix.empty()) throw 
GSFileError ("Empty file prefix provided."); - - if (first_spatter) printf("\n"); - - printf("\n"); - for (i = 0; i < target_metrics.ntop; i++) { - printf("***************************************************************************************\n"); - - unique_strides = 0; - for (j = 0; j < OBOUNDS_ALLOC; j++) - n_stride[j] = 0; - - for (j = 1; j < target_metrics.offset[i]; j++) { - sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + OBOUNDS + 1; - sidx = (sidx < 1) ? 0 : sidx; - sidx = (sidx > OBOUNDS_ALLOC - 1) ? OBOUNDS_ALLOC - 1 : sidx; - n_stride[sidx]++; - } - - for (j = 0; j < OBOUNDS_ALLOC; j++) { - if (n_stride[j] > 0) { - unique_strides++; - } - } - - outbounds = (double) (n_stride[0] + n_stride[OBOUNDS_ALLOC-1]) / (double) target_metrics.offset[i]; - - if (((unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) && (target_metrics.offset[i] > USTRIDES ) )) { - //if (true) { - - if (firstgs) { - firstgs = 0; - printf("***************************************************************************************\n"); - printf("%sS\n", target_metrics.type_as_string().c_str()); - } - printf("***************************************************************************************\n"); - //create a binary file - FILE * fp_bin; - - char bin_name[1024]; - sprintf(bin_name, "%s.%s.%03d.%02dB.sbin", file_prefix.c_str(), target_metrics.getShortNameLower().c_str(), \ - i, target_metrics.size[i]); - printf("%s\n", bin_name); - //std::string bin_name = \ - // file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." 
+ \ - // std::to_string(target_metrics.size[i]) + "B.sbin"; - - fp_bin = fopen(bin_name, "w"); - if (NULL == fp_bin) - throw GSFileError("Could not open " + std::string(bin_name) + "!"); - - printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); - printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); - printf("GATHER %c -- %6.3f%c (%4ld-bit chunks)\n", - '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%', VBITS); - printf("DTYPE -- %d bytes\n", target_metrics.size[i]); - printf("NINDICES -- %d\n", target_metrics.offset[i]); - printf("INDICES:\n"); - - int64_t nlcnt = 0; - for (j = 0; j < target_metrics.offset[i]; j++) { - - if (j <= 49) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (( ++nlcnt % 10) == 0) - printf("\n"); - - } else if (j >= (target_metrics.offset[i] - 50)) { - printf("%10ld ", target_metrics.patterns[i][j]); - fflush(stdout); - if (( ++nlcnt % 10) == 0) - printf("\n"); - - } else if (j == 50) - printf("...\n"); - } - printf("\n"); - printf("DIST HISTOGRAM --\n"); - - hbin = 0; - for(j=0; j 1 stride (non-contiguous) <-------------------- + // ? > 1 stride (non-contiguous) <-------------------- if ((gs == -1) && (abs(iw.get_maddr() - iw.get_maddr_prev()) > 1)) gs = w; } iw.get_maddr_prev() = iw.get_maddr(); } - //Once a gather/scatter, always a gather/scatter - if (gs == -1) { - - InstrInfo & target_iinfo = (w == 0) ? 
gather_iinfo : scatter_iinfo; - for(k=0; k + void create_metrics_file(FILE * fp, FILE * fp2, const std::string & file_prefix, Metrics & target_metrics, const Thresholds & thresholds, bool & first_spatter) + { + int i = 0; + int j = 0; - template - void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) + const size_t VBITS = MEMORY_ACCESS_SIZE/8; + + //Create stride histogram and create spatter + int sidx; + bool firstgs = true; + int unique_strides; + int64_t hbin = 0; + int64_t n_stride[OBOUNDS_ALLOC]; + double outbounds; + + if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided."); + + if (first_spatter) printf("\n"); + + printf("\n"); + for (i = 0; i < target_metrics.ntop; i++) { + printf("***************************************************************************************\n"); + + unique_strides = 0; + for (j = 0; j < OBOUNDS_ALLOC; j++) + n_stride[j] = 0; + + for (j = 1; j < target_metrics.offset[i]; j++) { + sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + OBOUNDS + 1; + sidx = (sidx < 1) ? 0 : sidx; + sidx = (sidx > OBOUNDS_ALLOC - 1) ? 
OBOUNDS_ALLOC - 1 : sidx; + n_stride[sidx]++; + } + + for (j = 0; j < OBOUNDS_ALLOC; j++) { + if (n_stride[j] > 0) { + unique_strides++; + } + } + + outbounds = (double) (n_stride[0] + n_stride[OBOUNDS_ALLOC-1]) / (double) target_metrics.offset[i]; + + if (unique_strides > thresholds.num_strides || + (outbounds > thresholds.out_dist_percent && target_metrics.offset[i] > thresholds.num_accesses)) + { + if (firstgs) { + firstgs = false; + printf("***************************************************************************************\n"); + printf("%sS\n", target_metrics.type_as_string().c_str()); + } + printf("***************************************************************************************\n"); + //create a binary file + FILE * fp_bin; + + char bin_name[1024]; + sprintf(bin_name, "%s.%s.%03d.%02dB.sbin", file_prefix.c_str(), target_metrics.getShortNameLower().c_str(), + i, target_metrics.size[i]); + + printf("%s\n", bin_name); + //std::string bin_name = \ + //file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." 
+ \ + //std::to_string(target_metrics.size[i]) + "B.sbin"; + + fp_bin = fopen(bin_name, "w"); + if (NULL == fp_bin) + throw GSFileError("Could not open " + std::string(bin_name) + "!"); + + printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]); + printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]); + printf("GATHER %c -- %6.3f%c (%4ld-bit chunks)\n", + '%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%', VBITS); + printf("DTYPE -- %d bytes\n", target_metrics.size[i]); + printf("NINDICES -- %d\n", target_metrics.offset[i]); + printf("INDICES:\n"); + + int64_t nlcnt = 0; + for (j = 0; j < target_metrics.offset[i]; j++) { + + if (j <= 49) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (( ++nlcnt % 10) == 0) + printf("\n"); + + } else if (j >= (target_metrics.offset[i] - 50)) { + printf("%10ld ", target_metrics.patterns[i][j]); + fflush(stdout); + if (( ++nlcnt % 10) == 0) + printf("\n"); + + } else if (j == 50) + printf("...\n"); + } + printf("\n"); + printf("DIST HISTOGRAM --\n"); + + hbin = 0; + for (j=0; j < OBOUNDS_ALLOC; j++) + { + + if (j == 0) { + printf("( -inf, %5ld]: %ld\n", (int64_t)(-(VBITS+1)), n_stride[j]); + hbin = 0; + + } else if (j == OBOUNDS +1) { + printf("[%5ld, 0): %ld\n", (int64_t)-VBITS, hbin); + hbin = 0; + + } else if (j == (OBOUNDS_ALLOC-2) ) { + printf("[ 0, %5ld]: %ld\n", VBITS, hbin); + hbin = 0; + + } else if (j == (OBOUNDS_ALLOC-1)) { + printf("[%5ld, inf): %ld\n", VBITS+1, n_stride[j]); + + } else { + hbin += n_stride[j]; + } + } + + if (first_spatter) { + first_spatter = false; + fprintf(fp, " {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); + } else { + fprintf(fp, ",\n {\"kernel\":\"%s\", \"pattern\":[", target_metrics.getName().c_str()); + } + + fwrite(target_metrics.patterns[i], sizeof(uint64_t), target_metrics.offset[i], fp_bin); + fclose(fp_bin); + + for (j = 0; j < 
target_metrics.offset[i] - 1; j++) + fprintf(fp, "%ld,", target_metrics.patterns[i][j]); + fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]); + fprintf(fp, "], \"count\":1}"); + + fprintf(fp2, "0x%lx,%s,%d,%s,%d,%6.3f\n", + target_metrics.top[i], + target_metrics.get_srcline()[target_metrics.top_idx[i]], + target_metrics.size[i], + target_metrics.getShortName().c_str(), + target_metrics.offset[i], + 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt); + } + printf("***************************************************************************************\n\n"); + } + } + + template + void create_spatter_file(MemPatterns & mp, const std::string & file_prefix) { // Create spatter file FILE *fp, *fp2; @@ -298,12 +441,12 @@ namespace gs_patterns_core // Header fprintf(fp, "[ "); - fprintf(fp2, "#iaddr, sourceline, type size bytes, g/s, nindices, final percentage of g/s\n"); + fprintf(fp2, "#iaddr, sourceline, type size bytes, g/s, nindices, final percentage of g/s\n"); bool first_spatter = true; - create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_gather_metrics(), mp.get_thresholds(), first_spatter); - create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), first_spatter); + create_metrics_file(fp, fp2, file_prefix, mp.get_scatter_metrics(), mp.get_thresholds(), first_spatter); // Footer fprintf(fp, " ]"); diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index 3794c28..f356b9d 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "gs_patterns.h" #include "gs_patterns_core.h" @@ -19,49 +20,169 @@ using namespace gs_patterns::gspin_patterns; void usage (const std::string & prog_name) { - std::cerr << "Usage: " << prog_name << " \n" - << " " << prog_name << " -nv [-ow] [-v]" << std::endl; + std::cerr << "Usage: " << prog_name << " \n" + << " " << prog_name << " \n" + << 
" [ -n, -nvbit ] - Trace file provided is NVBit trace\n" + << " [ -w, -num_strides ] - Use memory acceses from one warp only (warp 0)\n" + << " [ -v, -verbose ] - Verbose output\n" + << "Additional options: \n" + << " [ -a, -num_accesses ] - Threshold for number of accesses\n" + << " [ -s, -num_strides ] - Threshold for number of unique distances\n" + << " [ -o, -out_dist_percent ] - Threshold for percentage of distances at boundaries of histogram\n" + << " [ -help, -h ] - Dispaly program options\n" + << std::endl; } -int main(int argc, char ** argv) +bool get_arg_int(const char * arg, int & value) { - try + try { + value = stoi(std::string(arg)); + return true; + } + catch (...) { + std::cerr << "ERROR: Unable to convert [" + std::string(arg) + "] to integer" << std::endl; + } + return false; +} + +bool get_arg_float(const char * arg, float & value) +{ + try { + value = stof(std::string(arg)); + return true; + } + catch (...) { + std::cerr << "ERROR: Unable to convert [" + std::string(arg) + "] to float" << std::endl; + } + return false; +} + +struct ProgramArgs +{ + std::string prog_name; + bool use_gs_nv = false; + bool verbose = false; + bool one_warp = false; + int num_accesses = -1; + int num_strides = -1; + float out_dist_percent = -1.0; + std::string trace_file_name; + std::string binary_file_name; + + void get_args(char** argv, int &argc) { - bool use_gs_nv = false; - bool verbose = false; - bool one_warp = false; - for (int i = 0; i < argc; i++) { - if (std::string(argv[i]) == "-nv") { - use_gs_nv = true; - } - else if (std::string(argv[i]) == "-v") { - verbose = true; - } - else if (std::string(argv[i]) == "-ow") { - one_warp = true; + static struct option options[] = { + {"nvbit", no_argument, 0, 'n'}, + {"one_warp", no_argument, 0, 'w'}, + {"verbose", no_argument, 0, 'v'}, + {"help", no_argument, 0, 'h'}, + {"num_accesses" , required_argument, 0, 'a'}, + {"num_strides", required_argument, 0, 's'}, + {"out_dist_percent", required_argument, 0, 'o'}, + 
{0, 0, 0, 0 } + }; + int option_index = 0; + + while (true) + { + int c = getopt_long(argc, argv, "nwvha:s:o:", options, &option_index); + + if (c == -1) break; + + int n = 0; + switch (c) { + case 'n': + use_gs_nv = true; + break; + + case 'w': + one_warp = true; + break; + + case 'v': + verbose = true; + break; + + case 'a': + if (!get_arg_int(optarg, num_accesses)) { + usage(prog_name); + exit(-1); + } + break; + + case 's': + if (!get_arg_int(optarg, num_strides)) { + usage(prog_name); + exit(-1); + } + break; + + case 'o': + if (!get_arg_float(optarg, out_dist_percent)) { + usage(prog_name); + exit(-1); + } + break; + + case 'h': + usage(prog_name); + exit(0); + + default: + usage(prog_name); + exit(-1); } } + // Handle Positional args + if (optind < argc) { + trace_file_name = argv[optind++]; // was 1 + } + if (optind < argc) { + binary_file_name = argv[optind++]; // was 2 + } + } +}; + +void updateThresholds(Thresholds & thresholds, ProgramArgs & pArgs) +{ + if (pArgs.num_strides >= 0) + thresholds.num_strides = pArgs.num_strides; + if (pArgs.num_accesses >= 0) + thresholds.num_accesses = pArgs.num_accesses; + if (pArgs.out_dist_percent >= 0.0) + thresholds.out_dist_percent = pArgs.out_dist_percent; +} + +int main(int argc, char ** argv) +{ + try + { size_t pos = std::string(argv[0]).find_last_of("/"); std::string prog_name = std::string(argv[0]).substr(pos+1); + ProgramArgs pArgs; + pArgs.get_args(argv, argc); + if (argc < 3) { usage(prog_name); throw GSError("Invalid program arguments"); } - if (use_gs_nv) + if (pArgs.use_gs_nv) { MemPatternsForNV mp; - mp.set_trace_file(argv[1]); + mp.set_trace_file(pArgs.trace_file_name); const char * config_file = std::getenv(GSNV_CONFIG_FILE); if (config_file) { mp.set_config_file(config_file); } - if (verbose) mp.set_log_level(1); - if (one_warp) mp.set_one_warp_mode(one_warp); + if (pArgs.verbose) mp.set_log_level(1); + if (pArgs.one_warp) mp.set_one_warp_mode(pArgs.one_warp); + + 
updateThresholds(mp.get_thresholds(), pArgs); // ----------------- Process Traces ----------------- @@ -75,9 +196,11 @@ int main(int argc, char ** argv) { MemPatternsForPin mp; - mp.set_trace_file(argv[1]); - mp.set_binary_file(argv[2]); - if (verbose) mp.set_log_level(1); + mp.set_trace_file(pArgs.trace_file_name); + mp.set_binary_file(pArgs.binary_file_name); + if (pArgs.verbose) mp.set_log_level(1); + + updateThresholds(mp.get_thresholds(), pArgs); // ----------------- Process Traces ----------------- diff --git a/gsnv_patterns.cpp b/gsnv_patterns.cpp index 11e84e5..c1d3a2a 100644 --- a/gsnv_patterns.cpp +++ b/gsnv_patterns.cpp @@ -423,8 +423,8 @@ void MemPatternsForNV::process_second_pass() uint64_t mcnt = 0; // used our own local mcnt while iterating over file in this method. // State carried thru - addr_t iaddr; - int64_t maddr; + addr_t iaddr = 0; + int64_t maddr = 0; addr_t gather_base[NTOP] = {0}; addr_t scatter_base[NTOP] = {0}; @@ -805,6 +805,18 @@ void MemPatternsForNV::set_config_file(const std::string & config_file) bool mode = val ? true : false; set_one_warp_mode(mode); } + else if (GSNV_THRESHOLD_NUM_ACCESSES == name) { + int val = atoi(value.c_str()); + _thresholds.num_accesses = val; + } + else if (GSNV_THRESHOLD_NUM_STRIDES == name) { + int val = atoi(value.c_str()); + _thresholds.num_strides = val; + } + else if (GSNV_THRESHOLD_OUT_DIST_PERCENT == name) { + float val = atof(value.c_str()); + _thresholds.out_dist_percent = val; + } else { std::cerr << "Unknown setting <" << name << "> with value <" << value << "> " << "specified in config file: " << _config_file_name << " ignoring ..." 
<< std::endl; diff --git a/gsnv_patterns.h b/gsnv_patterns.h index 133beb9..c94df1f 100644 --- a/gsnv_patterns.h +++ b/gsnv_patterns.h @@ -116,6 +116,10 @@ namespace gsnv_patterns static constexpr const char * GSNV_LOG_LEVEL = "GSNV_LOG_LEVEL"; static constexpr const char * GSNV_ONE_WARP_MODE = "GSNV_ONE_WARP_MODE"; + static constexpr const char * GSNV_THRESHOLD_NUM_ACCESSES = "GSNV_THRESHOLD_NUM_ACCESSES"; + static constexpr const char * GSNV_THRESHOLD_NUM_STRIDES = "GSNV_THRESHOLD_NUM_STRIDES"; + static constexpr const char * GSNV_THRESHOLD_OUT_DIST_PERCENT = "GSNV_THRESHOLD_OUT_DIST_PERCENT"; + MemPatternsForNV(): _metrics(GATHER, SCATTER), _iinfo(GATHER, SCATTER), @@ -142,6 +146,8 @@ namespace gsnv_patterns void set_log_level(int8_t level) override { _log_level = level; } int8_t get_log_level() override { return _log_level; } + Thresholds & get_thresholds() override { return _thresholds; } + void set_trace_file(const std::string & trace_file_name); inline const std::string & get_trace_file_name() { return _trace_file_name; } @@ -224,6 +230,8 @@ namespace gsnv_patterns std::string _trace_out_file_name; // Ouput file containing nvbit traces encounterd if requested std::string _tmp_trace_out_file_name; // Temp file used to store traces before re-writing to _trace_out_filename + Thresholds _thresholds; + std::string _config_file_name; std::set _target_kernels; bool _limit_trace_count = false; diff --git a/gspin_patterns.cpp b/gspin_patterns.cpp index 7aae7b9..522c8b5 100644 --- a/gspin_patterns.cpp +++ b/gspin_patterns.cpp @@ -145,13 +145,13 @@ double MemPatternsForPin::update_source_lines_from_binary(mem_access_type mType) translate_iaddr(get_binary_file_name(), target_metrics.get_srcline()[k], target_iinfo.get_iaddrs()[k]); if (startswith(target_metrics.get_srcline()[k], "?")) { target_iinfo.get_icnt()[k] = 0; - target_metrics.iaddrs_nosym++; - target_metrics.indices_nosym += target_iinfo.get_occ()[k]; - - } else { - target_metrics.iaddrs_sym++; - 
target_metrics.indices_sym += target_iinfo.get_occ()[k]; - } + target_metrics.iaddrs_nosym++; + target_metrics.indices_nosym += target_iinfo.get_occ()[k]; + + } else { + target_metrics.iaddrs_sym++; + target_metrics.indices_sym += target_iinfo.get_occ()[k]; + } #endif target_cnt += target_iinfo.get_icnt()[k]; @@ -184,7 +184,6 @@ void MemPatternsForPin::process_traces() trace_entry_t *p_drtrace = NULL; trace_entry_t drtrace[NBUFS]; // was static (1024 bytes) - while (drline_read(fp_drtrace, drtrace, &p_drtrace, &iret)) { //decode drtrace drline = p_drtrace; diff --git a/gspin_patterns.h b/gspin_patterns.h index 78d64de..23925a2 100644 --- a/gspin_patterns.h +++ b/gspin_patterns.h @@ -114,6 +114,8 @@ namespace gspin_patterns void set_binary_file(const std::string & binary_file_name) { _binary_file_name = binary_file_name; } const std::string & get_binary_file_name() { return _binary_file_name; } + Thresholds & get_thresholds() override { return _thresholds; } + void update_metrics(); std::string get_file_prefix (); @@ -129,6 +131,8 @@ namespace gspin_patterns TraceInfo _trace_info; InstrWindow _iw; + Thresholds _thresholds; + int8_t _log_level = 0; std::string _trace_file_name; diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index b12303c..587edcb 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -27,7 +27,7 @@ cp -rv gs_patterns/nvbit_tracing/gsnv_trace $NVBIT_DIR/tools/ cd $NVBIT_DIR -#Compile tools and test apps. Make sure the gsnv_trace tool compiled. If successful will produced $NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so +#Compile tools and test apps. Make sure the gsnv_trace tool compiles. If successful will produced $NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so make -j ``` @@ -46,14 +46,17 @@ The config file should have 1 configuration setting per line. 
Configuration set The following are a list of configuration items currently supported: -| Config | Description | possible values | -|----------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------| -| GSNV_LOG_LEVEL | Sets the log level (only 0-2 are currently supported) | 0 to 255 | -| GSNV_TARGET_KERNEL | Specifies the names of Kernels which will be instrumented seperated by space, it none is provided all Kernels will be instrumented. If no exact match found, Will match all kernels which starts with the string provided. | A String | -| GSNV_FILE_PREFIX | Can be used if specify the prefix of output files e.g if prefix is "trace_file" then output files will be names trace_file.json, etc. If none is provided one will be inferred from the input trace file if provided. | A String | -| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. Trace file will not be written if this is not provided. | A String | -| GSNV_MAX_TRACE_COUNT | Specifies the maximum number of memory traces which are processed, once this number of traces are seen instrumentation is disabled (Can be useful to produce a small trace file for testing) | An Integer e.g 1000000 | -| GSNV_ONE_WARP_MODE | Enable handling traces for a single warp (defaults to warp 0 if enabled). Analogous to trace of first thread in CPU mode. 
| 1 (on) or 0 (off) the default) | +| Configs | Description | possible values | +|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------| +| GSNV_LOG_LEVEL | Sets the log level (only 0-2 are currently supported) | 0 to 255 | +| GSNV_TARGET_KERNEL | Specifies the names of Kernels which will be instrumented separated by space, if none is provided all Kernels will be instrumented. If no exact match found, will match all kernels which start with the string provided. | A String | +| GSNV_FILE_PREFIX | Can be used to specify the prefix of output files e.g if prefix is "trace_file" then output files will be named trace_file.json, etc. If none is provided one will be inferred from the input trace file if provided. | A String | +| GSNV_TRACE_OUT_FILE | Specifies the name of the output file which will be written with trace data. Trace file will not be written if this is not provided. | A String | +| GSNV_MAX_TRACE_COUNT | Specifies the maximum number of memory traces which are processed, once this number of traces are seen instrumentation is disabled (Can be useful to produce a small trace file for testing) | An Integer e.g 1000000 | +| GSNV_ONE_WARP_MODE | Enable handling traces for a single warp (defaults to warp 0 if enabled). Analogous to trace of first thread in CPU mode.
| 1 (on) or 0 (off) the default) | +| GSNV_THRESHOLD_NUM_ACCESSES | Sets the threshold for number of accesses | An Integer | +| GSNV_THRESHOLD_NUM_STRIDES | Sets the threshold for number of unique distances | An Integer | +| GSNV_THRESHOLD_OUT_DIST_PERCENT | Sets the threshold for percentage of distances at boundaries of histogram | A percentage as a Decimal e.g 0.5 | From 21f28a5d0cf77e2e251ac4d41251995730375a38 Mon Sep 17 00:00:00 2001 From: christopher Date: Wed, 2 Oct 2024 22:39:38 -0400 Subject: [PATCH 2/6] Update readme and usage info. --- gs_patterns_main.cpp | 17 +++++++++-------- nvbit_tracing/README.md | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/gs_patterns_main.cpp b/gs_patterns_main.cpp index f356b9d..8b90cec 100644 --- a/gs_patterns_main.cpp +++ b/gs_patterns_main.cpp @@ -22,14 +22,15 @@ void usage (const std::string & prog_name) { std::cerr << "Usage: " << prog_name << " \n" << " " << prog_name << " \n" - << " [ -n, -nvbit ] - Trace file provided is NVBit trace\n" - << " [ -w, -num_strides ] - Use memory acceses from one warp only (warp 0)\n" - << " [ -v, -verbose ] - Verbose output\n" - << "Additional options: \n" - << " [ -a, -num_accesses ] - Threshold for number of accesses\n" - << " [ -s, -num_strides ] - Threshold for number of unique distances\n" - << " [ -o, -out_dist_percent ] - Threshold for percentage of distances at boundaries of histogram\n" - << " [ -help, -h ] - Dispaly program options\n" + << "[Options for NVBit traces]:\n" + << " [ -n, --nvbit ] - Trace file provided is NVBit trace\n" + << " [ -w, --one_wwarp ] - Use memory accesses from one warp only (warp 0)\n" + << " [ -v, --verbose ] - Enable verbose logging\n" + << "[General options avaialable for all traces]:\n" + << " [ -a, --num_accesses ] - Threshold for number of accesses\n" + << " [ -s, --num_strides ] - Threshold for number of unique distances\n" + << " [ -o, --out_dist_perc ] - Threshold for percentage of distances at boundaries of 
histogram\n" + << " [ -h, --help ] - Display program options\n" << std::endl; } diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index 587edcb..ba3999a 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -95,13 +95,13 @@ The gzip command will compress the resulting trace file for use by gs_patterns i In the previous section on Instrumenting an application, we used gsnv_trace.so to instrument an application, the resulting trace file was then compressed. The instrumentation run also generated pattern files. If we want to rerun the pattern generation we can do so using the generated (and compressed) trace file without re-instrumenting the application as this is much faster. -To do this we just need to run the gs_pattern binary with the trace file and the "-nv " option. The "-nv" option indicates that the trace file is a NVBit trace. +To do this we just need to run the gs_pattern binary with the trace file and the "-n " option. The "-n" option indicates that the trace file is an NVBit trace. Example: ``` export GS_PATTERNS_DIR=/path/to/gs_patterns/binary/ -$GS_PATTERNS_DIR/gs_patterns -nv +$GS_PATTERNS_DIR/gs_patterns -n ``` ### Important Notes From 74b59d94cf171fc5fa248fbcbf2cb9d981188696 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 8 Oct 2024 21:17:55 -0400 Subject: [PATCH 3/6] Update readme & the Makefile for gsnv_trace. --- gs_patterns_core.h | 8 +++++--- nvbit_tracing/README.md | 24 ++++++++++++++++-------- nvbit_tracing/gsnv_trace/Makefile | 8 +++++--- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/gs_patterns_core.h b/gs_patterns_core.h index bc154c9..58593ef 100644 --- a/gs_patterns_core.h +++ b/gs_patterns_core.h @@ -331,9 +331,11 @@ namespace gs_patterns_core i, target_metrics.size[i]); printf("%s\n", bin_name); - //std::string bin_name = \ - //file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." 
+ \ - //std::to_string(target_metrics.size[i]) + "B.sbin"; + /* + std::string bin_name = + file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." + + std::to_string(target_metrics.size[i]) + "B.sbin"; + */ fp_bin = fopen(bin_name, "w"); if (NULL == fp_bin) diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index ba3999a..6aea724 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -1,31 +1,39 @@ # Setup -Download NVBit from the following locations: -https://github.com/NVlabs/NVBit +Building the NVBit client requires first building gs_patterns. + +## Step 1 - Download NVBit + +NVBit can be downloaded from the following location: https://github.com/NVlabs/NVBit #### Tested with version 1.7 https://github.com/NVlabs/NVBit/releases/tag/1.7 -#### From the parent directory of the gs_patterns distribution +#### From the parent directory of the gs_patterns distribution, unpack and build the gsnv_trace shared library. ``` # For example for Linux x86_64) -wget https://github.com/NVlabs/NVBit/releases/download/1.7/nvbit-Linux-aarch64-1.7.tar.bz2 +wget https://github.com/NVlabs/NVBit/releases/download/1.7/nvbit-Linux-x86_64-1.7-1.tar.bz2 ``` +## Step 2 - Build gsnv_trace.so ``` -module load gcc #or make sure you have gcc. Tested with 8.5.0 and 11.4.0 +# Make sure you have gcc installed. Tested with gcc 8.5.0 and 11.4.0 -tar xvf +tar xvf nvbit-Linux-x86_64-1.7-1.tar.bz2 export NVBIT_DIR= # full path cp -rv gs_patterns/nvbit_tracing/gsnv_trace $NVBIT_DIR/tools/ -cd $NVBIT_DIR +cd $NVBIT_DIR/tools + +# Edit gs_patterns/nvbit_tracing/gsnv_trace/Makefile and ensure: +a) GSPATTERNS_CORE_INC_PATH is set to the location of the gs_patterns directory, and +b) GSPATTERNS_CORE_LIB_PATH is set to the location where the gs_patterns_core.so shared library is located. #Compile tools and test apps. Make sure the gsnv_trace tool compiles.
If successful will produced $NVBIT_DIR/tools/gsnv_trace/gsnv_trace.so make -j @@ -34,7 +42,7 @@ make -j *** NOTE *** make sure you gzip the nvbit trace output file before attempting to use with gs_patterns. -# gsnv_trace +## Running gsnv_trace The gsnv_trace tool will instrument one or more CUDA kernels within a CUDA application and pass the resulting memory traces to the gs_patterns gs_patterns_core library. Once the application has completed and all kernels are retired the gs_patterns_core library will begin processing the trace data and automatically generate the pattern outputs and pattern output files. diff --git a/nvbit_tracing/gsnv_trace/Makefile b/nvbit_tracing/gsnv_trace/Makefile index 1a6a9ec..7c6e611 100644 --- a/nvbit_tracing/gsnv_trace/Makefile +++ b/nvbit_tracing/gsnv_trace/Makefile @@ -20,10 +20,12 @@ MAXRREGCOUNT_FLAG= endif NVBIT_PATH=../../core -GSPATTERNS_CORE_PATH=../../../gs_patterns -INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_PATH) +GSPATTERNS_CORE_INC_PATH=../../../gs_patterns/ +GSPATTERNS_CORE_LIB_PATH=../../../gs_patterns/build -LIBS=-L$(NVBIT_PATH) -lnvbit -L$(GSPATTERNS_CORE_PATH)/build -lgs_patterns_core +INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_INC_PATH) + +LIBS=-L$(NVBIT_PATH) -lnvbit -L$(GSPATTERNS_CORE_LIB_PATH) -lgs_patterns_core NVCC_PATH=-L $(subst bin/nvcc,lib64,$(shell which nvcc | tr -s /)) SOURCES=$(wildcard *.cu) From 8f9673c672a97e3153005af660387f66b7bae424 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 8 Oct 2024 21:54:26 -0400 Subject: [PATCH 4/6] Update readme & the Makefile for gsnv_trace. --- nvbit_tracing/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/nvbit_tracing/README.md b/nvbit_tracing/README.md index 6aea724..c98133e 100644 --- a/nvbit_tracing/README.md +++ b/nvbit_tracing/README.md @@ -1,6 +1,10 @@ # Setup -Building the NVBit client requires first building gs_patterns. 
+Building the NVBit client requires first building gs_patterns; it also requires a recent version of the NVIDIA CUDA libraries. +gsnv_trace has been built and tested with CUDA 12.3. Please ensure your PATH and LD_LIBRARY_PATH are set accordingly so that nvcc is found. + +export PATH=/path/to/cuda-12.3/bin${PATH:+:${PATH}} +export LD_LIBRARY_PATH=/path/to/cuda-12.3/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} ## Step 1 - Download NVBit From f71eadd1371752e4a31b5486687c6dd7f4fbeeb2 Mon Sep 17 00:00:00 2001 From: christopher Date: Tue, 8 Oct 2024 21:59:56 -0400 Subject: [PATCH 5/6] Update readme & the Makefile for gsnv_trace. --- nvbit_tracing/gsnv_trace/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nvbit_tracing/gsnv_trace/Makefile b/nvbit_tracing/gsnv_trace/Makefile index 7c6e611..81ca765 100644 --- a/nvbit_tracing/gsnv_trace/Makefile +++ b/nvbit_tracing/gsnv_trace/Makefile @@ -21,7 +21,7 @@ endif NVBIT_PATH=../../core GSPATTERNS_CORE_INC_PATH=../../../gs_patterns/ -GSPATTERNS_CORE_LIB_PATH=../../../gs_patterns/build +GSPATTERNS_CORE_LIB_PATH=../../../gs_patterns/build/ INCLUDES=-I$(NVBIT_PATH) -I$(GSPATTERNS_CORE_INC_PATH) From 464a4494dc9c2754df25c6c2722f937641d34b27 Mon Sep 17 00:00:00 2001 From: Eric Liddy <109451980+ericliddy@users.noreply.github.com> Date: Fri, 13 Dec 2024 21:55:55 -0800 Subject: [PATCH 6/6] Update gs_patterns.h Use memset instead of nested for loop for performance improvement. Seems to increase performance by 2-10x, depending on application being instrumented and hardware. 
--- gs_patterns.h | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/gs_patterns.h b/gs_patterns.h index f74b67c..8d9b979 100644 --- a/gs_patterns.h +++ b/gs_patterns.h @@ -263,24 +263,18 @@ namespace gs_patterns void init() { for (int w = 0; w < 2; w++) { - for (int i = 0; i < IWINDOW; i++) { - _w_iaddrs[w][i] = -1; - _w_bytes[w][i] = 0; - _w_cnt[w][i] = 0; - for (uint64_t j = 0; j < MAX_ACCESS_SIZE; j++) - _w_maddr[w][i][j] = -1; - } + memset(_w_iaddrs[w], 0xFF, IWINDOW * sizeof(int64_t)); + memset(_w_bytes[w], 0x00, IWINDOW * sizeof(int64_t)); + memset(_w_cnt[w], 0x00, IWINDOW * sizeof(int64_t)); + memset(&_w_maddr[w][0][0], 0xFF, IWINDOW * MAX_ACCESS_SIZE * sizeof(int64_t)); } } void reset(int w) { - for (int i = 0; i < IWINDOW; i++) { - _w_iaddrs[w][i] = -1; - _w_bytes[w][i] = 0; - _w_cnt[w][i] = 0; - for (uint64_t j = 0; j < MAX_ACCESS_SIZE; j++) - _w_maddr[w][i][j] = -1; - } + memset(_w_iaddrs[w], 0xFF, IWINDOW * sizeof(int64_t)); + memset(_w_bytes[w], 0x00, IWINDOW * sizeof(int64_t)); + memset(_w_cnt[w], 0x00, IWINDOW * sizeof(int64_t)); + memset(&_w_maddr[w][0][0], 0xFF, IWINDOW * MAX_ACCESS_SIZE * sizeof(int64_t)); } void reset() {