From 2c070c3060f90d7e4cc6bade1c4f1fcf23f11834 Mon Sep 17 00:00:00 2001 From: Derrick Greenspan Date: Wed, 18 Feb 2026 02:14:56 -0500 Subject: [PATCH] Fixed basic_1node_1smp.py --- .../elements/opal/tests/basic_1node_1smp.py | 268 +++++++++--------- 1 file changed, 135 insertions(+), 133 deletions(-) diff --git a/src/sst/elements/opal/tests/basic_1node_1smp.py b/src/sst/elements/opal/tests/basic_1node_1smp.py index aed3091835..04f23ef1fe 100644 --- a/src/sst/elements/opal/tests/basic_1node_1smp.py +++ b/src/sst/elements/opal/tests/basic_1node_1smp.py @@ -5,7 +5,7 @@ sst.setProgramOption("timebase", "1ps") # Tell SST what statistics handling we want -sst.setStatisticLoadLevel(4) +sst.setStatisticLoadLevel(0) clock = "2GHz" @@ -14,8 +14,8 @@ #os.environ['OMP_NUM_THREADS'] = str(cores/2) -local_memory_capacity = 128 # Size of memory in MBs -shared_memory_capacity = 2048 # 2GB +local_memory_capacity = 128 # Size of memory in MBs +shared_memory_capacity = 2048 # 2GB shared_memory = 1 page_size = 4 # In KB num_pages = local_memory_capacity * 1024 // page_size + 8*1024*1024//page_size @@ -33,10 +33,9 @@ "arielmode" : 0, "appargcount" : 0, "max_insts" : 10000, - "executable" : "./app/opal_test", + "executable" : "app/opal_test", "node" : 0, - "launchparamcount" : 1, - "launchparam0" : "-ifeellucky", + "launchparamcount" : 0, }) # Opal uses this memory manager to intercept memory translation requests, mallocs, mmaps, etc. @@ -51,15 +50,15 @@ "pagesize0" : page_size * 1024, }) -ariel.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) +#ariel.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) mmu = sst.Component("mmu", "Samba") mmu.addParams({ "os_page_size": 4, - "perfect": 0, + "perfect": 0, "corecount": cores//2, - "sizes_L1": 3, - "page_size1_L1": 4, + "sizes_L1": 3, + "page_size1_L1": 4, "page_size2_L1": 2048, "page_size3_L1": 1024*1024, "assoc1_L1": 4, @@ -82,28 +81,28 @@ "levels": 2, "max_width_L1": 3, "max_outstanding_L1": 2, - "max_outstanding_PTWC": 2, + "max_outstanding_PTWC": 2, "latency_L1": 4, "parallel_mode_L1": 1, "max_outstanding_L2": 2, "max_width_L2": 4, "latency_L2": 10, "parallel_mode_L2": 0, - "self_connected" : 0, - "page_walk_latency": 200, - "size1_PTWC": 32, # this just indicates the number entries of the page table walk cache level 1 (PTEs) - "assoc1_PTWC": 4, # this just indicates the associtativit the page table walk cache level 1 (PTEs) - "size2_PTWC": 32, # this just indicates the number entries of the page table walk cache level 2 (PMDs) - "assoc2_PTWC": 4, # this just indicates the associtativit the page table walk cache level 2 (PMDs) - "size3_PTWC": 32, # this just indicates the number entries of the page table walk cache level 3 (PUDs) - "assoc3_PTWC": 4, # this just indicates the associtativit the page table walk cache level 3 (PUDs) - "size4_PTWC": 32, # this just indicates the number entries of the page table walk cache level 4 (PGD) - "assoc4_PTWC": 4, # this just indicates the associtativit the page table walk cache level 4 (PGD) - "latency_PTWC": 10, # This is the latency of checking the page table walk cache - "opal_latency": "30ps", - "emulate_faults": 1, + "self_connected" : 0, + "page_walk_latency": 200, + "size1_PTWC": 32, # this just indicates the number entries of the page table walk cache level 1 (PTEs) + "assoc1_PTWC": 4, # this just indicates the associtativit the page table walk cache level 1 (PTEs) + "size2_PTWC": 32, # this just indicates the number entries of the page table walk cache level 2 (PMDs) + "assoc2_PTWC": 4, # this just indicates the associtativit the page table walk cache level 2 (PMDs) + "size3_PTWC": 32, # this just indicates the number entries of the page table walk cache level 3 (PUDs) + "assoc3_PTWC": 4, # this just indicates the associtativit the page table walk cache level 3 (PUDs) + "size4_PTWC": 32, # this just indicates the number entries of the page table walk cache level 4 (PGD) + "assoc4_PTWC": 4, # this just indicates the associtativit the page table walk cache level 4 (PGD) + "latency_PTWC": 10, # This is the latency of checking the page table walk cache + "opal_latency": "30ps", + "emulate_faults": 1, }) -mmu.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) +#mmu.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) # MMU uses this page fault handler. pagefaulthandler = mmu.setSubComponent("pagefaulthandler", "Opal.PageFaultHandler") @@ -113,28 +112,28 @@ opal= sst.Component("opal","Opal") opal.addParams({ - "clock" : clock, - "num_nodes" : 1, - "verbose" : 1, - "max_inst" : 32, - "shared_mempools" : 1, - "shared_mem.mempool0.start" : local_memory_capacity*1024*1024, - "shared_mem.mempool0.size" : shared_memory_capacity*1024, - "shared_mem.mempool0.frame_size": page_size, - "shared_mem.mempool0.mem_type" : 0, - "node0.cores" : cores//2, - "node0.allocation_policy" : 1, - "node0.page_migration" : 0, - "node0.page_migration_policy" : 0, - "node0.num_pages_to_migrate" : 0, - "node0.latency" : 2000, - "node0.memory.start" : 0, - "node0.memory.size" : local_memory_capacity*1024, - "node0.memory.frame_size" : page_size, - "node0.memory.mem_type" : 0, - "num_ports" : cores, + "clock" : clock, + "num_nodes" : 1, + "verbose" : 1, + "max_inst" : 32, + "shared_mempools" : 1, + "shared_mem.mempool0.start" : local_memory_capacity*1024*1024, + "shared_mem.mempool0.size" : shared_memory_capacity*1024, + "shared_mem.mempool0.frame_size": page_size, + "shared_mem.mempool0.mem_type" : 0, + "node0.cores" : cores//2, + "node0.allocation_policy" : 1, + "node0.page_migration" : 0, + "node0.page_migration_policy" : 0, + "node0.num_pages_to_migrate" : 0, + "node0.latency" : 2000, + "node0.memory.start" : 0, + "node0.memory.size" : local_memory_capacity*1024, + "node0.memory.frame_size" : page_size, + "node0.memory.mem_type" : 0, + "num_ports" : cores, }) -opal.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) +#opal.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) l1_params = { @@ -142,7 +141,7 @@ "cache_size": "32KiB", "associativity": 8, "access_latency_cycles": 4, - "L1": 1, + "L1": 1, "verbose": 30, "maxRequestDelay" : "1000000", } @@ -156,25 +155,25 @@ } l3_params = { - "access_latency_cycles" : "12", - "cache_frequency" : clock, - "associativity" : "16", - "cache_size" : "2MB", - "mshr_num_entries" : "4096", + "access_latency_cycles" : "12", + "cache_frequency" : clock, + "associativity" : "16", + "cache_size" : "2MB", + "mshr_num_entries" : "4096", "num_cache_slices" : 1, - "slice_allocation_policy" : "rr", + "slice_allocation_policy" : "rr", } link_params = { - "shared_memory": shared_memory, - "node": 0, + "shared_memory": shared_memory, + "node": 0, } nic_params = { - "shared_memory": shared_memory, - "node": 0, - "network_bw": "96GiB/s", - "local_memory_size" : local_memory_capacity*1024*1024, + "shared_memory": shared_memory, + "node": 0, + "network_bw": "96GiB/s", + "local_memory_size" : local_memory_capacity*1024*1024, } @@ -195,8 +194,7 @@ def __init__(self, name,networkId,input_latency,output_latency): "input_buf_size" : "1KB", "output_buf_size" : "1KB", }) - - topo = self.rtr.setSubComponent("topology", "merlin.singlerouter") + topo = self.rtr.setSubComponent("topology", "merlin.singlerouter") def getNextPort(self): self.ports += 1 @@ -209,40 +207,44 @@ def getNextPort(self): for next_core in range(cores): - l1 = sst.Component("l1cache_" + str(next_core), "memHierarchy.Cache") - l1.addParams(l1_params) - l1_highlink = l1.setSubComponent("highlink", "memHierarchy.MemLink") - l1_lowlink = l1.setSubComponent("lowlink", "memHierarchy.MemLink") - l1_highlink.addParams(link_params) - l1_lowlink.addParams(link_params) + l1 = sst.Component("l1cache_" + str(next_core), "memHierarchy.Cache") + l1.addParams(l1_params) + l1_highlink = l1.setSubComponent("highlink", "memHierarchy.MemLink") + l1_lowlink = l1.setSubComponent("lowlink", "memHierarchy.MemLink") + l1_highlink.addParams(link_params) + l1_lowlink.addParams(link_params) + + l2 = sst.Component("l2cache_" + str(next_core), "memHierarchy.Cache") + l2.addParams(l2_params) + l2_highlink = l2.setSubComponent("highlink", "memHierarchy.MemLink") + l2_lowlink = l2.setSubComponent("lowlink", "Opal.OpalMemNIC") + l2_highlink.addParams(link_params) + l2_lowlink.addParams(nic_params) + l2_lowlink.addParams({ "group" : 1}) + - l2 = sst.Component("l2cache_" + str(next_core), "memHierarchy.Cache") - l2.addParams(l2_params) - l2_highlink = l2.setSubComponent("highlink", "memHierarchy.MemLink") - l2_lowlink = l2.setSubComponent("lowlink", "Opal.OpalMemNIC") - l2_highlink.addParams(link_params) - l2_lowlink.addParams(nic_params) - l2_lowlink.addParams({ "group" : 1}) + if next_core < cores//2: + ArielMMULink = sst.Link("cpu_mmu_link_" + str(next_core)) + ArielMMULink.connect((ariel, "cache_link_%d"%next_core, "300ps"), (mmu, "cpu_to_mmu%d"%next_core, "300ps")) - arielMMULink = sst.Link("cpu_mmu_link_" + str(next_core)) - MMUCacheLink = sst.Link("mmu_cache_link_" + str(next_core)) - PTWMemLink = sst.Link("ptw_mem_link_" + str(next_core)) - PTWOpalLink = sst.Link("ptw_opal_" + str(next_core)) - ArielOpalLink = sst.Link("ariel_opal_" + str(next_core)) + ArielOpalLink = sst.Link("ariel_opal_" + str(next_core)) + ArielOpalLink.connect((memmgr, "opal_link_%d"%next_core, "300ps"), (opal, "coreLink%d"%(next_core), "300ps")) - if next_core < cores//2: - arielMMULink.connect((ariel, "cache_link_%d"%next_core, "300ps"), (mmu, "cpu_to_mmu%d"%next_core, "300ps")) - ArielOpalLink.connect((memmgr, "opal_link_%d"%next_core, "300ps"), (opal, "coreLink%d"%(next_core), "300ps")) - MMUCacheLink.connect((mmu, "mmu_to_cache%d"%next_core, "300ps"), (l1_highlink, "port", "300ps")) - PTWOpalLink.connect( (pagefaulthandler, "opal_link_%d"%next_core, "300ps"), (opal, "mmuLink%d"%(next_core), "300ps") ) - else: - PTWMemLink.connect((mmu, "ptw_to_mem%d"%(next_core-cores//2), "300ps"), (l1_highlink, "port", "300ps")) + MMUCacheLink = sst.Link("mmu_cache_link_" + str(next_core)) + MMUCacheLink.connect((mmu, "mmu_to_cache%d"%next_core, "300ps"), (l1_highlink, "port", "300ps")) - l2_core_link = sst.Link("l2cache_" + str(next_core) + "_link") - l2_core_link.connect((l1_lowlink, "port", "300ps"), (l2_highlink, "port", "300ps")) + PTWOpalLink = sst.Link("ptw_opal_" + str(next_core)) + PTWOpalLink.connect( (pagefaulthandler, "opal_link_%d"%next_core, "300ps"), (opal, "mmuLink%d"%(next_core), "300ps") ) - l2_ring_link = sst.Link("l2_ring_link_" + str(next_core)) - l2_ring_link.connect((l2_lowlink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) + else: + PTWMemLink = sst.Link("ptw_mem_link_" + str(next_core)) + PTWMemLink.connect((mmu, "ptw_to_mem%d"%(next_core-cores//2), "300ps"), (l1_highlink, "port", "300ps")) + + l2_core_link = sst.Link("l2cache_" + str(next_core) + "_link") + l2_core_link.connect((l1_lowlink, "port", "300ps"), (l2_highlink, "port", "300ps")) + + l2_ring_link = sst.Link("l2_ring_link_" + str(next_core)) + l2_ring_link.connect((l2_lowlink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) @@ -253,9 +255,9 @@ def getNextPort(self): l3_link.addParams(nic_params) l3_link.addParams({ "group" : 2, - "addr_range_start": 0, - "addr_range_end": (local_memory_capacity*1024*1024) - 1, - "interleave_size": "0B", + "addr_range_start": 0, + "addr_range_end": (local_memory_capacity*1024*1024) - 1, + "interleave_size": "0B", }) l3_ring_link = sst.Link("l3_link") @@ -264,27 +266,27 @@ def getNextPort(self): mem = sst.Component("local_memory", "memHierarchy.MemController") mem.addParams({ - "clock" : "1.2GHz", - "backing" : "none", - "backend" : "memHierarchy.timingDRAM", - "backend.id" : 0, - "backend.addrMapper" : "memHierarchy.roundRobinAddrMapper", - "backend.addrMapper.interleave_size" : "64B", - "backend.addrMapper.row_size" : "1KiB", - "backend.clock" : "1.2GHz", - "backend.mem_size" : str(local_memory_capacity) + "MiB", - "backend.channels" : 2, - "backend.channel.numRanks" : 2, - "backend.channel.rank.numBanks" : 16, - "backend.channel.transaction_Q_size" : 32, - "backend.channel.rank.bank.CL" : 14, - "backend.channel.rank.bank.CL_WR" : 12, - "backend.channel.rank.bank.RCD" : 14, - "backend.channel.rank.bank.TRP" : 14, - "backend.channel.rank.bank.dataCycles" : 2, - "backend.channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy", - "backend.channel.rank.bank.transactionQ" : "memHierarchy.fifoTransactionQ", - "backend.channel.rank.bank.pagePolicy.close" : 1, + "clock" : "1.2GHz", + "backing" : "none", + "backend" : "memHierarchy.timingDRAM", + "backend.id" : 0, + "backend.addrMapper" : "memHierarchy.roundRobinAddrMapper", + "backend.addrMapper.interleave_size" : "64B", + "backend.addrMapper.row_size" : "1KiB", + "backend.clock" : "1.2GHz", + "backend.mem_size" : str(local_memory_capacity) + "MiB", + "backend.channels" : 2, + "backend.channel.numRanks" : 2, + "backend.channel.rank.numBanks" : 16, + "backend.channel.transaction_Q_size" : 32, + "backend.channel.rank.bank.CL" : 14, + "backend.channel.rank.bank.CL_WR" : 12, + "backend.channel.rank.bank.RCD" : 14, + "backend.channel.rank.bank.TRP" : 14, + "backend.channel.rank.bank.dataCycles" : 2, + "backend.channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy", + "backend.channel.rank.bank.transactionQ" : "memHierarchy.fifoTransactionQ", + "backend.channel.rank.bank.pagePolicy.close" : 1, }) mem_link = mem.setSubComponent("highlink", "memHierarchy.MemLink") mem_link.addParams({ @@ -294,8 +296,8 @@ def getNextPort(self): dc = sst.Component("dc", "memHierarchy.DirectoryController") dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "200MHz", + "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize + "clock": "200MHz", #"debug" : 1, #"debug_level" : 10, }) @@ -306,11 +308,11 @@ def getNextPort(self): dc_highlink.addParams(nic_params) dc_highlink.addParams({ "group" : 3, - "addr_range_start" : 0, - "addr_range_end" : (local_memory_capacity*1024*1024)-1, - "interleave_size": "0B", - "shared_memory": shared_memory, - "node": 0, + "addr_range_start" : 0, + "addr_range_end" : (local_memory_capacity*1024*1024)-1, + "interleave_size": "0B", + "shared_memory": shared_memory, + "node": 0, #"debug" : 1, #"debug_level" : 10, }) @@ -333,8 +335,8 @@ def getNextPort(self): ext_mem = sst.Component("ExternalNVMmemContr", "memHierarchy.MemController") ext_mem.addParams({ "memory_size" : str(shared_memory_capacity) + "MB", - "max_requests_per_cycle" : 4, - "backing" : "none", + "max_requests_per_cycle" : 4, + "backing" : "none", "clock" : clock, }) @@ -350,16 +352,16 @@ def getNextPort(self): ext_dc = sst.Component("ExtMemDc", "memHierarchy.DirectoryController") ext_dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "1GHz", + "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize + "clock": "1GHz", }) ext_dc_highlink = ext_dc.setSubComponent("highlink", "Opal.OpalMemNIC") ext_dc_lowlink = ext_dc.setSubComponent("lowlink", "memHierarchy.MemLink") ext_dc_highlink.addParams({ - "network_bw": "80GiB/s", - "addr_range_start" : (local_memory_capacity*1024*1024), - "addr_range_end" : (local_memory_capacity*1024*1024) + (shared_memory_capacity*1024*1024) -1, - "node": 9999, + "network_bw": "80GiB/s", + "addr_range_start" : (local_memory_capacity*1024*1024), + "addr_range_end" : (local_memory_capacity*1024*1024) + (shared_memory_capacity*1024*1024) -1, + "node": 9999, "group" : 3, # TODO is this the right routing group? means sources are all components in group 2 and dests are all components in group 4 }) @@ -396,15 +398,15 @@ def getNextPort(self): def bridge(net0, net1): net0port = net0.getNextPort() net1port = net1.getNextPort() - name = "%s-%s"%(net0.name, net1.name) - bridge = sst.Component("Bridge:%s"%name, "merlin.Bridge") + name = "%s_%s"%(net0.name, net1.name) + bridge = sst.Component("Bridge_%s"%name, "merlin.Bridge") bridge.addParams({ "translator": "memHierarchy.MemNetBridge", "network_bw" : "80GiB/s", }) - link = sst.Link("B0-%s"%name) + link = sst.Link("B0_%s"%name) link.connect( (bridge, "network0", "500ps"), (net0.rtr, "port%d"%net0port, "500ps") ) - link = sst.Link("B1-%s"%name) + link = sst.Link("B1_%s"%name) link.connect( (bridge, "network1", "500ps"), (net1.rtr, "port%d"%net1port, "500ps") )