ISCACitations/citations_visualization.html at main · VerticalResearchGroup/ISCACitations · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8" />
    <title>Citation Visualization</title>
    <style>
        body { margin: 0; padding: 0; font-family: Arial, sans-serif; }
    </style>
</head>
<body>
    <div style="width: 95%; margin: auto; min-height: 700px;">
        <div>                        <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
        <script charset="utf-8" src="https://cdn.plot.ly/plotly-3.3.0.min.js" integrity="sha256-bO3dS6yCpk9aK4gUpNELtCiDeSYvGYnK7jFI58NQnHI=" crossorigin="anonymous"></script>                <div id="90441c37-9fb2-487c-b537-9f92cc8220c9" class="plotly-graph-div" style="height:700px; width:100%;"></div>            <script type="text/javascript">                window.PLOTLYENV=window.PLOTLYENV || {};                                if (document.getElementById("90441c37-9fb2-487c-b537-9f92cc8220c9")) {                    Plotly.newPlot(                        "90441c37-9fb2-487c-b537-9f92cc8220c9",                        [{"customdata":[["In-Datacenter Performance Analysis of a Tensor Processing Unit",0.004230877,0.004230877,"https:\u002f\u002fscholar.google.com\u002fscholar?q=In-Datacenter%20Performance%20Analysis%20of%20a%20Tensor%20Processing%20Unit"],["EIE: efficient inference engine on compressed deep neural network",0.003927248,0.008158125,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EIE%3A%20efficient%20inference%20engine%20on%20compressed%20deep%20neural%20network"],["ISAAC: a convolutional neural network accelerator with in-situ analog arithmetic in crossbars",0.003760569,0.011918694,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ISAAC%3A%20a%20convolutional%20neural%20network%20accelerator%20with%20in-situ%20analog%20arithmetic%20in%20crossbars"],["Eyeriss: a spatial architecture for energy-efficient dataflow for convolutional neural networks",0.00369545,0.015614145,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Eyeriss%3A%20a%20spatial%20architecture%20for%20energy-efficient%20dataflow%20for%20convolutional%20neural%20networks"],["PRIME: a novel processing-in-memory architecture for neural network computation in ReRAM-based main memory",0.003658894,0.019273039,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PRIME%3A%20a%20novel%20processing-in-memory%20architecture%20for%20neural%20network%20computation%20in%20ReRAM-based%20main%20memory"],["SCNN: An Accelerator for Compressed-sparse Convolutional Neural Networks",0.003566161,0.0228392,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SCNN%3A%20An%20Accelerator%20for%20Compressed-sparse%20Convolutional%20Neural%20Networks"],["ShiDianNao: shifting vision processing closer to the sensor",0.00349411,0.02633331,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ShiDianNao%3A%20shifting%20vision%20processing%20closer%20to%20the%20sensor"],["A scalable processing-in-memory accelerator for parallel graph processing",0.00338815,0.02972146,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20scalable%20processing-in-memory%20accelerator%20for%20parallel%20graph%20processing"],["Cnvlutin: ineffectual-neuron-free deep neural network computing",0.003310936,0.033032396,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cnvlutin%3A%20ineffectual-neuron-free%20deep%20neural%20network%20computing"],["Minerva: enabling low-power, highly-accurate deep neural network accelerators",0.003218265,0.036250661,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Minerva%3A%20enabling%20low-power%2C%20highly-accurate%20deep%20neural%20network%20accelerators"],["Bit fusion: bit-level dynamically composable architecture for accelerating deep neural networks",0.003178936,0.039429597,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bit%20fusion%3A%20bit-level%20dynamically%20composable%20architecture%20for%20accelerating%20deep%20neural%20networks"],["A configurable cloud-scale DNN processor for real-time AI",0.003177673,0.042607271,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20configurable%20cloud-scale%20DNN%20processor%20for%20real-time%20AI"],["Heracles: improving resource efficiency at scale",0.003170668,0.045777938,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Heracles%3A%20improving%20resource%20efficiency%20at%20scale"],["MLPerf inference benchmark",0.003150355,0.048928294,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MLPerf%20inference%20benchmark"],["Profiling a warehouse-scale computer",0.003145647,0.052073941,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Profiling%20a%20warehouse-scale%20computer"],["PIM-enabled instructions: a low-overhead, locality-aware processing-in-memory architecture",0.003143615,0.055217556,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PIM-enabled%20instructions%3A%20a%20low-overhead%2C%20locality-aware%20processing-in-memory%20architecture"],["Neurocube: a programmable digital neuromorphic architecture with high-density 3D memory",0.003056782,0.058274338,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Neurocube%3A%20a%20programmable%20digital%20neuromorphic%20architecture%20with%20high-density%203D%20memory"],["Ten lessons from three generations shaped Google's TPUv4i",0.003023079,0.061297417,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Ten%20lessons%20from%20three%20generations%20shaped%20Google%27s%20TPUv4i"],["Neural cache: bit-serial in-cache acceleration of deep neural networks",0.003019574,0.064316991,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Neural%20cache%3A%20bit-serial%20in-cache%20acceleration%20of%20deep%20neural%20networks"],["Scalpel: Customizing DNN Pruning to the Underlying Hardware Parallelism",0.002999827,0.067316818,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Scalpel%3A%20Customizing%20DNN%20Pruning%20to%20the%20Underlying%20Hardware%20Parallelism"],["Cambricon: an instruction set architecture for neural networks",0.002966633,0.070283451,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cambricon%3A%20an%20instruction%20set%20architecture%20for%20neural%20networks"],["Maximizing CNN Accelerator Efficiency Through Resource Partitioning",0.002920268,0.073203719,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Maximizing%20CNN%20Accelerator%20Efficiency%20Through%20Resource%20Partitioning"],["Accel-sim: an extensible simulation framework for validated GPU modeling",0.002915922,0.076119641,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accel-sim%3A%20an%20extensible%20simulation%20framework%20for%20validated%20GPU%20modeling"],["Plasticine: A Reconfigurable Architecture For Parallel Paterns",0.002888978,0.079008619,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Plasticine%3A%20A%20Reconfigurable%20Architecture%20For%20Parallel%20Paterns"],["Firesim: FPGA-accelerated cycle-exact scale-out system simulation in the public cloud",0.002870136,0.081878755,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Firesim%3A%20FPGA-accelerated%20cycle-exact%20scale-out%20system%20simulation%20in%20the%20public%20cloud"],["Biscuit: a framework for near-data processing of big data workloads",0.002842956,0.084721711,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Biscuit%3A%20a%20framework%20for%20near-data%20processing%20of%20big%20data%20workloads"],["Transparent offloading and mapping (TOM): enabling programmer-transparent near-data processing in GPU systems",0.00281951,0.08754122,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Transparent%20offloading%20and%20mapping%20%28TOM%29%3A%20enabling%20programmer-transparent%20near-data%20processing%20in%20GPU%20systems"],["Hardware architecture and software stack for PIM based on commercial DRAM technology",0.002818173,0.090359394,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hardware%20architecture%20and%20software%20stack%20for%20PIM%20based%20on%20commercial%20DRAM%20technology"],["RecNMP: accelerating personalized recommendation with near-memory processing",0.002774767,0.093134161,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RecNMP%3A%20accelerating%20personalized%20recommendation%20with%20near-memory%20processing"],["UCNN: exploiting computational reuse in deep neural networks via weight repetition",0.002747633,0.095881794,"https:\u002f\u002fscholar.google.com\u002fscholar?q=UCNN%3A%20exploiting%20computational%20reuse%20in%20deep%20neural%20networks%20via%20weight%20repetition"],["MCM-GPU: Multi-Chip-Module GPUs for Continued Performance Scalability",0.002736652,0.098618446,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MCM-GPU%3A%20Multi-Chip-Module%20GPUs%20for%20Continued%20Performance%20Scalability"],["ScaleDeep: A Scalable Compute Architecture for Learning and Evaluating Deep Networks",0.002735062,0.101353508,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ScaleDeep%3A%20A%20Scalable%20Compute%20Architecture%20for%20Learning%20and%20Evaluating%20Deep%20Networks"],["RedEye: analog ConvNet image sensor architecture for continuous mobile vision",0.002708886,0.104062394,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RedEye%3A%20analog%20ConvNet%20image%20sensor%20architecture%20for%20continuous%20mobile%20vision"],["Back to the future: leveraging Belady's algorithm for improved cache replacement",0.002707201,0.106769596,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Back%20to%20the%20future%3A%20leveraging%20Belady%27s%20algorithm%20for%20improved%20cache%20replacement"],["Data reorganization in memory using 3D-stacked DRAM",0.002703815,0.10947341,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Data%20reorganization%20in%20memory%20using%203D-stacked%20DRAM"],["BlueDBM: an appliance for big data analytics",0.00268827,0.112161681,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BlueDBM%3A%20an%20appliance%20for%20big%20data%20analytics"],["Pioneering chiplet technology and design for the AMD EPYC\u2122and Ryzen\u2122processor families",0.00268827,0.114849951,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Pioneering%20chiplet%20technology%20and%20design%20for%20the%20AMD%20EPYC%E2%84%A2and%20Ryzen%E2%84%A2processor%20families"],["Stream-Dataflow Acceleration",0.00267762,0.117527571,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Stream-Dataflow%20Acceleration"],["Redundant memory mappings for fast access to large memories",0.002675822,0.120203393,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Redundant%20memory%20mappings%20for%20fast%20access%20to%20large%20memories"],["CraterLake: a hardware accelerator for efficient unbounded computation on encrypted data",0.002672205,0.122875598,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CraterLake%3A%20a%20hardware%20accelerator%20for%20efficient%20unbounded%20computation%20on%20encrypted%20data"],["Revisiting RowHammer: an experimental analysis of modern DRAM devices and mitigation techniques",0.002668561,0.125544159,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Revisiting%20RowHammer%3A%20an%20experimental%20analysis%20of%20modern%20DRAM%20devices%20and%20mitigation%20techniques"],["ACT: designing sustainable computer systems with an architectural carbon modeling tool",0.002668561,0.128212719,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ACT%3A%20designing%20sustainable%20computer%20systems%20with%20an%20architectural%20carbon%20modeling%20tool"],["Sparse ReRAM engine: joint exploration of activation and weight sparsity in compressed neural networks",0.002651809,0.130864528,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Sparse%20ReRAM%20engine%3A%20joint%20exploration%20of%20activation%20and%20weight%20sparsity%20in%20compressed%20neural%20networks"],["Energy-efficient neural network accelerator based on outlier-aware low-precision computation",0.002646092,0.13351062,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Energy-efficient%20neural%20network%20accelerator%20based%20on%20outlier-aware%20low-precision%20computation"],["DeepRecSys: a system for optimizing end-to-end at-scale neural recommendation inference",0.002644171,0.136154792,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DeepRecSys%3A%20a%20system%20for%20optimizing%20end-to-end%20at-scale%20neural%20recommendation%20inference"],["SnaPEA: predictive early activation for reducing computation in deep convolutional neural networks",0.002610282,0.138765074,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SnaPEA%3A%20predictive%20early%20activation%20for%20reducing%20computation%20in%20deep%20convolutional%20neural%20networks"],["Full-stack, real-system quantum computer studies: architectural comparisons and design insights",0.002606133,0.141371206,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Full-stack%2C%20real-system%20quantum%20computer%20studies%3A%20architectural%20comparisons%20and%20design%20insights"],["BTS: an accelerator for bootstrappable fully homomorphic encryption",0.002606133,0.143977339,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BTS%3A%20an%20accelerator%20for%20bootstrappable%20fully%20homomorphic%20encryption"],["ELSA: hardware-software co-design for efficient, lightweight self-attention mechanism in neural networks",0.002601947,0.146579286,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ELSA%3A%20hardware-software%20co-design%20for%20efficient%2C%20lightweight%20self-attention%20mechanism%20in%20neural%20networks"],["Energy efficient architecture for graph analytics accelerators",0.002599841,0.149179127,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Energy%20efficient%20architecture%20for%20graph%20analytics%20accelerators"],["New attacks and defense for encrypted-address cache",0.002573812,0.151752939,"https:\u002f\u002fscholar.google.com\u002fscholar?q=New%20attacks%20and%20defense%20for%20encrypted-address%20cache"],["DjiNN and Tonic: DNN as a service and its implications for future warehouse scale computers",0.002569333,0.154322272,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DjiNN%20and%20Tonic%3A%20DNN%20as%20a%20service%20and%20its%20implications%20for%20future%20warehouse%20scale%20computers"],["EDDIE: EM-Based Detection of Deviations in Program Execution",0.002564812,0.156887084,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EDDIE%3A%20EM-Based%20Detection%20of%20Deviations%20in%20Program%20Execution"],["Regaining Lost Cycles with HotCalls: A Fast Interface for SGX Secure Enclaves",0.002562536,0.15944962,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Regaining%20Lost%20Cycles%20with%20HotCalls%3A%20A%20Fast%20Interface%20for%20SGX%20Secure%20Enclaves"],["Dynamo: facebook's data center-wide power management system",0.002539153,0.161988773,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Dynamo%3A%20facebook%27s%20data%20center-wide%20power%20management%20system"],["Understanding and Optimizing Asynchronous Low-Precision Stochastic Gradient Descent",0.002534337,0.16452311,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Understanding%20and%20Optimizing%20Asynchronous%20Low-Precision%20Stochastic%20Gradient%20Descent"],["Rumba: an online quality management system for approximate computing",0.00253191,0.167055021,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Rumba%3A%20an%20online%20quality%20management%20system%20for%20approximate%20computing"],["Asymptotic improvements to quantum circuits via qutrits",0.002529472,0.169584492,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Asymptotic%20improvements%20to%20quantum%20circuits%20via%20qutrits"],["Statistical assertions for validating patterns and finding bugs in quantum programs",0.00252702,0.172111512,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Statistical%20assertions%20for%20validating%20patterns%20and%20finding%20bugs%20in%20quantum%20programs"],["Gist: efficient data encoding for deep neural network training",0.00252208,0.174633592,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Gist%3A%20efficient%20data%20encoding%20for%20deep%20neural%20network%20training"],["Clank: Architectural Support for Intermittent Computation",0.00251959,0.177153182,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Clank%3A%20Architectural%20Support%20for%20Intermittent%20Computation"],["Accelerating distributed reinforcement learning with in-switch computing",0.002512043,0.179665225,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerating%20distributed%20reinforcement%20learning%20with%20in-switch%20computing"],["DSAGEN: synthesizing programmable spatial accelerators",0.0025095,0.182174725,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DSAGEN%3A%20synthesizing%20programmable%20spatial%20accelerators"],["CoSA: scheduling by constrained optimization for spatial accelerators",0.002501791,0.184676516,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CoSA%3A%20scheduling%20by%20constrained%20optimization%20for%20spatial%20accelerators"],["TWiCe: preventing row-hammering by exploiting time window counters",0.002485991,0.187162507,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TWiCe%3A%20preventing%20row-hammering%20by%20exploiting%20time%20window%20counters"],["HeteroOS: OS Design for Heterogeneous Memory Management in Datacenter",0.002483306,0.189645813,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HeteroOS%3A%20OS%20Design%20for%20Heterogeneous%20Memory%20Management%20in%20Datacenter"],["The Reach Profiler (REAPER): Enabling the Mitigation of DRAM Retention Failures via Profiling at Aggressive Conditions",0.002477891,0.192123704,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20Reach%20Profiler%20%28REAPER%29%3A%20Enabling%20the%20Mitigation%20of%20DRAM%20Retention%20Failures%20via%20Profiling%20at%20Aggressive%20Conditions"],["Duality cache for data parallel acceleration",0.002466873,0.194590578,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Duality%20cache%20for%20data%20parallel%20acceleration"],["ASIC clouds: specializing the datacenter",0.002458441,0.197049018,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ASIC%20clouds%3A%20specializing%20the%20datacenter"],["Secure Hierarchy-Aware Cache Replacement Policy (SHARP): Defending Against Cache-Based Side Channel Atacks",0.002458441,0.199507459,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Secure%20Hierarchy-Aware%20Cache%20Replacement%20Policy%20%28SHARP%29%3A%20Defending%20Against%20Cache-Based%20Side%20Channel%20Atacks"],["Architecting to achieve a billion requests per second throughput on a single key-value store server platform",0.002455597,0.201963056,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Architecting%20to%20achieve%20a%20billion%20requests%20per%20second%20throughput%20on%20a%20single%20key-value%20store%20server%20platform"],["Automatic generation of efficient accelerators for reconfigurable hardware",0.002449857,0.204412913,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Automatic%20generation%20of%20efficient%20accelerators%20for%20reconfigurable%20hardware"],["Modular routing design for chiplet-based systems",0.002449857,0.206862771,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Modular%20routing%20design%20for%20chiplet-based%20systems"],["CoNDA: efficient cache coherence support for near-data accelerators",0.002444048,0.209306819,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CoNDA%3A%20efficient%20cache%20coherence%20support%20for%20near-data%20accelerators"],["The Mondrian Data Engine",0.002438168,0.211744987,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20Mondrian%20Data%20Engine"],["Efficient invisible speculative execution through selective delay and value prediction",0.002438168,0.214183155,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficient%20invisible%20speculative%20execution%20through%20selective%20delay%20and%20value%20prediction"],["SoftSKU: optimizing server architectures for microservice diversity @scale",0.00242921,0.216612365,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SoftSKU%3A%20optimizing%20server%20architectures%20for%20microservice%20diversity%20%40scale"],["MGPUSim: enabling multi-GPU performance modeling and optimization",0.002426186,0.219038551,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MGPUSim%3A%20enabling%20multi-GPU%20performance%20modeling%20and%20optimization"],["Centaur: a chiplet-based, hybrid sparse-dense accelerator for personalized recommendations",0.002426186,0.221464738,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Centaur%3A%20a%20chiplet-based%2C%20hybrid%20sparse-dense%20accelerator%20for%20personalized%20recommendations"],["Warped-compression: enabling power efficient GPUs through register compression",0.002420081,0.223884819,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Warped-compression%3A%20enabling%20power%20efficient%20GPUs%20through%20register%20compression"],["Warped-slicer: efficient intra-SM slicing through dynamic resource partitioning for GPU multiprogramming",0.002416999,0.226301818,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Warped-slicer%3A%20efficient%20intra-SM%20slicing%20through%20dynamic%20resource%20partitioning%20for%20GPU%20multiprogramming"],["Laconic deep learning inference acceleration",0.002413898,0.228715716,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Laconic%20deep%20learning%20inference%20acceleration"],["Semantic locality and context-based prefetching using reinforcement learning",0.002394853,0.231110568,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Semantic%20locality%20and%20context-based%20prefetching%20using%20reinforcement%20learning"],["Think fast: a tensor streaming processor (TSP) for accelerating deep learning workloads",0.002391603,0.233502172,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Think%20fast%3A%20a%20tensor%20streaming%20processor%20%28TSP%29%20for%20accelerating%20deep%20learning%20workloads"],["A case for core-assisted bottleneck acceleration in GPUs: enabling flexible data compression with assist warps",0.002378382,0.235880554,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20case%20for%20core-assisted%20bottleneck%20acceleration%20in%20GPUs%3A%20enabling%20flexible%20data%20compression%20with%20assist%20warps"],["Xuantie-910: a commercial multi-core 12-stage pipeline out-of-order 64-bit high performance RISC-V processor with vector extension",0.002378382,0.238258936,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Xuantie-910%3A%20a%20commercial%20multi-core%2012-stage%20pipeline%20out-of-order%2064-bit%20high%20performance%20RISC-V%20processor%20with%20vector%20extension"],["A fully associative, tagless DRAM cache",0.002375019,0.240633956,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20fully%20associative%2C%20tagless%20DRAM%20cache"],["Accelerating dependent cache misses with an enhanced memory controller",0.002375019,0.243008975,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerating%20dependent%20cache%20misses%20with%20an%20enhanced%20memory%20controller"],["GraFboost: using accelerated flash storage for external graph analytics",0.002375019,0.245383994,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GraFboost%3A%20using%20accelerated%20flash%20storage%20for%20external%20graph%20analytics"],["Perceptron-based prefetch filtering",0.002371632,0.247755626,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Perceptron-based%20prefetch%20filtering"],["Flexible software profiling of GPU architectures",0.002368221,0.250123848,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Flexible%20software%20profiling%20of%20GPU%20architectures"],["A multi-neural network acceleration architecture",0.002368221,0.252492069,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20multi-neural%20network%20acceleration%20architecture"],["Flexible auto-refresh: enabling scalable and energy-efficient DRAM refresh reductions",0.002364786,0.254856855,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Flexible%20auto-refresh%3A%20enabling%20scalable%20and%20energy-efficient%20DRAM%20refresh%20reductions"],["Quantitative comparison of hardware transactional memory for Blue Gene\u002fQ, zEnterprise EC12, Intel Core, and POWER8",0.002361326,0.257218181,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Quantitative%20comparison%20of%20hardware%20transactional%20memory%20for%20Blue%20Gene\u002fQ%2C%20zEnterprise%20EC12%2C%20Intel%20Core%2C%20and%20POWER8"],["Bit-plane compression: transforming data for better compression in many-core architectures",0.002361326,0.259579506,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bit-plane%20compression%3A%20transforming%20data%20for%20better%20compression%20in%20many-core%20architectures"],["SpinalFlow: an architecture and dataflow tailored for spiking neural networks",0.00235784,0.261937347,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SpinalFlow%3A%20an%20architecture%20and%20dataflow%20tailored%20for%20spiking%20neural%20networks"],["Sparsity-aware and re-configurable NPU architecture for samsung flagship mobile SoC",0.00235784,0.264295187,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Sparsity-aware%20and%20re-configurable%20NPU%20architecture%20for%20samsung%20flagship%20mobile%20SoC"],["Language-level persistency",0.002354329,0.266649516,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Language-level%20persistency"],["PACMAN: attacking ARM pointer authentication with speculative execution",0.00234364,0.268993156,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PACMAN%3A%20attacking%20ARM%20pointer%20authentication%20with%20speculative%20execution"],["CAWA: coordinated warp scheduling and cache prioritization for critical warp acceleration of GPGPU workloads",0.002336378,0.271329534,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CAWA%3A%20coordinated%20warp%20scheduling%20and%20cache%20prioritization%20for%20critical%20warp%20acceleration%20of%20GPGPU%20workloads"],["Genax: a genome sequencing accelerator",0.002336378,0.273665913,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Genax%3A%20a%20genome%20sequencing%20accelerator"],["Enabling scientific computing on memristive accelerators",0.002332706,0.275998619,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Enabling%20scientific%20computing%20on%20memristive%20accelerators"],["AsmDB: understanding and mitigating front-end stalls in warehouse-scale computers",0.002321517,0.278320136,"https:\u002f\u002fscholar.google.com\u002fscholar?q=AsmDB%3A%20understanding%20and%20mitigating%20front-end%20stalls%20in%20warehouse-scale%20computers"],["Treadmill: attributing the source of tail latency through precise load testing and statistical inference",0.002317729,0.280637864,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Treadmill%3A%20attributing%20the%20source%20of%20tail%20latency%20through%20precise%20load%20testing%20and%20statistical%20inference"],["MuonTrap: preventing cross-domain spectre-like attacks by capturing speculative state",0.002317729,0.282955593,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MuonTrap%3A%20preventing%20cross-domain%20spectre-like%20attacks%20by%20capturing%20speculative%20state"],["DRQ: dynamic region-based quantization for deep neural network acceleration",0.002317729,0.285273322,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DRQ%3A%20dynamic%20region-based%20quantization%20for%20deep%20neural%20network%20acceleration"],["Euphrates: algorithm-SoC co-design for low-power mobile continuous vision",0.00231391,0.287587232,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Euphrates%3A%20algorithm-SoC%20co-design%20for%20low-power%20mobile%20continuous%20vision"],["Mitigating wordline crosstalk using adaptive trees of counters",0.00231391,0.289901142,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Mitigating%20wordline%20crosstalk%20using%20adaptive%20trees%20of%20counters"],["GANAX: a unified MIMD-SIMD acceleration for generative adversarial networks",0.00231391,0.292215052,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GANAX%3A%20a%20unified%20MIMD-SIMD%20acceleration%20for%20generative%20adversarial%20networks"],["FLIN: enabling fairness and enhancing performance in modern NVMe solid state drives",0.00230618,0.294521232,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FLIN%3A%20enabling%20fairness%20and%20enhancing%20performance%20in%20modern%20NVMe%20solid%20state%20drives"],["Dual-side sparse tensor core",0.00230618,0.296827412,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Dual-side%20sparse%20tensor%20core"],["CROW: a low-cost substrate for improving DRAM performance, energy efficiency, and reliability",0.002294347,0.299121758,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CROW%3A%20a%20low-cost%20substrate%20for%20improving%20DRAM%20performance%2C%20energy%20efficiency%2C%20and%20reliability"],["MicroScope: enabling microarchitectural replay attacks",0.002294347,0.301416105,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MicroScope%3A%20enabling%20microarchitectural%20replay%20attacks"],["RaPiD: AI accelerator for ultra-low precision training and inference",0.002294347,0.303710452,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RaPiD%3A%20AI%20accelerator%20for%20ultra-low%20precision%20training%20and%20inference"],["Hybrid TLB Coalescing: Improving TLB Translation Coverage under Diverse Fragmented Memory Allocations",0.002290336,0.306000788,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hybrid%20TLB%20Coalescing%3A%20Improving%20TLB%20Translation%20Coverage%20under%20Diverse%20Fragmented%20Memory%20Allocations"],["Computation reuse in DNNs by exploiting input similarity",0.002290336,0.308291124,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Computation%20reuse%20in%20DNNs%20by%20exploiting%20input%20similarity"],["EVA2: exploiting temporal redundancy in live computer vision",0.002290336,0.31058146,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EVA2%3A%20exploiting%20temporal%20redundancy%20in%20live%20computer%20vision"],["Bouquet of instruction pointers: instruction pointer classifier-based spatial hardware prefetching",0.00227395,0.31285541,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bouquet%20of%20instruction%20pointers%3A%20instruction%20pointer%20classifier-based%20spatial%20hardware%20prefetching"],["Snafu: an ultra-low-power, energy-minimal CGRA-generation framework and architecture",0.00227395,0.315129361,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Snafu%3A%20an%20ultra-low-power%2C%20energy-minimal%20CGRA-generation%20framework%20and%20architecture"],["ObfusMem: A Low-Overhead Access Obfuscation for Trusted Memories",0.002269765,0.317399126,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ObfusMem%3A%20A%20Low-Overhead%20Access%20Obfuscation%20for%20Trusted%20Memories"],["Architecting noisy intermediate-scale trapped ion quantum computers",0.002269765,0.31966889,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Architecting%20noisy%20intermediate-scale%20trapped%20ion%20quantum%20computers"],["iPIM: programmable in-memory image processing accelerator using near-bank architecture",0.002269765,0.321938655,"https:\u002f\u002fscholar.google.com\u002fscholar?q=iPIM%3A%20programmable%20in-memory%20image%20processing%20accelerator%20using%20near-bank%20architecture"],["Interplay between hardware prefetcher and page eviction policy in CPU-GPU unified virtual memory",0.002261283,0.324199938,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Interplay%20between%20hardware%20prefetcher%20and%20page%20eviction%20policy%20in%20CPU-GPU%20unified%20virtual%20memory"],["BEAR: techniques for mitigating bandwidth bloat in gigascale DRAM caches",0.002252647,0.326452585,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BEAR%3A%20techniques%20for%20mitigating%20bandwidth%20bloat%20in%20gigascale%20DRAM%20caches"],["PipeZK: accelerating zero-knowledge proof with a pipelined architecture",0.002252647,0.328705233,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PipeZK%3A%20accelerating%20zero-knowledge%20proof%20with%20a%20pipelined%20architecture"],["Hi-fi playback: tolerating position errors in shift operations of racetrack memory",0.002248271,0.330953503,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hi-fi%20playback%3A%20tolerating%20position%20errors%20in%20shift%20operations%20of%20racetrack%20memory"],["XED: exposing on-die error detection information for strong memory reliability",0.002243854,0.333197357,"https:\u002f\u002fscholar.google.com\u002fscholar?q=XED%3A%20exposing%20on-die%20error%20detection%20information%20for%20strong%20memory%20reliability"],["Mellow writes: extending lifetime in resistive memories through selective slow write backs",0.002243854,0.335441211,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Mellow%20writes%3A%20extending%20lifetime%20in%20resistive%20memories%20through%20selective%20slow%20write%20backs"],["Rethinking TLB Designs in Virtualized Environments: A Very Large Part-of-Memory TLB",0.002243854,0.337685065,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Rethinking%20TLB%20Designs%20in%20Virtualized%20Environments%3A%20A%20Very%20Large%20Part-of-Memory%20TLB"],["Stash: have your scratchpad and cache it too",0.002239396,0.339924461,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Stash%3A%20have%20your%20scratchpad%20and%20cache%20it%20too"],["TIMELY: pushing data movements and interfaces in PIM accelerators towards local and in time domain",0.002234896,0.342159356,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TIMELY%3A%20pushing%20data%20movements%20and%20interfaces%20in%20PIM%20accelerators%20towards%20local%20and%20in%20time%20domain"],["InvisiMem: Smart Memory Defenses for Memory Bus Side Channel",0.002225767,0.344385124,"https:\u002f\u002fscholar.google.com\u002fscholar?q=InvisiMem%3A%20Smart%20Memory%20Defenses%20for%20Memory%20Bus%20Side%20Channel"],["I see dead \u03bcops: leaking secrets via Intel\u002fAMD micro-op caches",0.002225767,0.346610891,"https:\u002f\u002fscholar.google.com\u002fscholar?q=I%20see%20dead%20%CE%BCops%3A%20leaking%20secrets%20via%20Intel\u002fAMD%20micro-op%20caches"],["APPROX-NoC: A Data Approximation Framework for Network-On-Chip Architectures",0.002221137,0.348832028,"https:\u002f\u002fscholar.google.com\u002fscholar?q=APPROX-NoC%3A%20A%20Data%20Approximation%20Framework%20for%20Network-On-Chip%20Architectures"],["GraphSSD: graph semantics aware SSD",0.002221137,0.351053164,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GraphSSD%3A%20graph%20semantics%20aware%20SSD"],["Evolution of the samsung exynos CPU microarchitecture",0.00221174,0.353264904,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Evolution%20of%20the%20samsung%20exynos%20CPU%20microarchitecture"],["FORMS: fine-grained polarized ReRAM-based in-situ computation for mixed-signal DNN accelerator",0.00221174,0.355476644,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FORMS%3A%20fine-grained%20polarized%20ReRAM-based%20in-situ%20computation%20for%20mixed-signal%20DNN%20accelerator"],["Agile paging: exceeding the best of nested and shadow paging",0.002206971,0.357683615,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Agile%20paging%3A%20exceeding%20the%20best%20of%20nested%20and%20shadow%20paging"],["Access Pattern-Aware Cache Management for Improving Data Utilization in GPU",0.002206971,0.359890586,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Access%20Pattern-Aware%20Cache%20Management%20for%20Improving%20Data%20Utilization%20in%20GPU"],["Prediction based execution on deep neural networks",0.002206971,0.362097557,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Prediction%20based%20execution%20on%20deep%20neural%20networks"],["Hydra: enabling low-overhead mitigation of row-hammer at ultra-low thresholds via hybrid tracking",0.002206971,0.364304529,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hydra%3A%20enabling%20low-overhead%20mitigation%20of%20row-hammer%20at%20ultra-low%20thresholds%20via%20hybrid%20tracking"],["Triad-NVM: persistency for integrity-protected and encrypted non-volatile memories",0.002202155,0.366506683,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Triad-NVM%3A%20persistency%20for%20integrity-protected%20and%20encrypted%20non-volatile%20memories"],["GoSPA: an energy-efficient high-performance globally optimized sparse convolutional neural network accelerator",0.002202155,0.368708838,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GoSPA%3A%20an%20energy-efficient%20high-performance%20globally%20optimized%20sparse%20convolutional%20neural%20network%20accelerator"],["MnnFast: a fast and scalable system architecture for memory-augmented neural networks",0.002192374,0.370901212,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MnnFast%3A%20a%20fast%20and%20scalable%20system%20architecture%20for%20memory-augmented%20neural%20networks"],["TIE: energy-efficient tensor train-based inference engine for deep neural network",0.002192374,0.373093586,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TIE%3A%20energy-efficient%20tensor%20train-based%20inference%20engine%20for%20deep%20neural%20network"],["TENET: a framework for modeling tensor dataflow based on relation-centric notation",0.002192374,0.37528596,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TENET%3A%20a%20framework%20for%20modeling%20tensor%20dataflow%20based%20on%20relation-centric%20notation"],["Morpheus: creating application objects efficiently for heterogeneous computing",0.002182389,0.377468349,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Morpheus%3A%20creating%20application%20objects%20efficiently%20for%20heterogeneous%20computing"],["HASCO: towards agile hardware and software co-design for tensor computation",0.002182389,0.379650739,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HASCO%3A%20towards%20agile%20hardware%20and%20software%20co-design%20for%20tensor%20computation"],["Exploring the potential of heterogeneous von neumann\u002fdataflow execution models",0.002177318,0.381828057,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Exploring%20the%20potential%20of%20heterogeneous%20von%20neumann\u002fdataflow%20execution%20models"],["Unified address translation for memory-mapped SSDs with FlashMap",0.002177318,0.384005375,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Unified%20address%20translation%20for%20memory-mapped%20SSDs%20with%20FlashMap"],["Anubis: ultra-low overhead and recovery time for secure non-volatile memories",0.002177318,0.386182693,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Anubis%3A%20ultra-low%20overhead%20and%20recovery%20time%20for%20secure%20non-volatile%20memories"],["Eager pruning: algorithm and architecture support for fast training of deep neural networks",0.002172193,0.388354886,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Eager%20pruning%3A%20algorithm%20and%20architecture%20support%20for%20fast%20training%20of%20deep%20neural%20networks"],["Jenga: Software-Defined Cache Hierarchies",0.002167012,0.390521897,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Jenga%3A%20Software-Defined%20Cache%20Hierarchies"],["RANA: towards efficient neural acceleration with refresh-optimized embedded DRAM",0.002167012,0.392688909,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RANA%3A%20towards%20efficient%20neural%20acceleration%20with%20refresh-optimized%20embedded%20DRAM"],["NN-baton: DNN workload orchestration and chiplet granularity exploration for multichip accelerators",0.002167012,0.394855921,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NN-baton%3A%20DNN%20workload%20orchestration%20and%20chiplet%20granularity%20exploration%20for%20multichip%20accelerators"],["Harmonia: balancing compute and memory power in high-performance GPUs",0.002161774,0.397017694,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Harmonia%3A%20balancing%20compute%20and%20memory%20power%20in%20high-performance%20GPUs"],["Dynamic thread block launch: a lightweight execution mechanism to support irregular applications on GPUs",0.002161774,0.399179468,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Dynamic%20thread%20block%20launch%3A%20a%20lightweight%20execution%20mechanism%20to%20support%20irregular%20applications%20on%20GPUs"],["DeepAttest: an end-to-end attestation framework for deep neural networks",0.002161774,0.401341242,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DeepAttest%3A%20an%20end-to-end%20attestation%20framework%20for%20deep%20neural%20networks"],["QUAC-TRNG: high-throughput true random number generation using quadruple row activation in commodity DRAM chips",0.002161774,0.403503016,"https:\u002f\u002fscholar.google.com\u002fscholar?q=QUAC-TRNG%3A%20high-throughput%20true%20random%20number%20generation%20using%20quadruple%20row%20activation%20in%20commodity%20DRAM%20chips"],["GaaS-X: graph analytics accelerator supporting sparse data representation using crossbar architectures",0.002156478,0.405659495,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GaaS-X%3A%20graph%20analytics%20accelerator%20supporting%20sparse%20data%20representation%20using%20crossbar%20architectures"],["Translation ranger: operating system support for contiguity-aware TLBs",0.002151124,0.407810618,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Translation%20ranger%3A%20operating%20system%20support%20for%20contiguity-aware%20TLBs"],["Do-It-Yourself Virtual Memory Translation",0.002145709,0.409956327,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Do-It-Yourself%20Virtual%20Memory%20Translation"],["Scheduling page table walks for irregular GPU applications",0.002145709,0.412102036,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Scheduling%20page%20table%20walks%20for%20irregular%20GPU%20applications"],["PolyGraph: exposing the value of flexibility for graph processing accelerators",0.002145709,0.414247744,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PolyGraph%3A%20exposing%20the%20value%20of%20flexibility%20for%20graph%20processing%20accelerators"],["The load slice core microarchitecture",0.002140232,0.416387976,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20load%20slice%20core%20microarchitecture"],["FASE: finding amplitude-modulated side-channel emanations",0.002140232,0.418528207,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FASE%3A%20finding%20amplitude-modulated%20side-channel%20emanations"],["The locality descriptor: a holistic cross-layer abstraction to express data locality in GPUs",0.002140232,0.420668439,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20locality%20descriptor%3A%20a%20holistic%20cross-layer%20abstraction%20to%20express%20data%20locality%20in%20GPUs"],["BioHD: an efficient genome sequence search platform using HyperDimensional memorization",0.002140232,0.42280867,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BioHD%3A%20an%20efficient%20genome%20sequence%20search%20platform%20using%20HyperDimensional%20memorization"],["An in-network architecture for accelerating shared-memory multiprocessor collectives",0.002129086,0.424937756,"https:\u002f\u002fscholar.google.com\u002fscholar?q=An%20in-network%20architecture%20for%20accelerating%20shared-memory%20multiprocessor%20collectives"],["Demystifying the system vulnerability stack: transient fault effects across the layers",0.002117675,0.427055431,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Demystifying%20the%20system%20vulnerability%20stack%3A%20transient%20fault%20effects%20across%20the%20layers"],["DHTM: durable hardware transactional memory",0.002111866,0.429167297,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DHTM%3A%20durable%20hardware%20transactional%20memory"],["Energy-efficient video processing for virtual reality",0.002111866,0.431279163,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Energy-efficient%20video%20processing%20for%20virtual%20reality"],["Speculative data-oblivious execution: mobilizing safe prediction for safe and efficient speculative execution",0.002111866,0.433391029,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Speculative%20data-oblivious%20execution%3A%20mobilizing%20safe%20prediction%20for%20safe%20and%20efficient%20speculative%20execution"],["ARM virtualization: performance and architectural implications",0.002105986,0.435497015,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ARM%20virtualization%3A%20performance%20and%20architectural%20implications"],["Density tradeoffs of non-volatile memory as a replacement for SRAM based last level cache",0.002105986,0.437603001,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Density%20tradeoffs%20of%20non-volatile%20memory%20as%20a%20replacement%20for%20SRAM%20based%20last%20level%20cache"],["Janus: optimizing memory and storage support for non-volatile memory systems",0.002100032,0.439703033,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Janus%3A%20optimizing%20memory%20and%20storage%20support%20for%20non-volatile%20memory%20systems"],["Using multiple input, multiple output formal control to maximize resource efficiency in architectures",0.002094004,0.441797037,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Using%20multiple%20input%2C%20multiple%20output%20formal%20control%20to%20maximize%20resource%20efficiency%20in%20architectures"],["Efficient metadata management for irregular data prefetching",0.002094004,0.443891041,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficient%20metadata%20management%20for%20irregular%20data%20prefetching"],["PROMISE: an end-to-end design of a programmable mixed-signal accelerator for machine-learning algorithms",0.002087899,0.445978941,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PROMISE%3A%20an%20end-to-end%20design%20of%20a%20programmable%20mixed-signal%20accelerator%20for%20machine-learning%20algorithms"],["Cost-efficient overclocking in immersion-cooled datacenters",0.002087899,0.44806684,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cost-efficient%20overclocking%20in%20immersion-cooled%20datacenters"],["Towards sustainable in-situ server systems in the big data era",0.002081715,0.450148555,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Towards%20sustainable%20in-situ%20server%20systems%20in%20the%20big%20data%20era"],["HEB: deploying and managing hybrid energy buffers for improving datacenter efficiency and economy",0.002081715,0.45223027,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HEB%3A%20deploying%20and%20managing%20hybrid%20energy%20buffers%20for%20improving%20datacenter%20efficiency%20and%20economy"],["CLR-DRAM: a low-cost DRAM architecture enabling dynamic capacity-latency trade-off",0.002081715,0.454311986,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CLR-DRAM%3A%20a%20low-cost%20DRAM%20architecture%20enabling%20dynamic%20capacity-latency%20trade-off"],["2QAN: a quantum compiler for 2-local qubit hamiltonian simulation algorithms",0.002081715,0.456393701,"https:\u002f\u002fscholar.google.com\u002fscholar?q=2QAN%3A%20a%20quantum%20compiler%20for%202-local%20qubit%20hamiltonian%20simulation%20algorithms"],["LaPerm: locality aware scheduler for dynamic parallelism on GPUs",0.002075451,0.458469152,"https:\u002f\u002fscholar.google.com\u002fscholar?q=LaPerm%3A%20locality%20aware%20scheduler%20for%20dynamic%20parallelism%20on%20GPUs"],["Virtual thread: maximizing thread-level parallelism beyond GPU scheduling limit",0.002075451,0.460544602,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Virtual%20thread%3A%20maximizing%20thread-level%20parallelism%20beyond%20GPU%20scheduling%20limit"],["Guaranteeing local differential privacy on ultra-low-power systems",0.002075451,0.462620053,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Guaranteeing%20local%20differential%20privacy%20on%20ultra-low-power%20systems"],["A quantum computational compiler and design tool for technology-specific targets",0.002075451,0.464695504,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20quantum%20computational%20compiler%20and%20design%20tool%20for%20technology-specific%20targets"],["SARA: scaling a reconfigurable dataflow accelerator",0.002075451,0.466770954,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SARA%3A%20scaling%20a%20reconfigurable%20dataflow%20accelerator"],["Rethinking belady's algorithm to accommodate prefetching",0.002069103,0.468840057,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Rethinking%20belady%27s%20algorithm%20to%20accommodate%20prefetching"],["Hiding the Long Latency of Persist Barriers Using Speculative Execution",0.00206267,0.470902728,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hiding%20the%20Long%20Latency%20of%20Persist%20Barriers%20Using%20Speculative%20Execution"],["Quality of Service Support for Fine-Grained Sharing on GPUs",0.00206267,0.472965398,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Quality%20of%20Service%20Support%20for%20Fine-Grained%20Sharing%20on%20GPUs"],["EQC: ensembled quantum computing for variational quantum algorithms",0.00206267,0.475028068,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EQC%3A%20ensembled%20quantum%20computing%20for%20variational%20quantum%20algorithms"],["NEBULA: a neuromorphic spin-based ultra-low power architecture for SNNs and ANNs",0.00205615,0.477084218,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NEBULA%3A%20a%20neuromorphic%20spin-based%20ultra-low%20power%20architecture%20for%20SNNs%20and%20ANNs"],["Multiple clone row DRAM: a low latency and area optimized DRAM",0.00204954,0.479133758,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Multiple%20clone%20row%20DRAM%3A%20a%20low%20latency%20and%20area%20optimized%20DRAM"],["Towards statistical guarantees in controlling quality tradeoffs for approximate acceleration",0.00204954,0.481183297,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Towards%20statistical%20guarantees%20in%20controlling%20quality%20tradeoffs%20for%20approximate%20acceleration"],["Generative and multi-phase learning for computer systems optimization",0.00204954,0.483232837,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Generative%20and%20multi-phase%20learning%20for%20computer%20systems%20optimization"],["uGEMM: unary computing architecture for GEMM applications",0.00204954,0.485282377,"https:\u002f\u002fscholar.google.com\u002fscholar?q=uGEMM%3A%20unary%20computing%20architecture%20for%20GEMM%20applications"],["DIMMining: pruning-efficient and parallel graph mining on near-memory-computing",0.00204954,0.487331916,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DIMMining%3A%20pruning-efficient%20and%20parallel%20graph%20mining%20on%20near-memory-computing"],["MeRLiN: Exploiting Dynamic Instruction Behavior for Fast and Accurate Microarchitecture Level Reliability Assessment",0.002042837,0.489374753,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MeRLiN%3A%20Exploiting%20Dynamic%20Instruction%20Behavior%20for%20Fast%20and%20Accurate%20Microarchitecture%20Level%20Reliability%20Assessment"],["Spandex: a flexible interface for efficient heterogeneous coherence",0.002042837,0.49141759,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Spandex%3A%20a%20flexible%20interface%20for%20efficient%20heterogeneous%20coherence"],["Buddy compression: enabling larger memory for deep learning and HPC workloads on GPUs",0.002042837,0.493460427,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Buddy%20compression%3A%20enabling%20larger%20memory%20for%20deep%20learning%20and%20HPC%20workloads%20on%20GPUs"],["MITTS: memory inter-arrival time traffic shaping",0.002029143,0.49548957,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MITTS%3A%20memory%20inter-arrival%20time%20traffic%20shaping"],["Hardware Translation Coherence for Virtualized Systems",0.002029143,0.497518714,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hardware%20Translation%20Coherence%20for%20Virtualized%20Systems"],["EbDa: A New Theory on Design and Verification of Deadlock-free Interconnection Networks",0.002029143,0.499547857,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EbDa%3A%20A%20New%20Theory%20on%20Design%20and%20Verification%20of%20Deadlock-free%20Interconnection%20Networks"],["SeGraM: a universal hardware accelerator for genomic sequence-to-graph and sequence-to-sequence mapping",0.002029143,0.501577001,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SeGraM%3A%20a%20universal%20hardware%20accelerator%20for%20genomic%20sequence-to-graph%20and%20sequence-to-sequence%20mapping"],["A software-defined tensor streaming multiprocessor for large-scale machine learning",0.002022147,0.503599148,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20software-defined%20tensor%20streaming%20multiprocessor%20for%20large-scale%20machine%20learning"],["Page overlays: an enhanced virtual memory framework to enable fine-grained memory management",0.002015047,0.505614195,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Page%20overlays%3A%20an%20enhanced%20virtual%20memory%20framework%20to%20enable%20fine-grained%20memory%20management"],["Power attack defense: securing battery-backed data centers",0.002015047,0.507629242,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Power%20attack%20defense%3A%20securing%20battery-backed%20data%20centers"],["To PIM or not for emerging general purpose processing in DDR memory systems",0.002015047,0.50964429,"https:\u002f\u002fscholar.google.com\u002fscholar?q=To%20PIM%20or%20not%20for%20emerging%20general%20purpose%20processing%20in%20DDR%20memory%20systems"],["Stream-based memory access specialization for general purpose processors",0.002007841,0.51165213,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Stream-based%20memory%20access%20specialization%20for%20general%20purpose%20processors"],["Bonsai: high-performance adaptive merge tree sorting",0.002007841,0.513659971,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bonsai%3A%20high-performance%20adaptive%20merge%20tree%20sorting"],["Enhancing and exploiting contiguity for fast memory virtualization",0.002007841,0.515667811,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Enhancing%20and%20exploiting%20contiguity%20for%20fast%20memory%20virtualization"],["A variable warp size architecture",0.002000524,0.517668335,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20variable%20warp%20size%20architecture"],["PowerChief: Intelligent Power Allocation for Multi-Stage Applications to Improve Responsiveness on Power Constrained CMP",0.002000524,0.519668859,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PowerChief%3A%20Intelligent%20Power%20Allocation%20for%20Multi-Stage%20Applications%20to%20Improve%20Responsiveness%20on%20Power%20Constrained%20CMP"],["Chasing Away RAts: Semantics and Evaluation for Relaxed Atomics on Heterogeneous Systems",0.002000524,0.521669382,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Chasing%20Away%20RAts%3A%20Semantics%20and%20Evaluation%20for%20Relaxed%20Atomics%20on%20Heterogeneous%20Systems"],["LogCA: A High-Level Performance Model for Hardware Accelerators",0.002000524,0.523669906,"https:\u002f\u002fscholar.google.com\u002fscholar?q=LogCA%3A%20A%20High-Level%20Performance%20Model%20for%20Hardware%20Accelerators"],["Don't forget the I\u002fO when allocating your LLC",0.002000524,0.52567043,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Don%27t%20forget%20the%20I\u002fO%20when%20allocating%20your%20LLC"],["Opening pandora's box: a systematic study of new ways microarchitecture can leak private data",0.002000524,0.527670954,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Opening%20pandora%27s%20box%3A%20a%20systematic%20study%20of%20new%20ways%20microarchitecture%20can%20leak%20private%20data"],["Thermal time shifting: leveraging phase change materials to reduce cooling costs in warehouse-scale computers",0.001985546,0.5296565,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Thermal%20time%20shifting%3A%20leveraging%20phase%20change%20materials%20to%20reduce%20cooling%20costs%20in%20warehouse-scale%20computers"],["ActivePointers: a case for software address translation on GPUs",0.001985546,0.531642046,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ActivePointers%3A%20a%20case%20for%20software%20address%20translation%20on%20GPUs"],["PrORAM: dynamic prefetcher for oblivious RAM",0.001977878,0.533619925,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PrORAM%3A%20dynamic%20prefetcher%20for%20oblivious%20RAM"],["Exploiting dynamic timing slack for energy efficiency in ultra-low-power embedded systems",0.001977878,0.535597803,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Exploiting%20dynamic%20timing%20slack%20for%20energy%20efficiency%20in%20ultra-low-power%20embedded%20systems"],["2B-SSD: the case for dual, byte- and block-addressable solid-state drives",0.001977878,0.537575682,"https:\u002f\u002fscholar.google.com\u002fscholar?q=2B-SSD%3A%20the%20case%20for%20dual%2C%20byte-%20and%20block-addressable%20solid-state%20drives"],["RegMutex: inter-warp GPU register time-sharing",0.001977878,0.53955356,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RegMutex%3A%20inter-warp%20GPU%20register%20time-sharing"],["Opportunistic computing in GPU architectures",0.001977878,0.541531438,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Opportunistic%20computing%20in%20GPU%20architectures"],["NISQ+: boosting quantum computing power by approximating quantum error correction",0.001977878,0.543509317,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NISQ%2B%3A%20boosting%20quantum%20computing%20power%20by%20approximating%20quantum%20error%20correction"],["SQUARE: strategic quantum ancilla reuse for modular quantum programs via cost-effective uncomputation",0.001977878,0.545487195,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SQUARE%3A%20strategic%20quantum%20ancilla%20reuse%20for%20modular%20quantum%20programs%20via%20cost-effective%20uncomputation"],["Flex: high-availability datacenters with zero reserved power",0.001977878,0.547465074,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Flex%3A%20high-availability%20datacenters%20with%20zero%20reserved%20power"],["SPACE: locality-aware processing in heterogeneous memory for personalized recommendations",0.001977878,0.549442952,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SPACE%3A%20locality-aware%20processing%20in%20heterogeneous%20memory%20for%20personalized%20recommendations"],["CloudMonatt: an architecture for security health monitoring and attestation of virtual machines in cloud computing",0.001970086,0.551413038,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CloudMonatt%3A%20an%20architecture%20for%20security%20health%20monitoring%20and%20attestation%20of%20virtual%20machines%20in%20cloud%20computing"],["Parallel Automata Processor",0.001970086,0.553383123,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Parallel%20Automata%20Processor"],["Secure TLBs",0.001970086,0.555353209,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Secure%20TLBs"],["The NeBuLa RPC-optimized architecture",0.001970086,0.557323295,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20NeBuLa%20RPC-optimized%20architecture"],["Enabling compute-communication overlap in distributed deep learning training platforms",0.001970086,0.559293381,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Enabling%20compute-communication%20overlap%20in%20distributed%20deep%20learning%20training%20platforms"],["A stochastic-computing based deep learning framework using adiabatic quantum-flux-parametron superconducting technology",0.001962164,0.561255545,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20stochastic-computing%20based%20deep%20learning%20framework%20using%20adiabatic%20quantum-flux-parametron%20superconducting%20technology"],["Hyper-AP: enhancing associative processing through a full-stack optimization",0.001962164,0.563217709,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hyper-AP%3A%20enhancing%20associative%20processing%20through%20a%20full-stack%20optimization"],["Ripple: profile-guided instruction cache replacement for data center applications",0.001962164,0.565179873,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Ripple%3A%20profile-guided%20instruction%20cache%20replacement%20for%20data%20center%20applications"],["COP: to compress and protect main memory",0.00195411,0.567133983,"https:\u002f\u002fscholar.google.com\u002fscholar?q=COP%3A%20to%20compress%20and%20protect%20main%20memory"],["SmartExchange: trading higher-cost memory storage\u002faccess for lower-cost computation",0.00195411,0.569088093,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SmartExchange%3A%20trading%20higher-cost%20memory%20storage\u002faccess%20for%20lower-cost%20computation"],["Leaky buddies: cross-component covert channels on integrated CPU-GPU systems",0.00195411,0.571042202,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Leaky%20buddies%3A%20cross-component%20covert%20channels%20on%20integrated%20CPU-GPU%20systems"],["Sibyl: adaptive and extensible data placement in hybrid storage systems using online reinforcement learning",0.00195411,0.572996312,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Sibyl%3A%20adaptive%20and%20extensible%20data%20placement%20in%20hybrid%20storage%20systems%20using%20online%20reinforcement%20learning"],["Geyser: a compilation framework for quantum computing with neutral atoms",0.00195411,0.574950422,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Geyser%3A%20a%20compilation%20framework%20for%20quantum%20computing%20with%20neutral%20atoms"],["Nested enclave: supporting fine-grained hierarchical isolation with SGX",0.001945917,0.576896339,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Nested%20enclave%3A%20supporting%20fine-grained%20hierarchical%20isolation%20with%20SGX"],["Near data acceleration with concurrent host access",0.001945917,0.578842257,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Near%20data%20acceleration%20with%20concurrent%20host%20access"],["ABC-DIMM: alleviating the bottleneck of communication in DIMM-based near-memory processing with inter-DIMM broadcast",0.001945917,0.580788174,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ABC-DIMM%3A%20alleviating%20the%20bottleneck%20of%20communication%20in%20DIMM-based%20near-memory%20processing%20with%20inter-DIMM%20broadcast"],["Exploiting long-distance interactions and tolerating atom loss in neutral atom quantum architectures",0.001945917,0.582734091,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Exploiting%20long-distance%20interactions%20and%20tolerating%20atom%20loss%20in%20neutral%20atom%20quantum%20architectures"],["A case for richer cross-layer abstractions: bridging the semantic gap with expressive memory",0.001937583,0.584671674,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20case%20for%20richer%20cross-layer%20abstractions%3A%20bridging%20the%20semantic%20gap%20with%20expressive%20memory"],["SysScale: exploiting multi-domain dynamic voltage and frequency scaling for energy efficient mobile processors",0.001937583,0.586609256,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SysScale%3A%20exploiting%20multi-domain%20dynamic%20voltage%20and%20frequency%20scaling%20for%20energy%20efficient%20mobile%20processors"],["A hardware accelerator for tracing garbage collection",0.0019291,0.588538357,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20hardware%20accelerator%20for%20tracing%20garbage%20collection"],["Albireo: energy-efficient acceleration of convolutional neural networks via silicon photonics",0.0019291,0.590467457,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Albireo%3A%20energy-efficient%20acceleration%20of%20convolutional%20neural%20networks%20via%20silicon%20photonics"],["Themis: a network bandwidth-aware collective scheduling policy for distributed training of DL models",0.0019291,0.592396557,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Themis%3A%20a%20network%20bandwidth-aware%20collective%20scheduling%20policy%20for%20distributed%20training%20of%20DL%20models"],["Practical memory safety with REST",0.001920465,0.594317022,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Practical%20memory%20safety%20with%20REST"],["Synchronized progress in interconnection networks (SPIN): a new theory for deadlock freedom",0.001920465,0.596237488,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Synchronized%20progress%20in%20interconnection%20networks%20%28SPIN%29%3A%20a%20new%20theory%20for%20deadlock%20freedom"],["Strober: fast and accurate sample-based energy simulation for arbitrary RTL",0.001911672,0.598149159,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Strober%3A%20fast%20and%20accurate%20sample-based%20energy%20simulation%20for%20arbitrary%20RTL"],["Designing vertical processors in monolithic 3D",0.001902714,0.600051873,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Designing%20vertical%20processors%20in%20monolithic%203D"],["Cryogenic computer architecture modeling with memory-side case studies",0.001902714,0.601954586,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cryogenic%20computer%20architecture%20modeling%20with%20memory-side%20case%20studies"],["Axiomatic hardware-software contracts for security",0.001902714,0.6038573,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Axiomatic%20hardware-software%20contracts%20for%20security"],["DynaSpAM: dynamic spatial architecture mapping using out of order instruction schedules",0.001893585,0.605750885,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DynaSpAM%3A%20dynamic%20spatial%20architecture%20mapping%20using%20out%20of%20order%20instruction%20schedules"],["Sieve: scalable in-situ DRAM-based accelerator designs for massively parallel k-mer matching",0.001893585,0.60764447,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Sieve%3A%20scalable%20in-situ%20DRAM-based%20accelerator%20designs%20for%20massively%20parallel%20k-mer%20matching"],["ArMOR: defending against memory consistency model mismatches in heterogeneous architectures",0.001884279,0.609528749,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ArMOR%3A%20defending%20against%20memory%20consistency%20model%20mismatches%20in%20heterogeneous%20architectures"],["Probable cause: the deanonymizing effects of approximate DRAM",0.001884279,0.611413028,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Probable%20cause%3A%20the%20deanonymizing%20effects%20of%20approximate%20DRAM"],["Fusion: design tradeoffs in coherent cache hierarchies for accelerators",0.001884279,0.613297307,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Fusion%3A%20design%20tradeoffs%20in%20coherent%20cache%20hierarchies%20for%20accelerators"],["Boosting access parallelism to PCM-based main memory",0.001884279,0.615181586,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Boosting%20access%20parallelism%20to%20PCM-based%20main%20memory"],["Lazy persistency: a high-performing and write-efficient software persistency technique",0.001884279,0.617065865,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Lazy%20persistency%3A%20a%20high-performing%20and%20write-efficient%20software%20persistency%20technique"],["IntelliNoC: a holistic design framework for energy-efficient and reliable on-chip communication for manycores",0.001884279,0.618950144,"https:\u002f\u002fscholar.google.com\u002fscholar?q=IntelliNoC%3A%20a%20holistic%20design%20framework%20for%20energy-efficient%20and%20reliable%20on-chip%20communication%20for%20manycores"],["Perforated page: supporting fragmented memory allocation for large pages",0.001884279,0.620834423,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Perforated%20page%3A%20supporting%20fragmented%20memory%20allocation%20for%20large%20pages"],["CODIC: a low-cost substrate for enabling custom in-DRAM functionalities and optimizations",0.001884279,0.622718702,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CODIC%3A%20a%20low-cost%20substrate%20for%20enabling%20custom%20in-DRAM%20functionalities%20and%20optimizations"],["MOESI-prime: preventing coherence-induced hammering in commodity workloads",0.001884279,0.624602981,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MOESI-prime%3A%20preventing%20coherence-induced%20hammering%20in%20commodity%20workloads"],["Lukewarm serverless functions: characterization and optimization",0.001884279,0.62648726,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Lukewarm%20serverless%20functions%3A%20characterization%20and%20optimization"],["Efficient execution of memory access phases using dataflow specialization",0.001874789,0.628362049,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficient%20execution%20of%20memory%20access%20phases%20using%20dataflow%20specialization"],["Peak efficiency aware scheduling for highly energy proportional servers",0.001874789,0.630236838,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Peak%20efficiency%20aware%20scheduling%20for%20highly%20energy%20proportional%20servers"],["CASH: supporting IaaS customers with a sub-core configurable architecture",0.001874789,0.632111627,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CASH%3A%20supporting%20IaaS%20customers%20with%20a%20sub-core%20configurable%20architecture"],["Bespoke Processors for Applications with Ultra-low Area and Power Constraints",0.001874789,0.633986416,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bespoke%20Processors%20for%20Applications%20with%20Ultra-low%20Area%20and%20Power%20Constraints"],["Criticality aware tiered cache hierarchy: a fundamental relook at multi-level cache hierarchies",0.001874789,0.635861205,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Criticality%20aware%20tiered%20cache%20hierarchy%3A%20a%20fundamental%20relook%20at%20multi-level%20cache%20hierarchies"],["CryoCore: a fast and dense processor architecture for cryogenic computing",0.001874789,0.637735994,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CryoCore%3A%20a%20fast%20and%20dense%20processor%20architecture%20for%20cryogenic%20computing"],["FlexMiner: a pattern-aware accelerator for graph pattern mining",0.001874789,0.639610783,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FlexMiner%3A%20a%20pattern-aware%20accelerator%20for%20graph%20pattern%20mining"],["APRES: improving cache efficiency by exploiting load characteristics on GPUs",0.001865107,0.64147589,"https:\u002f\u002fscholar.google.com\u002fscholar?q=APRES%3A%20improving%20cache%20efficiency%20by%20exploiting%20load%20characteristics%20on%20GPUs"],["Viyojit: Decoupling Battery and DRAM Capacities for Battery-Backed DRAM",0.001865107,0.643340997,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Viyojit%3A%20Decoupling%20Battery%20and%20DRAM%20Capacities%20for%20Battery-Backed%20DRAM"],["Robox: an end-to-end solution to accelerate autonomous control in robotics",0.001865107,0.645206104,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Robox%3A%20an%20end-to-end%20solution%20to%20accelerate%20autonomous%20control%20in%20robotics"],["CHEx86: context-sensitive enforcement of memory safety via microcode-enabled capabilities",0.001865107,0.647071211,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CHEx86%3A%20context-sensitive%20enforcement%20of%20memory%20safety%20via%20microcode-enabled%20capabilities"],["JPEG-ACT: accelerating deep learning via transform-based lossy compression",0.001865107,0.648936318,"https:\u002f\u002fscholar.google.com\u002fscholar?q=JPEG-ACT%3A%20accelerating%20deep%20learning%20via%20transform-based%20lossy%20compression"],["Echo: compiler-based GPU memory footprint reduction for LSTM RNN training",0.001865107,0.650801425,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Echo%3A%20compiler-based%20GPU%20memory%20footprint%20reduction%20for%20LSTM%20RNN%20training"],["Confidential serverless made efficient with plug-in enclaves",0.001865107,0.652666532,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Confidential%20serverless%20made%20efficient%20with%20plug-in%20enclaves"],["Aggressive Pipelining of Irregular Applications on Reconfigurable Hardware",0.001855225,0.654521757,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Aggressive%20Pipelining%20of%20Irregular%20Applications%20on%20Reconfigurable%20Hardware"],["Genesis: a hardware acceleration framework for genomic data analysis",0.001855225,0.656376983,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Genesis%3A%20a%20hardware%20acceleration%20framework%20for%20genomic%20data%20analysis"],["Gorgon: accelerating machine learning from relational data",0.001855225,0.658232208,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Gorgon%3A%20accelerating%20machine%20learning%20from%20relational%20data"],["No-FAT: architectural support for low overhead memory safety checks",0.001855225,0.660087434,"https:\u002f\u002fscholar.google.com\u002fscholar?q=No-FAT%3A%20architectural%20support%20for%20low%20overhead%20memory%20safety%20checks"],["Manycore network interfaces for in-memory rack-scale computing",0.001845136,0.66193257,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Manycore%20network%20interfaces%20for%20in-memory%20rack-scale%20computing"],["Generic system calls for GPUs",0.001845136,0.663777706,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Generic%20system%20calls%20for%20GPUs"],["Relaxed persist ordering using strand persistency",0.001845136,0.665622841,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Relaxed%20persist%20ordering%20using%20strand%20persistency"],["Communication algorithm-architecture co-design for distributed deep learning",0.001845136,0.667467977,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Communication%20algorithm-architecture%20co-design%20for%20distributed%20deep%20learning"],["There's always a bigger fish: a clarifying analysis of a machine-learning-assisted side-channel attack",0.001845136,0.669313113,"https:\u002f\u002fscholar.google.com\u002fscholar?q=There%27s%20always%20a%20bigger%20fish%3A%20a%20clarifying%20analysis%20of%20a%20machine-learning-assisted%20side-channel%20attack"],["Coherence protocol for transparent management of scratchpad memories in shared memory manycore architectures",0.001834829,0.671147942,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Coherence%20protocol%20for%20transparent%20management%20of%20scratchpad%20memories%20in%20shared%20memory%20manycore%20architectures"],["All-inclusive ECC: thorough end-to-end protection for reliable computer memory",0.001834829,0.672982772,"https:\u002f\u002fscholar.google.com\u002fscholar?q=All-inclusive%20ECC%3A%20thorough%20end-to-end%20protection%20for%20reliable%20computer%20memory"],["AccQOC: accelerating quantum optimal control based pulse generation",0.001834829,0.674817601,"https:\u002f\u002fscholar.google.com\u002fscholar?q=AccQOC%3A%20accelerating%20quantum%20optimal%20control%20based%20pulse%20generation"],["Hoop: efficient hardware-assisted out-of-place update for non-volatile memory",0.001834829,0.67665243,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hoop%3A%20efficient%20hardware-assisted%20out-of-place%20update%20for%20non-volatile%20memory"],["The anytime automaton",0.001824296,0.678476727,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20anytime%20automaton"],["DICE: Compressing DRAM Caches for Bandwidth and Capacity",0.001824296,0.680301023,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DICE%3A%20Compressing%20DRAM%20Caches%20for%20Bandwidth%20and%20Capacity"],["Software-hardware co-optimization for computational chemistry on superconducting quantum processors",0.001824296,0.682125319,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Software-hardware%20co-optimization%20for%20computational%20chemistry%20on%20superconducting%20quantum%20processors"],["A RISC-V in-network accelerator for flexible high-performance low-power packet processing",0.001824296,0.683949615,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20RISC-V%20in-network%20accelerator%20for%20flexible%20high-performance%20low-power%20packet%20processing"],["VIP: virtualizing IP chains on handheld platforms",0.001813526,0.685763141,"https:\u002f\u002fscholar.google.com\u002fscholar?q=VIP%3A%20virtualizing%20IP%20chains%20on%20handheld%20platforms"],["Evaluation of an analog accelerator for linear algebra",0.001813526,0.687576668,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Evaluation%20of%20an%20analog%20accelerator%20for%20linear%20algebra"],["Exploiting page table locality for agile TLB prefetching",0.001813526,0.689390194,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Exploiting%20page%20table%20locality%20for%20agile%20TLB%20prefetching"],["Unlimited vector extension with data streaming support",0.001813526,0.69120372,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Unlimited%20vector%20extension%20with%20data%20streaming%20support"],["Energy efficient data encoding in DRAM channels exploiting data value similarity",0.001802509,0.693006229,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Energy%20efficient%20data%20encoding%20in%20DRAM%20channels%20exploiting%20data%20value%20similarity"],["ACCORD: enabling associativity for gigascale DRAM caches by coordinating way-install and way-prediction",0.001802509,0.694808738,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ACCORD%3A%20enabling%20associativity%20for%20gigascale%20DRAM%20caches%20by%20coordinating%20way-install%20and%20way-prediction"],["Stitch: fusible heterogeneous accelerators enmeshed with many-core architecture for wearables",0.001802509,0.696611247,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Stitch%3A%20fusible%20heterogeneous%20accelerators%20enmeshed%20with%20many-core%20architecture%20for%20wearables"],["Rebooting virtual memory with midgard",0.001802509,0.698413756,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Rebooting%20virtual%20memory%20with%20midgard"],["SNS's not a synthesizer: a deep-learning-based synthesis predictor",0.001802509,0.700216265,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SNS%27s%20not%20a%20synthesizer%3A%20a%20deep-learning-based%20synthesis%20predictor"],["Training personalized recommendation systems from (GPU) scratch: look forward not backwards",0.001802509,0.702018774,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Training%20personalized%20recommendation%20systems%20from%20%28GPU%29%20scratch%3A%20look%20forward%20not%20backwards"],["LAP: loop-block aware inclusion properties for energy-efficient asymmetric last level caches",0.001791232,0.703810006,"https:\u002f\u002fscholar.google.com\u002fscholar?q=LAP%3A%20loop-block%20aware%20inclusion%20properties%20for%20energy-efficient%20asymmetric%20last%20level%20caches"],["Scalable interconnects for reconfigurable spatial architectures",0.001791232,0.705601238,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Scalable%20interconnects%20for%20reconfigurable%20spatial%20architectures"],["Printed microprocessors",0.001791232,0.70739247,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Printed%20microprocessors"],["D\u00e9j\u00e0 view: spatio-temporal compute reuse for energy-efficient 360\u00b0 VR video streaming",0.001791232,0.709183703,"https:\u002f\u002fscholar.google.com\u002fscholar?q=D%C3%A9j%C3%A0%20view%3A%20spatio-temporal%20compute%20reuse%20for%20energy-efficient%20360%C2%B0%20VR%20video%20streaming"],["A specialized architecture for object serialization with applications to big data analytics",0.001791232,0.710974935,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20specialized%20architecture%20for%20object%20serialization%20with%20applications%20to%20big%20data%20analytics"],["DRAF: a low-power DRAM-based reconfigurable acceleration fabric",0.001779684,0.712754618,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DRAF%3A%20a%20low-power%20DRAM-based%20reconfigurable%20acceleration%20fabric"],["Get out of the valley: power-efficient address mapping for GPUs",0.001779684,0.714534302,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Get%20out%20of%20the%20valley%3A%20power-efficient%20address%20mapping%20for%20GPUs"],["SecDir: a secure directory to defeat directory side-channel attacks",0.001779684,0.716313986,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SecDir%3A%20a%20secure%20directory%20to%20defeat%20directory%20side-channel%20attacks"],["T4: compiling sequential code for effective speculative parallelization in hardware",0.001779684,0.71809367,"https:\u002f\u002fscholar.google.com\u002fscholar?q=T4%3A%20compiling%20sequential%20code%20for%20effective%20speculative%20parallelization%20in%20hardware"],["Hardware-based domain virtualization for intra-process isolation of persistent memory objects",0.001779684,0.719873353,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hardware-based%20domain%20virtualization%20for%20intra-process%20isolation%20of%20persistent%20memory%20objects"],["The virtual block interface: a flexible alternative to the conventional virtual memory framework",0.001779684,0.721653037,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20virtual%20block%20interface%3A%20a%20flexible%20alternative%20to%20the%20conventional%20virtual%20memory%20framework"],["Designing calibration and expressivity-efficient instruction sets for quantum computing",0.001779684,0.723432721,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Designing%20calibration%20and%20expressivity-efficient%20instruction%20sets%20for%20quantum%20computing"],["NDMiner: accelerating graph pattern mining using near data processing",0.001779684,0.725212405,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NDMiner%3A%20accelerating%20graph%20pattern%20mining%20using%20near%20data%20processing"],["Fractal: An Execution Model for Fine-Grain Nested Speculative Parallelism",0.00176785,0.726980255,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Fractal%3A%20An%20Execution%20Model%20for%20Fine-Grain%20Nested%20Speculative%20Parallelism"],["Bit-level perceptron prediction for indirect branches",0.00176785,0.728748105,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bit-level%20perceptron%20prediction%20for%20indirect%20branches"],["Hardware-software co-design for brain-computer interfaces",0.00176785,0.730515955,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hardware-software%20co-design%20for%20brain-computer%20interfaces"],["CryoGuard: a near refresh-free robust DRAM design for cryogenic computing",0.00176785,0.732283805,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CryoGuard%3A%20a%20near%20refresh-free%20robust%20DRAM%20design%20for%20cryogenic%20computing"],["MeNDA: a near-memory multi-way merge solution for sparse transposition and dataflows",0.00176785,0.734051655,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MeNDA%3A%20a%20near-memory%20multi-way%20merge%20solution%20for%20sparse%20transposition%20and%20dataflows"],["Base-victim compression: an opportunistic cache compression architecture",0.001755717,0.735807372,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Base-victim%20compression%3A%20an%20opportunistic%20cache%20compression%20architecture"],["Non-Speculative Load-Load Reordering in TSO",0.001755717,0.737563089,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Non-Speculative%20Load-Load%20Reordering%20in%20TSO"],["Division of labor: a more effective approach to prefetching",0.001755717,0.739318806,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Division%20of%20labor%3A%20a%20more%20effective%20approach%20to%20prefetching"],["Flexon: a flexible digital neuron for efficient spiking neural network simulations",0.001755717,0.741074523,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Flexon%3A%20a%20flexible%20digital%20neuron%20for%20efficient%20spiking%20neural%20network%20simulations"],["Tiny but mighty: designing and realizing scalable latency tolerance for manycore SoCs",0.001755717,0.742830239,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Tiny%20but%20mighty%3A%20designing%20and%20realizing%20scalable%20latency%20tolerance%20for%20manycore%20SoCs"],["Reducing world switches in virtualized environment with flexible cross-world calls",0.001743268,0.744573508,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Reducing%20world%20switches%20in%20virtualized%20environment%20with%20flexible%20cross-world%20calls"],["SEESAW: using superpages to improve VIPT caches",0.001743268,0.746316776,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SEESAW%3A%20using%20superpages%20to%20improve%20VIPT%20caches"],["Adaptive memory-side last-level GPU caching",0.001743268,0.748060045,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Adaptive%20memory-side%20last-level%20GPU%20caching"],["XPC: architectural support for secure and efficient cross process call",0.001743268,0.749803313,"https:\u002f\u002fscholar.google.com\u002fscholar?q=XPC%3A%20architectural%20support%20for%20secure%20and%20efficient%20cross%20process%20call"],["The dark side of DNN pruning",0.001730488,0.751533801,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20dark%20side%20of%20DNN%20pruning"],["Divide and conquer frontend bottleneck",0.001717357,0.753251159,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Divide%20and%20conquer%20frontend%20bottleneck"],["Accelerated seeding for genome sequence alignment with enumerated radix trees",0.001717357,0.754968516,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerated%20seeding%20for%20genome%20sequence%20alignment%20with%20enumerated%20radix%20trees"],["Large-scale graph processing on FPGAs with caches for thousands of simultaneous misses",0.001717357,0.756685873,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Large-scale%20graph%20processing%20on%20FPGAs%20with%20caches%20for%20thousands%20of%20simultaneous%20misses"],["IntroSpectre: a pre-silicon framework for discovery and analysis of transient execution vulnerabilities",0.001717357,0.758403231,"https:\u002f\u002fscholar.google.com\u002fscholar?q=IntroSpectre%3A%20a%20pre-silicon%20framework%20for%20discovery%20and%20analysis%20of%20transient%20execution%20vulnerabilities"],["EDAM: edit distance tolerant approximate matching content addressable memory",0.001717357,0.760120588,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EDAM%3A%20edit%20distance%20tolerant%20approximate%20matching%20content%20addressable%20memory"],["Asymmetry-aware work-stealing runtimes",0.001703857,0.761824445,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Asymmetry-aware%20work-stealing%20runtimes"],["The IBM zl5 high frequency mainframe branch predictor",0.001703857,0.763528302,"https:\u002f\u002fscholar.google.com\u002fscholar?q=The%20IBM%20zl5%20high%20frequency%20mainframe%20branch%20predictor"],["A case for hardware-based demand paging",0.001703857,0.765232158,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20case%20for%20hardware-based%20demand%20paging"],["SHRINK: Reducing the ISA complexity via instruction recycling",0.001689965,0.766922123,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SHRINK%3A%20Reducing%20the%20ISA%20complexity%20via%20instruction%20recycling"],["Callback: efficient synchronization without invalidation with a directory just for spin-waiting",0.001689965,0.768612088,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Callback%3A%20efficient%20synchronization%20without%20invalidation%20with%20a%20directory%20just%20for%20spin-waiting"],["ThermoGater: Thermally-Aware On-Chip Voltage Regulation",0.001689965,0.770302053,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ThermoGater%3A%20Thermally-Aware%20On-Chip%20Voltage%20Regulation"],["Tailored page sizes",0.001689965,0.771992018,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Tailored%20page%20sizes"],["A cost-effective entangling prefetcher for instructions",0.001689965,0.773681983,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20cost-effective%20entangling%20prefetcher%20for%20instructions"],["SpZip: architectural support for effective data compression in irregular applications",0.001689965,0.775371948,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SpZip%3A%20architectural%20support%20for%20effective%20data%20compression%20in%20irregular%20applications"],["INSPIRE: in-storage private information retrieval via protocol and architecture co-design",0.001689965,0.777061913,"https:\u002f\u002fscholar.google.com\u002fscholar?q=INSPIRE%3A%20in-storage%20private%20information%20retrieval%20via%20protocol%20and%20architecture%20co-design"],["Increasing ising machine capacity with multi-chip architectures",0.001689965,0.778751878,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Increasing%20ising%20machine%20capacity%20with%20multi-chip%20architectures"],["MGX: near-zero overhead memory protection for data-intensive accelerators",0.001689965,0.780441843,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MGX%3A%20near-zero%20overhead%20memory%20protection%20for%20data-intensive%20accelerators"],["Accelerating asynchronous programs through event sneak peek",0.001675658,0.782117501,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerating%20asynchronous%20programs%20through%20event%20sneak%20peek"],["Hiding intermittent information leakage with architectural support for blinking",0.001675658,0.783793159,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hiding%20intermittent%20information%20leakage%20with%20architectural%20support%20for%20blinking"],["A bus authentication and anti-probing architecture extending hardware trusted computing base off CPU chips and beyond",0.001675658,0.785468817,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20bus%20authentication%20and%20anti-probing%20architecture%20extending%20hardware%20trusted%20computing%20base%20off%20CPU%20chips%20and%20beyond"],["ZnG: architecting GPU multi-processors with new flash for scalable data analysis",0.001675658,0.787144476,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ZnG%3A%20architecting%20GPU%20multi-processors%20with%20new%20flash%20for%20scalable%20data%20analysis"],["REDUCT: keep it close, keep it cool!: efficient scaling of DNN inference on multi-core CPUs with near-cache compute",0.001675658,0.788820134,"https:\u002f\u002fscholar.google.com\u002fscholar?q=REDUCT%3A%20keep%20it%20close%2C%20keep%20it%20cool%21%3A%20efficient%20scaling%20of%20DNN%20inference%20on%20multi-core%20CPUs%20with%20near-cache%20compute"],["Satori: efficient and fair resource partitioning by sacrificing short-term benefits for long-term gains",0.001675658,0.790495792,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Satori%3A%20efficient%20and%20fair%20resource%20partitioning%20by%20sacrificing%20short-term%20benefits%20for%20long-term%20gains"],["HiveMind: a hardware-software system stack for serverless edge swarms",0.001675658,0.79217145,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HiveMind%3A%20a%20hardware-software%20system%20stack%20for%20serverless%20edge%20swarms"],["Efficient synonym filtering and scalable delayed translation for hybrid virtual caching",0.001660911,0.793832362,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficient%20synonym%20filtering%20and%20scalable%20delayed%20translation%20for%20hybrid%20virtual%20caching"],["Efficient synonym filtering and scalable delayed translation for hybrid virtual caching",0.001660911,0.795493273,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficient%20synonym%20filtering%20and%20scalable%20delayed%20translation%20for%20hybrid%20virtual%20caching"],["Decoupled Affine Computation for SIMT GPUs",0.001660911,0.797154184,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Decoupled%20Affine%20Computation%20for%20SIMT%20GPUs"],["Exploring predictive replacement policies for instruction cache and branch target buffer",0.001660911,0.798815096,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Exploring%20predictive%20replacement%20policies%20for%20instruction%20cache%20and%20branch%20target%20buffer"],["Fine-grained warm water cooling for improving datacenter economy",0.001660911,0.800476007,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Fine-grained%20warm%20water%20cooling%20for%20improving%20datacenter%20economy"],["NvMR: non-volatile memory renaming for intermittent computing",0.001660911,0.802136918,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NvMR%3A%20non-volatile%20memory%20renaming%20for%20intermittent%20computing"],["Computer performance microscopy with Shim",0.001645696,0.803782614,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Computer%20performance%20microscopy%20with%20Shim"],["SLIP: reducing wire energy in the memory hierarchy",0.001645696,0.80542831,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SLIP%3A%20reducing%20wire%20energy%20in%20the%20memory%20hierarchy"],["MBus: an ultra-low power interconnect bus for next generation nanopower systems",0.001645696,0.807074007,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MBus%3A%20an%20ultra-low%20power%20interconnect%20bus%20for%20next%20generation%20nanopower%20systems"],["Architectural Support for Server-Side PHP Processing",0.001645696,0.808719703,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Architectural%20Support%20for%20Server-Side%20PHP%20Processing"],["MorLog: morphable hardware logging for atomic persistence in non-volatile main memory",0.001645696,0.810365399,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MorLog%3A%20morphable%20hardware%20logging%20for%20atomic%20persistence%20in%20non-volatile%20main%20memory"],["Hetero-ViTAL: a virtualization stack for heterogeneous FPGA clusters",0.001645696,0.812011095,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hetero-ViTAL%3A%20a%20virtualization%20stack%20for%20heterogeneous%20FPGA%20clusters"],["Failure sentinels: ubiquitous just-in-time intermittent computation via low-cost hardware support for voltage monitoring",0.001645696,0.813656791,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Failure%20sentinels%3A%20ubiquitous%20just-in-time%20intermittent%20computation%20via%20low-cost%20hardware%20support%20for%20voltage%20monitoring"],["Cambricon-Q: a hybrid architecture for efficient training",0.001645696,0.815302487,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cambricon-Q%3A%20a%20hybrid%20architecture%20for%20efficient%20training"],["ZeR\u00d8: zero-overhead resilient operation under pointer integrity attacks",0.001645696,0.816948183,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ZeR%C3%98%3A%20zero-overhead%20resilient%20operation%20under%20pointer%20integrity%20attacks"],["t\u00e4k\u014d: a polymorphic cache hierarchy for general-purpose optimization of data movement",0.001645696,0.818593879,"https:\u002f\u002fscholar.google.com\u002fscholar?q=t%C3%A4k%C5%8D%3A%20a%20polymorphic%20cache%20hierarchy%20for%20general-purpose%20optimization%20of%20data%20movement"],["PPMLAC: high performance chipset architecture for secure multi-party computation",0.001645696,0.820239576,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PPMLAC%3A%20high%20performance%20chipset%20architecture%20for%20secure%20multi-party%20computation"],["A synthesis framework for stitching surface code with superconducting quantum devices",0.001645696,0.821885272,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20synthesis%20framework%20for%20stitching%20surface%20code%20with%20superconducting%20quantum%20devices"],["XQsim: modeling cross-technology control processors for 10+K qubit quantum computers",0.001645696,0.823530968,"https:\u002f\u002fscholar.google.com\u002fscholar?q=XQsim%3A%20modeling%20cross-technology%20control%20processors%20for%2010%2BK%20qubit%20quantum%20computers"],["Protogen: automatically generating directory cache coherence protocols from atomic specifications",0.001629982,0.82516095,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Protogen%3A%20automatically%20generating%20directory%20cache%20coherence%20protocols%20from%20atomic%20specifications"],["SCU: a GPU stream compaction unit for graph processing",0.001629982,0.826790932,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SCU%3A%20a%20GPU%20stream%20compaction%20unit%20for%20graph%20processing"],["HALO: accelerating flow classification for scalable packet processing in NFV",0.001629982,0.828420914,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HALO%3A%20accelerating%20flow%20classification%20for%20scalable%20packet%20processing%20in%20NFV"],["Taming the zoo: the unified GraphIt compiler framework for novel architectures",0.001629982,0.830050896,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Taming%20the%20zoo%3A%20the%20unified%20GraphIt%20compiler%20framework%20for%20novel%20architectures"],["MiSAR: minimalistic synchronization accelerator with resource overflow management",0.001613735,0.831664631,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MiSAR%3A%20minimalistic%20synchronization%20accelerator%20with%20resource%20overflow%20management"],["Rescuing uncorrectable fault patterns in on-chip memories through error pattern transformation",0.001613735,0.833278366,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Rescuing%20uncorrectable%20fault%20patterns%20in%20on-chip%20memories%20through%20error%20pattern%20transformation"],["Post-silicon CPU adaptation made practical using machine learning",0.001613735,0.834892101,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Post-silicon%20CPU%20adaptation%20made%20practical%20using%20machine%20learning"],["Compact leakage-free support for integrity and reliability",0.001613735,0.836505836,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Compact%20leakage-free%20support%20for%20integrity%20and%20reliability"],["Vector runahead",0.001613735,0.838119571,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Vector%20runahead"],["IChannels: exploiting current management mechanisms to create covert channels in modern processors",0.001613735,0.839733307,"https:\u002f\u002fscholar.google.com\u002fscholar?q=IChannels%3A%20exploiting%20current%20management%20mechanisms%20to%20create%20covert%20channels%20in%20modern%20processors"],["RACOD: algorithm\u002fhardware co-design for mobile robot path planning",0.001613735,0.841347042,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RACOD%3A%20algorithm\u002fhardware%20co-design%20for%20mobile%20robot%20path%20planning"],["There and Back Again: Optimizing the Interconnect in Networks of Memory Cubes",0.001596918,0.84294396,"https:\u002f\u002fscholar.google.com\u002fscholar?q=There%20and%20Back%20Again%3A%20Optimizing%20the%20Interconnect%20in%20Networks%20of%20Memory%20Cubes"],["Footprint: Regulating Routing Adaptiveness in Networks-on-Chip",0.001596918,0.844540878,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Footprint%3A%20Regulating%20Routing%20Adaptiveness%20in%20Networks-on-Chip"],["Aurochs: an architecture for dataflow threads",0.001596918,0.846137796,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Aurochs%3A%20an%20architecture%20for%20dataflow%20threads"],["Superconducting computing with alternating logic elements",0.001596918,0.847734714,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Superconducting%20computing%20with%20alternating%20logic%20elements"],["TDGraph: a topology-driven accelerator for high-performance streaming graph processing",0.001596918,0.849331632,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TDGraph%3A%20a%20topology-driven%20accelerator%20for%20high-performance%20streaming%20graph%20processing"],["uBrain: a unary brain computer interface",0.001596918,0.85092855,"https:\u002f\u002fscholar.google.com\u002fscholar?q=uBrain%3A%20a%20unary%20brain%20computer%20interface"],["Cascading structured pruning: enabling high data reuse for sparse DNN accelerators",0.001596918,0.852525468,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cascading%20structured%20pruning%3A%20enabling%20high%20data%20reuse%20for%20sparse%20DNN%20accelerators"],["A Programmable Galois Field Processor for the Internet of Things",0.001579489,0.854104957,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20Programmable%20Galois%20Field%20Processor%20for%20the%20Internet%20of%20Things"],["Yukta: multilayer resource controllers to maximize efficiency",0.001579489,0.855684446,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Yukta%3A%20multilayer%20resource%20controllers%20to%20maximize%20efficiency"],["Nonblocking memory refresh",0.001579489,0.857263936,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Nonblocking%20memory%20refresh"],["Mobilizing the micro-ops: exploiting context sensitive decoding for security and energy efficiency",0.001579489,0.858843425,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Mobilizing%20the%20micro-ops%3A%20exploiting%20context%20sensitive%20decoding%20for%20security%20and%20energy%20efficiency"],["OO- VR: NUMA friendly object-oriented VR rendering framework for future NUMA-based multi-GPU systems",0.001579489,0.860422914,"https:\u002f\u002fscholar.google.com\u002fscholar?q=OO-%20VR%3A%20NUMA%20friendly%20object-oriented%20VR%20rendering%20framework%20for%20future%20NUMA-based%20multi-GPU%20systems"],["InvisiPage: oblivious demand paging for secure enclaves",0.001579489,0.862002404,"https:\u002f\u002fscholar.google.com\u002fscholar?q=InvisiPage%3A%20oblivious%20demand%20paging%20for%20secure%20enclaves"],["GraphABCD: scaling out graph analytics with asynchronous block coordinate descent",0.001579489,0.863581893,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GraphABCD%3A%20scaling%20out%20graph%20analytics%20with%20asynchronous%20block%20coordinate%20descent"],["Packet chasing: spying on network packets over a cache side-channel",0.001579489,0.865161382,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Packet%20chasing%3A%20spying%20on%20network%20packets%20over%20a%20cache%20side-channel"],["Thermometer: profile-guided btb replacement for data center applications",0.001579489,0.866740871,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Thermometer%3A%20profile-guided%20btb%20replacement%20for%20data%20center%20applications"],["Emerald: graphics modeling for SoC systems",0.001561403,0.868302274,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Emerald%3A%20graphics%20modeling%20for%20SoC%20systems"],["SOFF: an OpenCL high-level synthesis framework for FPGAs",0.001561403,0.869863677,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SOFF%3A%20an%20OpenCL%20high-level%20synthesis%20framework%20for%20FPGAs"],["GCoM: a detailed GPU core model for accurate analytical modeling of modern GPUs",0.001561403,0.871425079,"https:\u002f\u002fscholar.google.com\u002fscholar?q=GCoM%3A%20a%20detailed%20GPU%20core%20model%20for%20accurate%20analytical%20modeling%20of%20modern%20GPUs"],["Accelerating markov random field inference using molecular optical gibbs sampling units",0.001542607,0.872967686,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerating%20markov%20random%20field%20inference%20using%20molecular%20optical%20gibbs%20sampling%20units"],["Scaling datacenter accelerators with compute-reuse architectures",0.001542607,0.874510293,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Scaling%20datacenter%20accelerators%20with%20compute-reuse%20architectures"],["BabelFish: fusing address translations for containers",0.001542607,0.876052899,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BabelFish%3A%20fusing%20address%20translations%20for%20containers"],["\u03b7-LSTM: co-designing highly-efficient large LSTM training via exploiting memory-saving and architectural design opportunities",0.001542607,0.877595506,"https:\u002f\u002fscholar.google.com\u002fscholar?q=%CE%B7-LSTM%3A%20co-designing%20highly-efficient%20large%20LSTM%20training%20via%20exploiting%20memory-saving%20and%20architectural%20design%20opportunities"],["A Programmable Hardware Accelerator for Simulating Dynamical Systems",0.001523043,0.879118549,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20Programmable%20Hardware%20Accelerator%20for%20Simulating%20Dynamical%20Systems"],["Securing GPU via region-based bounds checking",0.001523043,0.880641592,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Securing%20GPU%20via%20region-based%20bounds%20checking"],["FlexiCores: low footprint, high yield, field reprogrammable flexible microprocessors",0.001523043,0.882164636,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FlexiCores%3A%20low%20footprint%2C%20high%20yield%2C%20field%20reprogrammable%20flexible%20microprocessors"],["Efficiently scaling out-of-order cores for simultaneous multithreading",0.001502647,0.883667283,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficiently%20scaling%20out-of-order%20cores%20for%20simultaneous%20multithreading"],["XPro: A Cross-End Processing Architecture for Data Analytics in Wearables",0.001502647,0.88516993,"https:\u002f\u002fscholar.google.com\u002fscholar?q=XPro%3A%20A%20Cross-End%20Processing%20Architecture%20for%20Data%20Analytics%20in%20Wearables"],["Accelerating GPU Hardware Transactional Memory with Snapshot Isolation",0.001502647,0.886672577,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerating%20GPU%20Hardware%20Transactional%20Memory%20with%20Snapshot%20Isolation"],["Focused value prediction",0.001502647,0.888175224,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Focused%20value%20prediction"],["Efficiently supporting dynamic task parallelism on heterogeneous cache-coherent systems",0.001502647,0.889677871,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficiently%20supporting%20dynamic%20task%20parallelism%20on%20heterogeneous%20cache-coherent%20systems"],["TimeCache: using time to eliminate cache side channels when sharing software",0.001502647,0.891180518,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TimeCache%3A%20using%20time%20to%20eliminate%20cache%20side%20channels%20when%20sharing%20software"],["Accelerating database analytic query workloads using an associative processor",0.001502647,0.892683165,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Accelerating%20database%20analytic%20query%20workloads%20using%20an%20associative%20processor"],["Master of none acceleration: a comparison of accelerator architectures for analytical query processing",0.001481344,0.894164509,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Master%20of%20none%20acceleration%3A%20a%20comparison%20of%20accelerator%20architectures%20for%20analytical%20query%20processing"],["Tvarak: software-managed hardware offload for redundancy in direct-access NVM storage",0.00145905,0.895623559,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Tvarak%3A%20software-managed%20hardware%20offload%20for%20redundancy%20in%20direct-access%20NVM%20storage"],["Efficient multi-GPU shared memory via automatic optimization of fine-grained transfers",0.00145905,0.897082609,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Efficient%20multi-GPU%20shared%20memory%20via%20automatic%20optimization%20of%20fine-grained%20transfers"],["BlockMaestro: enabling programmer-transparent task-based execution in GPU systems",0.00145905,0.898541659,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BlockMaestro%3A%20enabling%20programmer-transparent%20task-based%20execution%20in%20GPU%20systems"],["Gearbox: a case for supporting accumulation dispatching and hybrid partitioning in PIM-based accelerators",0.00145905,0.900000709,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Gearbox%3A%20a%20case%20for%20supporting%20accumulation%20dispatching%20and%20hybrid%20partitioning%20in%20PIM-based%20accelerators"],["RelaxFault memory repair",0.001435668,0.901436377,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RelaxFault%20memory%20repair"],["Architecting a stochastic computing unit with molecular optical devices",0.001435668,0.902872044,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Architecting%20a%20stochastic%20computing%20unit%20with%20molecular%20optical%20devices"],["Linebacker: preserving victim cache lines in idle register files of GPUs",0.001435668,0.904307712,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Linebacker%3A%20preserving%20victim%20cache%20lines%20in%20idle%20register%20files%20of%20GPUs"],["Free atomics: hardware atomic operations without fences",0.001435668,0.90574338,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Free%20atomics%3A%20hardware%20atomic%20operations%20without%20fences"],["Clean: a race detector with cleaner semantics",0.001411086,0.907154466,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Clean%3A%20a%20race%20detector%20with%20cleaner%20semantics"],["CHARSTAR: Clock Hierarchy Aware Resource Scaling in Tiled ARchitectures",0.001411086,0.908565552,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CHARSTAR%3A%20Clock%20Hierarchy%20Aware%20Resource%20Scaling%20in%20Tiled%20ARchitectures"],["Non-speculative store coalescing in total store order",0.001411086,0.909976639,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Non-speculative%20store%20coalescing%20in%20total%20store%20order"],["PES: proactive event scheduling for responsive and energy-efficient mobile web computing",0.001411086,0.911387725,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PES%3A%20proactive%20event%20scheduling%20for%20responsive%20and%20energy-efficient%20mobile%20web%20computing"],["AxMemo: hardware-compiler co-design for approximate code memoization",0.001411086,0.912798811,"https:\u002f\u002fscholar.google.com\u002fscholar?q=AxMemo%3A%20hardware-compiler%20co-design%20for%20approximate%20code%20memoization"],["Using SMT to accelerate nested virtualization",0.001411086,0.914209897,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Using%20SMT%20to%20accelerate%20nested%20virtualization"],["Energy efficiency boost in the AI-infused POWER10 processor",0.001411086,0.915620983,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Energy%20efficiency%20boost%20in%20the%20AI-infused%20POWER10%20processor"],["SIMD2: a generalized matrix instruction set for accelerating tensor computation beyond GEMM",0.001411086,0.917032069,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SIMD2%3A%20a%20generalized%20matrix%20instruction%20set%20for%20accelerating%20tensor%20computation%20beyond%20GEMM"],["Cost-effective speculative scheduling in high performance processors",0.001385175,0.918417245,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Cost-effective%20speculative%20scheduling%20in%20high%20performance%20processors"],["Heat to power: thermal energy harvesting and recycling for warm water-cooled datacenters",0.001385175,0.91980242,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Heat%20to%20power%3A%20thermal%20energy%20harvesting%20and%20recycling%20for%20warm%20water-cooled%20datacenters"],["RingCNN: exploiting algebraically-sparse ring tensors for energy-efficient CNN-based computational imaging",0.001385175,0.921187595,"https:\u002f\u002fscholar.google.com\u002fscholar?q=RingCNN%3A%20exploiting%20algebraically-sparse%20ring%20tensors%20for%20energy-efficient%20CNN-based%20computational%20imaging"],["X-cache: a modular architecture for domain-specific caches",0.001385175,0.92257277,"https:\u002f\u002fscholar.google.com\u002fscholar?q=X-cache%3A%20a%20modular%20architecture%20for%20domain-specific%20caches"],["EyeCoD: eye tracking system acceleration via flatcam-based algorithm & accelerator co-design",0.001385175,0.923957945,"https:\u002f\u002fscholar.google.com\u002fscholar?q=EyeCoD%3A%20eye%20tracking%20system%20acceleration%20via%20flatcam-based%20algorithm%20%26%20accelerator%20co-design"],["DCS-ctrl: a fast and flexible device-control mechanism for device-centric server architecture",0.001357783,0.925315728,"https:\u002f\u002fscholar.google.com\u002fscholar?q=DCS-ctrl%3A%20a%20fast%20and%20flexible%20device-control%20mechanism%20for%20device-centric%20server%20architecture"],["PMNet: in-network data persistence",0.001357783,0.92667351,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PMNet%3A%20in-network%20data%20persistence"],["A scalable architecture for reprioritizing ordered parallelism",0.001357783,0.928031293,"https:\u002f\u002fscholar.google.com\u002fscholar?q=A%20scalable%20architecture%20for%20reprioritizing%20ordered%20parallelism"],["Dynamic global adaptive routing in high-radix networks",0.001357783,0.929389076,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Dynamic%20global%20adaptive%20routing%20in%20high-radix%20networks"],["Opportunistic competition overhead reduction for expediting critical section in NoC based CMPs",0.001328729,0.930717805,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Opportunistic%20competition%20overhead%20reduction%20for%20expediting%20critical%20section%20in%20NoC%20based%20CMPs"],["MTraceCheck: Validating Non-Deterministic Behavior of Memory Consistency Models in Post-Silicon Validation",0.001328729,0.932046534,"https:\u002f\u002fscholar.google.com\u002fscholar?q=MTraceCheck%3A%20Validating%20Non-Deterministic%20Behavior%20of%20Memory%20Consistency%20Models%20in%20Post-Silicon%20Validation"],["Redundant Memory Array Architecture for Efficient Selective Protection",0.001328729,0.933375263,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Redundant%20Memory%20Array%20Architecture%20for%20Efficient%20Selective%20Protection"],["Slipstream processors revisited: exploiting branch sets",0.001328729,0.934703992,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Slipstream%20processors%20revisited%3A%20exploiting%20branch%20sets"],["Dv\u00e9: improving DRAM reliability and performance on-demand via coherent replication",0.001328729,0.936032721,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Dv%C3%A9%3A%20improving%20DRAM%20reliability%20and%20performance%20on-demand%20via%20coherent%20replication"],["CaSMap: agile mapper for reconfigurable spatial architectures by automatically clustering intermediate representations and scattering mapping process",0.001328729,0.93736145,"https:\u002f\u002fscholar.google.com\u002fscholar?q=CaSMap%3A%20agile%20mapper%20for%20reconfigurable%20spatial%20architectures%20by%20automatically%20clustering%20intermediate%20representations%20and%20scattering%20mapping%20process"],["Branch vanguard: decomposing branch functionality into prediction and resolution instructions",0.0012978,0.93865925,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Branch%20vanguard%3A%20decomposing%20branch%20functionality%20into%20prediction%20and%20resolution%20instructions"],["ShortCut: Architectural Support for Fast Object Access in Scripting Languages",0.0012978,0.939957049,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ShortCut%3A%20Architectural%20Support%20for%20Fast%20Object%20Access%20in%20Scripting%20Languages"],["Filter caching for free: the untapped potential of the store-buffer",0.0012978,0.941254849,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Filter%20caching%20for%20free%3A%20the%20untapped%20potential%20of%20the%20store-buffer"],["High-performance deep-learning coprocessor integrated into x86 SoC with server-class CPUs",0.0012978,0.942552649,"https:\u002f\u002fscholar.google.com\u002fscholar?q=High-performance%20deep-learning%20coprocessor%20integrated%20into%20x86%20SoC%20with%20server-class%20CPUs"],["Mocktails: capturing the memory behaviour of proprietary mobile architectures",0.0012978,0.943850449,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Mocktails%3A%20capturing%20the%20memory%20behaviour%20of%20proprietary%20mobile%20architectures"],["HieraGen: automated generation of concurrent, hierarchical cache coherence protocols",0.0012978,0.945148248,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HieraGen%3A%20automated%20generation%20of%20concurrent%2C%20hierarchical%20cache%20coherence%20protocols"],["ScoRD: a scoped race detector for GPUs",0.0012978,0.946446048,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ScoRD%3A%20a%20scoped%20race%20detector%20for%20GPUs"],["NVOverlay: enabling efficient and scalable high-frequency snapshotting to NVM",0.0012978,0.947743848,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NVOverlay%3A%20enabling%20efficient%20and%20scalable%20high-frequency%20snapshotting%20to%20NVM"],["PS-ORAM: efficient crash consistency support for oblivious RAM on NVM",0.0012978,0.949041648,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PS-ORAM%3A%20efficient%20crash%20consistency%20support%20for%20oblivious%20RAM%20on%20NVM"],["ASAP: architecture support for asynchronous persistence",0.0012978,0.950339447,"https:\u002f\u002fscholar.google.com\u002fscholar?q=ASAP%3A%20architecture%20support%20for%20asynchronous%20persistence"],["Register file prefetching",0.0012978,0.951637247,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Register%20file%20prefetching"],["HetCore: TFET-CMOS hetero-device architecture for CPUs and GPUs",0.001264736,0.952901983,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HetCore%3A%20TFET-CMOS%20hetero-device%20architecture%20for%20CPUs%20and%20GPUs"],["PF-DRAM: a precharge-free DRAM structure",0.001264736,0.954166719,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PF-DRAM%3A%20a%20precharge-free%20DRAM%20structure"],["FastTrack: leveraging heterogeneous FPGA wires to design low-cost high-performance soft NoCs",0.00122922,0.955395939,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FastTrack%3A%20leveraging%20heterogeneous%20FPGA%20wires%20to%20design%20low-cost%20high-performance%20soft%20NoCs"],["Time squeezing for tiny devices",0.00122922,0.956625159,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Time%20squeezing%20for%20tiny%20devices"],["Flick: fast and lightweight ISA-crossing call for heterogeneous-ISA environments",0.00122922,0.95785438,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Flick%3A%20fast%20and%20lightweight%20ISA-crossing%20call%20for%20heterogeneous-ISA%20environments"],["TransForm: formally specifying transistency models and synthesizing enhanced litmus tests",0.00122922,0.9590836,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TransForm%3A%20formally%20specifying%20transistency%20models%20and%20synthesizing%20enhanced%20litmus%20tests"],["Managing reliability skew in DNA storage",0.00122922,0.960312821,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Managing%20reliability%20skew%20in%20DNA%20storage"],["Virtual melting temperature: managing server load to minimize cooling overhead with phase change materials",0.001190861,0.961503682,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Virtual%20melting%20temperature%3A%20managing%20server%20load%20to%20minimize%20cooling%20overhead%20with%20phase%20change%20materials"],["Hardware supported permission checks on persistent objects for performance and programmability",0.001190861,0.962694543,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Hardware%20supported%20permission%20checks%20on%20persistent%20objects%20for%20performance%20and%20programmability"],["Retracted on May 10, 2023: TPShare: a time-space sharing scheduling abstraction for shared cloud via vertical labels",0.001190861,0.963885404,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Retracted%20on%20May%2010%2C%202023%3A%20TPShare%3A%20a%20time-space%20sharing%20scheduling%20abstraction%20for%20shared%20cloud%20via%20vertical%20labels"],["Quantifying server memory frequency margin and using it to improve performance in HPC systems",0.001190861,0.965076265,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Quantifying%20server%20memory%20frequency%20margin%20and%20using%20it%20to%20improve%20performance%20in%20HPC%20systems"],["NASA: accelerating neural network design with a NAS processor",0.001190861,0.966267125,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NASA%3A%20accelerating%20neural%20network%20design%20with%20a%20NAS%20processor"],["SoftVN: efficient memory protection via software-provided version numbers",0.001190861,0.967457986,"https:\u002f\u002fscholar.google.com\u002fscholar?q=SoftVN%3A%20efficient%20memory%20protection%20via%20software-provided%20version%20numbers"],["Anticipating and eliminating redundant computations in accelerated sparse training",0.001190861,0.968648847,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Anticipating%20and%20eliminating%20redundant%20computations%20in%20accelerated%20sparse%20training"],["PowerChop: identifying and managing non-critical units in hybrid processor architectures",0.001149162,0.969798009,"https:\u002f\u002fscholar.google.com\u002fscholar?q=PowerChop%3A%20identifying%20and%20managing%20non-critical%20units%20in%20hybrid%20processor%20architectures"],["Future vector microprocessor extensions for data aggregations",0.001149162,0.970947171,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Future%20vector%20microprocessor%20extensions%20for%20data%20aggregations"],["Retracted on January 26, 2021: 3D-based video recognition acceleration by leveraging temporal locality",0.001149162,0.972096333,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Retracted%20on%20January%2026%2C%202021%3A%203D-based%20video%20recognition%20acceleration%20by%20leveraging%20temporal%20locality"],["FaultHound: value-locality-based soft-fault tolerance",0.001103486,0.973199818,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FaultHound%3A%20value-locality-based%20soft-fault%20tolerance"],["Short-circuit dispatch: accelerating virtual machine interpreters on embedded processors",0.001103486,0.974303304,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Short-circuit%20dispatch%3A%20accelerating%20virtual%20machine%20interpreters%20on%20embedded%20processors"],["TCEP: traffic consolidation for energy-proportional high-radix networks",0.001103486,0.97540679,"https:\u002f\u002fscholar.google.com\u002fscholar?q=TCEP%3A%20traffic%20consolidation%20for%20energy-proportional%20high-radix%20networks"],["Data compression accelerator on IBM POWER9 and z15 processors",0.001103486,0.976510275,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Data%20compression%20accelerator%20on%20IBM%20POWER9%20and%20z15%20processors"],["Auto-predication of critical branches",0.001103486,0.977613761,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Auto-predication%20of%20critical%20branches"],["Lelantus: fine-granularity copy-on-write operations for secure non-volatile memories",0.001103486,0.978717246,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Lelantus%3A%20fine-granularity%20copy-on-write%20operations%20for%20secure%20non-volatile%20memories"],["Check-in: in-storage checkpointing for key-value store system leveraging flash-based SSDs",0.001103486,0.979820732,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Check-in%3A%20in-storage%20checkpointing%20for%20key-value%20store%20system%20leveraging%20flash-based%20SSDs"],["Independent forward progress of work-groups",0.001103486,0.980924218,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Independent%20forward%20progress%20of%20work-groups"],["Supporting legacy libraries on non-volatile memory: a user-transparent approach",0.001103486,0.982027703,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Supporting%20legacy%20libraries%20on%20non-volatile%20memory%3A%20a%20user-transparent%20approach"],["Revamping storage class memory with hardware automated memory-over-storage solution",0.001103486,0.983131189,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Revamping%20storage%20class%20memory%20with%20hardware%20automated%20memory-over-storage%20solution"],["NASGuard: a novel accelerator architecture for robust neural architecture search (NAS) networks",0.001103486,0.984234674,"https:\u002f\u002fscholar.google.com\u002fscholar?q=NASGuard%3A%20a%20novel%20accelerator%20architecture%20for%20robust%20neural%20architecture%20search%20%28NAS%29%20networks"],["Charm: a language for closed-form high-level architecture modeling",0.001052993,0.985287667,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Charm%3A%20a%20language%20for%20closed-form%20high-level%20architecture%20modeling"],["FFCCD: fence-free crash-consistent concurrent defragmentation for persistent memory",0.001052993,0.98634066,"https:\u002f\u002fscholar.google.com\u002fscholar?q=FFCCD%3A%20fence-free%20crash-consistent%20concurrent%20defragmentation%20for%20persistent%20memory"],["HyperTRIO: hyper-tenant translation of I\u002fO addresses",0.000996547,0.987337207,"https:\u002f\u002fscholar.google.com\u002fscholar?q=HyperTRIO%3A%20hyper-tenant%20translation%20of%20I\u002fO%20addresses"],["Commutative data reordering: a new technique to reduce data movement energy on sparse inference workloads",0.000996547,0.988333754,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Commutative%20data%20reordering%3A%20a%20new%20technique%20to%20reduce%20data%20movement%20energy%20on%20sparse%20inference%20workloads"],["Zero inclusion victim: isolating core caches from inclusive last-level cache evictions",0.000996547,0.9893303,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Zero%20inclusion%20victim%3A%20isolating%20core%20caches%20from%20inclusive%20last-level%20cache%20evictions"],["LaZy superscalar",0.000932554,0.990262854,"https:\u002f\u002fscholar.google.com\u002fscholar?q=LaZy%20superscalar"],["Lemonade from Lemons: Harnessing Device Wearout to Create Limited-Use Security Architectures",0.000932554,0.991195407,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Lemonade%20from%20Lemons%3A%20Harnessing%20Device%20Wearout%20to%20Create%20Limited-Use%20Security%20Architectures"],["Maya: using formal control to obfuscate power side channels",0.000932554,0.992127961,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Maya%3A%20using%20formal%20control%20to%20obfuscate%20power%20side%20channels"],["LightPC: hardware and software co-design for energy-efficient full system persistence",0.000932554,0.993060514,"https:\u002f\u002fscholar.google.com\u002fscholar?q=LightPC%3A%20hardware%20and%20software%20co-design%20for%20energy-efficient%20full%20system%20persistence"],["Rethinking programmable earable processors",0.000858679,0.993919193,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Rethinking%20programmable%20earable%20processors"],["Production-run software failure diagnosis via Adaptive Communication Tracking",0.000771303,0.994690496,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Production-run%20software%20failure%20diagnosis%20via%20Adaptive%20Communication%20Tracking"],["Bouncer: static program analysis in hardware",0.000771303,0.9954618,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Bouncer%3A%20static%20program%20analysis%20in%20hardware"],["Execution dependence extension (EDE): isa support for eliminating fences",0.000771303,0.996233103,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Execution%20dependence%20extension%20%28EDE%29%3A%20isa%20support%20for%20eliminating%20fences"],["Decoupling loads for nano-instruction set computers",0.000664365,0.996897468,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Decoupling%20loads%20for%20nano-instruction%20set%20computers"],["BOSS: bandwidth-optimized search accelerator for storage-class memory",0.000664365,0.997561832,"https:\u002f\u002fscholar.google.com\u002fscholar?q=BOSS%3A%20bandwidth-optimized%20search%20accelerator%20for%20storage-class%20memory"],["Constructing a weak memory model",0.000526496,0.998088328,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Constructing%20a%20weak%20memory%20model"],["Dynamic memory dependence predication",0.000526496,0.998614825,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Dynamic%20memory%20dependence%20predication"],["Space-time algebra: a model for neocortical computation",0.000526496,0.999141321,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Space-time%20algebra%3A%20a%20model%20for%20neocortical%20computation"],["Speculative vectorisation with selective replay",0.000526496,0.999667818,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Speculative%20vectorisation%20with%20selective%20replay"],["Ghost routing to enable oblivious computation on memory-centric networks",0.000332182,1.0,"https:\u002f\u002fscholar.google.com\u002fscholar?q=Ghost%20routing%20to%20enable%20oblivious%20computation%20on%20memory-centric%20networks"]],"hovertemplate":"\u003cb\u003e%{customdata[0]}\u003c\u002fb\u003e\u003cbr\u003eCitations: %{y}\u003cbr\u003e% of Total: %{customdata[1]:.2%}\u003cbr\u003e\u003ci\u003eClick bar to search\u003c\u002fi\u003e\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"color":"rgb(55, 83, 109)"},"name":"Citations","x":["In-Datacenter Performance Analysis of a ...","EIE: efficient inference engine on compr...","ISAAC: a convolutional neural network ac...","Eyeriss: a spatial architecture for ener...","PRIME: a novel processing-in-memory arch...","SCNN: An Accelerator for Compressed-spar...","ShiDianNao: shifting vision processing c...","A scalable processing-in-memory accelera...","Cnvlutin: ineffectual-neuron-free deep n...","Minerva: enabling low-power, highly-accu...","Bit fusion: bit-level dynamically compos...","A configurable cloud-scale DNN processor...","Heracles: improving resource efficiency ...","MLPerf inference benchmark","Profiling a warehouse-scale computer","PIM-enabled instructions: a low-overhead...","Neurocube: a programmable digital neurom...","Ten lessons from three generations shape...","Neural cache: bit-serial in-cache accele...","Scalpel: Customizing DNN Pruning to the ...","Cambricon: an instruction set architectu...","Maximizing CNN Accelerator Efficiency Th...","Accel-sim: an extensible simulation fram...","Plasticine: A Reconfigurable Architectur...","Firesim: FPGA-accelerated cycle-exact sc...","Biscuit: a framework for near-data proce...","Transparent offloading and mapping (TOM)...","Hardware architecture and software stack...","RecNMP: accelerating personalized recomm...","UCNN: exploiting computational reuse in ...","MCM-GPU: Multi-Chip-Module GPUs for Cont...","ScaleDeep: A Scalable Compute Architectu...","RedEye: analog ConvNet image sensor arch...","Back to the future: leveraging Belady's ...","Data reorganization in memory using 3D-s...","BlueDBM: an appliance for big data analy...","Pioneering chiplet technology and design...","Stream-Dataflow Acceleration","Redundant memory mappings for fast acces...","CraterLake: a hardware accelerator for e...","Revisiting RowHammer: an experimental an...","ACT: designing sustainable computer syst...","Sparse ReRAM engine: joint exploration o...","Energy-efficient neural network accelera...","DeepRecSys: a system for optimizing end-...","SnaPEA: predictive early activation for ...","Full-stack, real-system quantum computer...","BTS: an accelerator for bootstrappable f...","ELSA: hardware-software co-design for ef...","Energy efficient architecture for graph ...","New attacks and defense for encrypted-ad...","DjiNN and Tonic: DNN as a service and it...","EDDIE: EM-Based Detection of Deviations ...","Regaining Lost Cycles with HotCalls: A F...","Dynamo: facebook's data center-wide powe...","Understanding and Optimizing Asynchronou...","Rumba: an online quality management syst...","Asymptotic improvements to quantum circu...","Statistical assertions for validating pa...","Gist: efficient data encoding for deep n...","Clank: Architectural Support for Intermi...","Accelerating distributed reinforcement l...","DSAGEN: synthesizing programmable spatia...","CoSA: scheduling by constrained optimiza...","TWiCe: preventing row-hammering by explo...","HeteroOS: OS Design for Heterogeneous Me...","The Reach Profiler (REAPER): Enabling th...","Duality cache for data parallel accelera...","ASIC clouds: specializing the datacenter","Secure Hierarchy-Aware Cache Replacement...","Architecting to achieve a billion reques...","Automatic generation of efficient accele...","Modular routing design for chiplet-based...","CoNDA: efficient cache coherence support...","The Mondrian Data Engine","Efficient invisible speculative executio...","SoftSKU: optimizing server architectures...","MGPUSim: enabling multi-GPU performance ...","Centaur: a chiplet-based, hybrid sparse-...","Warped-compression: enabling power effic...","Warped-slicer: efficient intra-SM slicin...","Laconic deep learning inference accelera...","Semantic locality and context-based pref...","Think fast: a tensor streaming processor...","A case for core-assisted bottleneck acce...","Xuantie-910: a commercial multi-core 12-...","A fully associative, tagless DRAM cache","Accelerating dependent cache misses with...","GraFboost: using accelerated flash stora...","Perceptron-based prefetch filtering","Flexible software profiling of GPU archi...","A multi-neural network acceleration arch...","Flexible auto-refresh: enabling scalable...","Quantitative comparison of hardware tran...","Bit-plane compression: transforming data...","SpinalFlow: an architecture and dataflow...","Sparsity-aware and re-configurable NPU a...","Language-level persistency","PACMAN: attacking ARM pointer authentica...","CAWA: coordinated warp scheduling and ca...","Genax: a genome sequencing accelerator","Enabling scientific computing on memrist...","AsmDB: understanding and mitigating fron...","Treadmill: attributing the source of tai...","MuonTrap: preventing cross-domain spectr...","DRQ: dynamic region-based quantization f...","Euphrates: algorithm-SoC co-design for l...","Mitigating wordline crosstalk using adap...","GANAX: a unified MIMD-SIMD acceleration ...","FLIN: enabling fairness and enhancing pe...","Dual-side sparse tensor core","CROW: a low-cost substrate for improving...","MicroScope: enabling microarchitectural ...","RaPiD: AI accelerator for ultra-low prec...","Hybrid TLB Coalescing: Improving TLB Tra...","Computation reuse in DNNs by exploiting ...","EVA2: exploiting temporal redundancy in ...","Bouquet of instruction pointers: instruc...","Snafu: an ultra-low-power, energy-minima...","ObfusMem: A Low-Overhead Access Obfuscat...","Architecting noisy intermediate-scale tr...","iPIM: programmable in-memory image proce...","Interplay between hardware prefetcher an...","BEAR: techniques for mitigating bandwidt...","PipeZK: accelerating zero-knowledge proo...","Hi-fi playback: tolerating position erro...","XED: exposing on-die error detection inf...","Mellow writes: extending lifetime in res...","Rethinking TLB Designs in Virtualized En...","Stash: have your scratchpad and cache it...","TIMELY: pushing data movements and inter...","InvisiMem: Smart Memory Defenses for Mem...","I see dead \u03bcops: leaking secrets via Int...","APPROX-NoC: A Data Approximation Framewo...","GraphSSD: graph semantics aware SSD","Evolution of the samsung exynos CPU micr...","FORMS: fine-grained polarized ReRAM-base...","Agile paging: exceeding the best of nest...","Access Pattern-Aware Cache Management fo...","Prediction based execution on deep neura...","Hydra: enabling low-overhead mitigation ...","Triad-NVM: persistency for integrity-pro...","GoSPA: an energy-efficient high-performa...","MnnFast: a fast and scalable system arch...","TIE: energy-efficient tensor train-based...","TENET: a framework for modeling tensor d...","Morpheus: creating application objects e...","HASCO: towards agile hardware and softwa...","Exploring the potential of heterogeneous...","Unified address translation for memory-m...","Anubis: ultra-low overhead and recovery ...","Eager pruning: algorithm and architectur...","Jenga: Software-Defined Cache Hierarchie...","RANA: towards efficient neural accelerat...","NN-baton: DNN workload orchestration and...","Harmonia: balancing compute and memory p...","Dynamic thread block launch: a lightweig...","DeepAttest: an end-to-end attestation fr...","QUAC-TRNG: high-throughput true random n...","GaaS-X: graph analytics accelerator supp...","Translation ranger: operating system sup...","Do-It-Yourself Virtual Memory Translatio...","Scheduling page table walks for irregula...","PolyGraph: exposing the value of flexibi...","The load slice core microarchitecture","FASE: finding amplitude-modulated side-c...","The locality descriptor: a holistic cros...","BioHD: an efficient genome sequence sear...","An in-network architecture for accelerat...","Demystifying the system vulnerability st...","DHTM: durable hardware transactional mem...","Energy-efficient video processing for vi...","Speculative data-oblivious execution: mo...","ARM virtualization: performance and arch...","Density tradeoffs of non-volatile memory...","Janus: optimizing memory and storage sup...","Using multiple input, multiple output fo...","Efficient metadata management for irregu...","PROMISE: an end-to-end design of a progr...","Cost-efficient overclocking in immersion...","Towards sustainable in-situ server syste...","HEB: deploying and managing hybrid energ...","CLR-DRAM: a low-cost DRAM architecture e...","2QAN: a quantum compiler for 2-local qub...","LaPerm: locality aware scheduler for dyn...","Virtual thread: maximizing thread-level ...","Guaranteeing local differential privacy ...","A quantum computational compiler and des...","SARA: scaling a reconfigurable dataflow ...","Rethinking belady's algorithm to accommo...","Hiding the Long Latency of Persist Barri...","Quality of Service Support for Fine-Grai...","EQC: ensembled quantum computing for var...","NEBULA: a neuromorphic spin-based ultra-...","Multiple clone row DRAM: a low latency a...","Towards statistical guarantees in contro...","Generative and multi-phase learning for ...","uGEMM: unary computing architecture for ...","DIMMining: pruning-efficient and paralle...","MeRLiN: Exploiting Dynamic Instruction B...","Spandex: a flexible interface for effici...","Buddy compression: enabling larger memor...","MITTS: memory inter-arrival time traffic...","Hardware Translation Coherence for Virtu...","EbDa: A New Theory on Design and Verific...","SeGraM: a universal hardware accelerator...","A software-defined tensor streaming mult...","Page overlays: an enhanced virtual memor...","Power attack defense: securing battery-b...","To PIM or not for emerging general purpo...","Stream-based memory access specializatio...","Bonsai: high-performance adaptive merge ...","Enhancing and exploiting contiguity for ...","A variable warp size architecture","PowerChief: Intelligent Power Allocation...","Chasing Away RAts: Semantics and Evaluat...","LogCA: A High-Level Performance Model fo...","Don't forget the I\u002fO when allocating you...","Opening pandora's box: a systematic stud...","Thermal time shifting: leveraging phase ...","ActivePointers: a case for software addr...","PrORAM: dynamic prefetcher for oblivious...","Exploiting dynamic timing slack for ener...","2B-SSD: the case for dual, byte- and blo...","RegMutex: inter-warp GPU register time-s...","Opportunistic computing in GPU architect...","NISQ+: boosting quantum computing power ...","SQUARE: strategic quantum ancilla reuse ...","Flex: high-availability datacenters with...","SPACE: locality-aware processing in hete...","CloudMonatt: an architecture for securit...","Parallel Automata Processor","Secure TLBs","The NeBuLa RPC-optimized architecture","Enabling compute-communication overlap i...","A stochastic-computing based deep learni...","Hyper-AP: enhancing associative processi...","Ripple: profile-guided instruction cache...","COP: to compress and protect main memory","SmartExchange: trading higher-cost memor...","Leaky buddies: cross-component covert ch...","Sibyl: adaptive and extensible data plac...","Geyser: a compilation framework for quan...","Nested enclave: supporting fine-grained ...","Near data acceleration with concurrent h...","ABC-DIMM: alleviating the bottleneck of ...","Exploiting long-distance interactions an...","A case for richer cross-layer abstractio...","SysScale: exploiting multi-domain dynami...","A hardware accelerator for tracing garba...","Albireo: energy-efficient acceleration o...","Themis: a network bandwidth-aware collec...","Practical memory safety with REST","Synchronized progress in interconnection...","Strober: fast and accurate sample-based ...","Designing vertical processors in monolit...","Cryogenic computer architecture modeling...","Axiomatic hardware-software contracts fo...","DynaSpAM: dynamic spatial architecture m...","Sieve: scalable in-situ DRAM-based accel...","ArMOR: defending against memory consiste...","Probable cause: the deanonymizing effect...","Fusion: design tradeoffs in coherent cac...","Boosting access parallelism to PCM-based...","Lazy persistency: a high-performing and ...","IntelliNoC: a holistic design framework ...","Perforated page: supporting fragmented m...","CODIC: a low-cost substrate for enabling...","MOESI-prime: preventing coherence-induce...","Lukewarm serverless functions: character...","Efficient execution of memory access pha...","Peak efficiency aware scheduling for hig...","CASH: supporting IaaS customers with a s...","Bespoke Processors for Applications with...","Criticality aware tiered cache hierarchy...","CryoCore: a fast and dense processor arc...","FlexMiner: a pattern-aware accelerator f...","APRES: improving cache efficiency by exp...","Viyojit: Decoupling Battery and DRAM Cap...","Robox: an end-to-end solution to acceler...","CHEx86: context-sensitive enforcement of...","JPEG-ACT: accelerating deep learning via...","Echo: compiler-based GPU memory footprin...","Confidential serverless made efficient w...","Aggressive Pipelining of Irregular Appli...","Genesis: a hardware acceleration framewo...","Gorgon: accelerating machine learning fr...","No-FAT: architectural support for low ov...","Manycore network interfaces for in-memor...","Generic system calls for GPUs","Relaxed persist ordering using strand pe...","Communication algorithm-architecture co-...","There's always a bigger fish: a clarifyi...","Coherence protocol for transparent manag...","All-inclusive ECC: thorough end-to-end p...","AccQOC: accelerating quantum optimal con...","Hoop: efficient hardware-assisted out-of...","The anytime automaton","DICE: Compressing DRAM Caches for Bandwi...","Software-hardware co-optimization for co...","A RISC-V in-network accelerator for flex...","VIP: virtualizing IP chains on handheld ...","Evaluation of an analog accelerator for ...","Exploiting page table locality for agile...","Unlimited vector extension with data str...","Energy efficient data encoding in DRAM c...","ACCORD: enabling associativity for gigas...","Stitch: fusible heterogeneous accelerato...","Rebooting virtual memory with midgard","SNS's not a synthesizer: a deep-learning...","Training personalized recommendation sys...","LAP: loop-block aware inclusion properti...","Scalable interconnects for reconfigurabl...","Printed microprocessors","D\u00e9j\u00e0 view: spatio-temporal compute reuse...","A specialized architecture for object se...","DRAF: a low-power DRAM-based reconfigura...","Get out of the valley: power-efficient a...","SecDir: a secure directory to defeat dir...","T4: compiling sequential code for effect...","Hardware-based domain virtualization for...","The virtual block interface: a flexible ...","Designing calibration and expressivity-e...","NDMiner: accelerating graph pattern mini...","Fractal: An Execution Model for Fine-Gra...","Bit-level perceptron prediction for indi...","Hardware-software co-design for brain-co...","CryoGuard: a near refresh-free robust DR...","MeNDA: a near-memory multi-way merge sol...","Base-victim compression: an opportunisti...","Non-Speculative Load-Load Reordering in ...","Division of labor: a more effective appr...","Flexon: a flexible digital neuron for ef...","Tiny but mighty: designing and realizing...","Reducing world switches in virtualized e...","SEESAW: using superpages to improve VIPT...","Adaptive memory-side last-level GPU cach...","XPC: architectural support for secure an...","The dark side of DNN pruning","Divide and conquer frontend bottleneck","Accelerated seeding for genome sequence ...","Large-scale graph processing on FPGAs wi...","IntroSpectre: a pre-silicon framework fo...","EDAM: edit distance tolerant approximate...","Asymmetry-aware work-stealing runtimes","The IBM zl5 high frequency mainframe bra...","A case for hardware-based demand paging","SHRINK: Reducing the ISA complexity via ...","Callback: efficient synchronization with...","ThermoGater: Thermally-Aware On-Chip Vol...","Tailored page sizes","A cost-effective entangling prefetcher f...","SpZip: architectural support for effecti...","INSPIRE: in-storage private information ...","Increasing ising machine capacity with m...","MGX: near-zero overhead memory protectio...","Accelerating asynchronous programs throu...","Hiding intermittent information leakage ...","A bus authentication and anti-probing ar...","ZnG: architecting GPU multi-processors w...","REDUCT: keep it close, keep it cool!: ef...","Satori: efficient and fair resource part...","HiveMind: a hardware-software system sta...","Efficient synonym filtering and scalable...","Efficient synonym filtering and scalable...","Decoupled Affine Computation for SIMT GP...","Exploring predictive replacement policie...","Fine-grained warm water cooling for impr...","NvMR: non-volatile memory renaming for i...","Computer performance microscopy with Shi...","SLIP: reducing wire energy in the memory...","MBus: an ultra-low power interconnect bu...","Architectural Support for Server-Side PH...","MorLog: morphable hardware logging for a...","Hetero-ViTAL: a virtualization stack for...","Failure sentinels: ubiquitous just-in-ti...","Cambricon-Q: a hybrid architecture for e...","ZeR\u00d8: zero-overhead resilient operation ...","t\u00e4k\u014d: a polymorphic cache hierarchy for ...","PPMLAC: high performance chipset archite...","A synthesis framework for stitching surf...","XQsim: modeling cross-technology control...","Protogen: automatically generating direc...","SCU: a GPU stream compaction unit for gr...","HALO: accelerating flow classification f...","Taming the zoo: the unified GraphIt comp...","MiSAR: minimalistic synchronization acce...","Rescuing uncorrectable fault patterns in...","Post-silicon CPU adaptation made practic...","Compact leakage-free support for integri...","Vector runahead","IChannels: exploiting current management...","RACOD: algorithm\u002fhardware co-design for ...","There and Back Again: Optimizing the Int...","Footprint: Regulating Routing Adaptivene...","Aurochs: an architecture for dataflow th...","Superconducting computing with alternati...","TDGraph: a topology-driven accelerator f...","uBrain: a unary brain computer interface","Cascading structured pruning: enabling h...","A Programmable Galois Field Processor fo...","Yukta: multilayer resource controllers t...","Nonblocking memory refresh","Mobilizing the micro-ops: exploiting con...","OO- VR: NUMA friendly object-oriented VR...","InvisiPage: oblivious demand paging for ...","GraphABCD: scaling out graph analytics w...","Packet chasing: spying on network packet...","Thermometer: profile-guided btb replacem...","Emerald: graphics modeling for SoC syste...","SOFF: an OpenCL high-level synthesis fra...","GCoM: a detailed GPU core model for accu...","Accelerating markov random field inferen...","Scaling datacenter accelerators with com...","BabelFish: fusing address translations f...","\u03b7-LSTM: co-designing highly-efficient la...","A Programmable Hardware Accelerator for ...","Securing GPU via region-based bounds che...","FlexiCores: low footprint, high yield, f...","Efficiently scaling out-of-order cores f...","XPro: A Cross-End Processing Architectur...","Accelerating GPU Hardware Transactional ...","Focused value prediction","Efficiently supporting dynamic task para...","TimeCache: using time to eliminate cache...","Accelerating database analytic query wor...","Master of none acceleration: a compariso...","Tvarak: software-managed hardware offloa...","Efficient multi-GPU shared memory via au...","BlockMaestro: enabling programmer-transp...","Gearbox: a case for supporting accumulat...","RelaxFault memory repair","Architecting a stochastic computing unit...","Linebacker: preserving victim cache line...","Free atomics: hardware atomic operations...","Clean: a race detector with cleaner sema...","CHARSTAR: Clock Hierarchy Aware Resource...","Non-speculative store coalescing in tota...","PES: proactive event scheduling for resp...","AxMemo: hardware-compiler co-design for ...","Using SMT to accelerate nested virtualiz...","Energy efficiency boost in the AI-infuse...","SIMD2: a generalized matrix instruction ...","Cost-effective speculative scheduling in...","Heat to power: thermal energy harvesting...","RingCNN: exploiting algebraically-sparse...","X-cache: a modular architecture for doma...","EyeCoD: eye tracking system acceleration...","DCS-ctrl: a fast and flexible device-con...","PMNet: in-network data persistence","A scalable architecture for reprioritizi...","Dynamic global adaptive routing in high-...","Opportunistic competition overhead reduc...","MTraceCheck: Validating Non-Deterministi...","Redundant Memory Array Architecture for ...","Slipstream processors revisited: exploit...","Dv\u00e9: improving DRAM reliability and perf...","CaSMap: agile mapper for reconfigurable ...","Branch vanguard: decomposing branch func...","ShortCut: Architectural Support for Fast...","Filter caching for free: the untapped po...","High-performance deep-learning coprocess...","Mocktails: capturing the memory behaviou...","HieraGen: automated generation of concur...","ScoRD: a scoped race detector for GPUs","NVOverlay: enabling efficient and scalab...","PS-ORAM: efficient crash consistency sup...","ASAP: architecture support for asynchron...","Register file prefetching","HetCore: TFET-CMOS hetero-device archite...","PF-DRAM: a precharge-free DRAM structure","FastTrack: leveraging heterogeneous FPGA...","Time squeezing for tiny devices","Flick: fast and lightweight ISA-crossing...","TransForm: formally specifying transiste...","Managing reliability skew in DNA storage","Virtual melting temperature: managing se...","Hardware supported permission checks on ...","Retracted on May 10, 2023: TPShare: a ti...","Quantifying server memory frequency marg...","NASA: accelerating neural network design...","SoftVN: efficient memory protection via ...","Anticipating and eliminating redundant c...","PowerChop: identifying and managing non-...","Future vector microprocessor extensions ...","Retracted on January 26, 2021: 3D-based ...","FaultHound: value-locality-based soft-fa...","Short-circuit dispatch: accelerating vir...","TCEP: traffic consolidation for energy-p...","Data compression accelerator on IBM POWE...","Auto-predication of critical branches","Lelantus: fine-granularity copy-on-write...","Check-in: in-storage checkpointing for k...","Independent forward progress of work-gro...","Supporting legacy libraries on non-volat...","Revamping storage class memory with hard...","NASGuard: a novel accelerator architectu...","Charm: a language for closed-form high-l...","FFCCD: fence-free crash-consistent concu...","HyperTRIO: hyper-tenant translation of I...","Commutative data reordering: a new techn...","Zero inclusion victim: isolating core ca...","LaZy superscalar","Lemonade from Lemons: Harnessing Device ...","Maya: using formal control to obfuscate ...","LightPC: hardware and software co-design...","Rethinking programmable earable processo...","Production-run software failure diagnosi...","Bouncer: static program analysis in hard...","Execution dependence extension (EDE): is...","Decoupling loads for nano-instruction se...","BOSS: bandwidth-optimized search acceler...","Constructing a weak memory model","Dynamic memory dependence predication","Space-time algebra: a model for neocorti...","Speculative vectorisation with selective...","Ghost routing to enable oblivious comput..."],"y":{"dtype":"f8","bdata":"wzZoAz6sDkCbwn32uXgMQOCQulthQwtAXNO5K4bKCkAStrFvrYYKQMFdNVCS2glAr5jFT9lUCUCNHMu2MZAIQM0m1JTjAAhA3vXZzuVUB0AqGRz\u002f5wsHQNqLOAKQCQdAsRoEg4\u002f8BkD0UHOj3NYGQAJT\u002fpwfzgZAc+N7HlrKBkAoZTE1MikGQGoMtOek6gVAlNRn2SPkBUCQAUPQfb8FQOBDa0DigQVA2nV5t9UrBUA27DFwxCMFQKLCXzPD8QRAvNGw4MrOBEDwqIzaWJwEQEsrxVvVcARAJTJ2OVpuBEAb7Fo2yx0EQIuf3Vlv6wNAsU1Q4Q3XA0AiQMqrGtQDQC2hs7OFowNA6tCnXWWgA0AtTk80HJoDQHnQhNdCfQNAedCE10J9A0A892q8fmkDQAOw\u002fFcoZgNAMaQy4XFfA0CzweVYrlgDQLPB5ViuWANA9FRwQJc5A0BKZuMr+y4DQBF+MJBqKwNAXT\u002f19oTsAkAAYk2Q0eQCQABiTZDR5AJAyrXv8QzdAkBTw6IVJNkCQJMPzX7VqAJAvHc8fIWgAkD7IVdmIZgCQEhSsbXnkwJA53ofaoJoAkADS8f\u002fkV8CQIaS0xsRWwJABwXYWYpWAkDpn4eq\u002fVECQGVPOkXSSAJA2sVLbzNEAkBwLzabMTYCQHQsm6t5MQJADmhR7iojAkA+t1bP1wUCQIPVtjPcAAJAeIJ+WM\u002f2AUAwJAm9XOIBQHlYyUO20gFAeVjJQ7bSAUDlZu3zbs0BQFhcfAfIwgFAWFx8B8jCAUAeQScNALgBQE6RSzUWrQFATpFLNRatAUAptWgYdpwBQN5E3obZlgFA3kTehtmWAUCGtMPohIsBQM1qEqDMhQFAgyMg3QqAAUCrfI0sslwBQPAHj3iqVgFAJXH20SA+AUAlcfbRID4BQANWiPPiNwFAA1aI8+I3AUADVojz4jcBQF4vUsqZMQFAjzD9LEUrAUCPMP0sRSsBQHnAhvHkJAFAbx0e7XgeAUBvHR7teB4BQAWl3\u002fMAGAFABaXf8wAYAUAT1NTYfBEBQBw13e2l\u002fQBA5KLo7ivwAEDkoujuK\u002fAAQDF+0yFb6QBAUy+XC5fUAEC3DYISj80AQLcNghKPzQBAtw2CEo\u002fNAEBj68PBeMYAQGPrw8F4xgBAY+vDwXjGAEDw57srILgAQPDnuysguABAVjbwximiAEBWNvDGKaIAQFY28MYpogBAH3V2XbiaAEAfdXZduJoAQB91dl24mgBAGectJk98AEAZ5y0mT3wAQOM60IeKdABA4zrQh4p0AEDjOtCHinQAQCxFOWvMZABA9PIisMVUAED08iKwxVQAQHtPh0OmTABAfRN1rHNEAEB9E3Wsc0QAQH0TdaxzRABA6Vfsjy08AEBwk7SP0zMAQM6SD2DiIgBAzpIPYOIiAEAEXyxnShoAQARfLGdKGgBAwAcApdkIAEDABwCl2QgAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAZ1iUKx\u002fu\u002fz9nWJQrH+7\u002fP\u002fpaYijRyf8\u002f+lpiKNHJ\u002fz\u002f6WmIo0cn\u002fPxoJwIHBpP8\u002fGgnAgcGk\u002fz9KGzyD7pH\u002fP0obPIPukf8\u002fShs8g+6R\u002fz+nBAv6537\u002fP2z308isa\u002f8\u002fbPfTyKxr\u002fz9s99PIrGv\u002fP7JnYsk7WP8\u002fsmdiyTtY\u002fz+yZ2LJO1j\u002fP7JnYsk7WP8\u002fT7oJy5NE\u002fz9pbXOTszD\u002fPyIPvtyZHP8\u002fIg++3Jkc\u002fz8iD77cmRz\u002fP7+OGVdFCP8\u002fv44ZV0UI\u002fz+\u002fjhlXRQj\u002fP7+OGVdFCP8\u002fT0E3Yebe\u002fj\u002fiwrk6i7T+P26MD0b7nv4\u002fbowPRvue\u002fj9ujA9G+57+P84sWJYnif4\u002fzixYlieJ\u002fj86KXh8DnP+P+6TfTmuXP4\u002f7pN9Oa5c\u002fj9vK439BEb+P28rjf0ERv4\u002fOVEB5hAv\u002fj85UQHmEC\u002f+PzlRAeYQL\u002f4\u002fOVEB5hAv\u002fj9UMjj+zxf+P1QyOP7PF\u002f4\u002fVDI4\u002fs8X\u002fj9UMjj+zxf+P1QyOP7PF\u002f4\u002fpE0pPUAA\u002fj+3uyCFX+j9P7e7IIVf6P0\u002ft7sghV\u002fo\u002fT9PbZmhK9D9P4njgUeit\u002f0\u002fieOBR6K3\u002fT+J44FHorf9P4njgUeit\u002f0\u002fieOBR6K3\u002fT9obhYTwZ79P2huFhPBnv0\u002faG4WE8Ge\u002fT8\u002f\u002fUEG7Wv9Pz\u002f9QQbta\u002f0\u002fP\u002f1BBu1r\u002fT8\u002f\u002fUEG7Wv9P4lqr930Uf0\u002fTPRuNpo3\u002fT9M9G42mjf9P0z0bjaaN\u002f0\u002f9hQsGtoc\u002fT\u002f2FCwa2hz9P\u002fYULBraHP0\u002fw76sb7EB\u002fT\u002fDvqxvsQH9P8O+rG+xAf0\u002fw76sb7EB\u002fT\u002fDvqxvsQH9P8O+rG+xAf0\u002foCXFUBnK\u002fD+gJcVQGcr8PwYJXuiirfw\u002fBgle6KKt\u002fD8GCV7ooq38PwYJXuiirfw\u002fBgle6KKt\u002fD8GCV7ooq38PwYJXuiirfw\u002fBgle6KKt\u002fD8GCV7ooq38P4VeXgS2kPw\u002fhV5eBLaQ\u002fD+FXl4EtpD8P4VeXgS2kPw\u002fhV5eBLaQ\u002fD\u002fedqG5TnP8P952oblOc\u002fw\u002f3nahuU5z\u002fD9liFnraFX8P2WIWetoVfw\u002fZYhZ62hV\u002fD9liFnraFX8P2WIWetoVfw\u002fTUuxRQA3\u002fD9NS7FFADf8P01LsUUAN\u002fw\u002fTUuxRQA3\u002fD8nOKY7EBj8Pyc4pjsQGPw\u002fi5QzApT4+z+LlDMClPj7P4uUMwKU+Ps\u002fGvAGjIbY+z8a8AaMhtj7P1zp74Tit\u002fs\u002fQuluS6KW+z9C6W5Lopb7P0Lpbkuilvs\u002f\u002feck7L90+z\u002f95yTsv3T7Pyy42Bo1Uvs\u002fLLjYGjVS+z8suNgaNVL7Pyy42Bo1Uvs\u002fLLjYGjVS+z8suNgaNVL7Pyy42Bo1Uvs\u002fLLjYGjVS+z8suNgaNVL7Pyy42Bo1Uvs\u002fMgrBK\u002fsu+z8yCsEr+y77PzIKwSv7Lvs\u002fMgrBK\u002fsu+z8yCsEr+y77PzIKwSv7Lvs\u002fMgrBK\u002fsu+z+h1nYLCwv7P6HWdgsLC\u002fs\u002fodZ2CwsL+z+h1nYLCwv7P6HWdgsLC\u002fs\u002fodZ2CwsL+z+h1nYLCwv7PxigGTZd5vo\u002fGKAZNl3m+j8YoBk2Xeb6PxigGTZd5vo\u002ffSX9runA+j99Jf2u6cD6P30l\u002fa7pwPo\u002ffSX9runA+j99Jf2u6cD6P8652fSnmvo\u002fzrnZ9Kea+j\u002fOudn0p5r6P8652fSnmvo\u002fsXwP945z+j+xfA\u002f3jnP6P7F8D\u002feOc\u002fo\u002fsXwP945z+j9UGX8IlUv6P1QZfwiVS\u002fo\u002fVBl\u002fCJVL+j9UGX8IlUv6P\u002fMU2dGvIvo\u002f8xTZ0a8i+j\u002fzFNnRryL6P\u002fMU2dGvIvo\u002f8xTZ0a8i+j\u002fzFNnRryL6Py7iXD\u002fU+Pk\u002fLuJcP9T4+T8u4lw\u002f1Pj5Py7iXD\u002fU+Pk\u002fLuJcP9T4+T\u002fPThVy9s35P89OFXL2zfk\u002fz04VcvbN+T\u002fPThVy9s35P89OFXL2zfk\u002fz04VcvbN+T\u002fPThVy9s35P89OFXL2zfk\u002fbDM5qAmi+T9sMzmoCaL5P2wzOagJovk\u002fbDM5qAmi+T9sMzmoCaL5P6E1TikAdfk\u002foTVOKQB1+T+hNU4pAHX5P6E1TikAdfk\u002foTVOKQB1+T+29D0qy0b5P7b0PSrLRvk\u002ftvQ9KstG+T+29D0qy0b5P+nF4bBaF\u002fk\u002f66WHc53m+D\u002frpYdzneb4P+ulh3Od5vg\u002f66WHc53m+D\u002frpYdzneb4P4N1fLGAtPg\u002fg3V8sYC0+D+DdXyxgLT4P7t0cAnwgPg\u002fu3RwCfCA+D+7dHAJ8ID4P7t0cAnwgPg\u002fu3RwCfCA+D+7dHAJ8ID4P7t0cAnwgPg\u002fu3RwCfCA+D+7dHAJ8ID4Pygf7UXVS\u002fg\u002fKB\u002ftRdVL+D8oH+1F1Uv4Pygf7UXVS\u002fg\u002fKB\u002ftRdVL+D8oH+1F1Uv4Pygf7UXVS\u002fg\u002fplyxJBgV+D+mXLEkGBX4P6ZcsSQYFfg\u002fplyxJBgV+D+mXLEkGBX4P6ZcsSQYFfg\u002faMtjFJ7c9z9oy2MUntz3P2jLYxSe3Pc\u002faMtjFJ7c9z9oy2MUntz3P2jLYxSe3Pc\u002faMtjFJ7c9z9oy2MUntz3P2jLYxSe3Pc\u002faMtjFJ7c9z9oy2MUntz3P2jLYxSe3Pc\u002faMtjFJ7c9z8\u002fOaflSaL3Pz85p+VJovc\u002fPzmn5Umi9z8\u002fOaflSaL3P39VcnH7Zfc\u002ff1Vycftl9z9\u002fVXJx+2X3P39VcnH7Zfc\u002ff1Vycftl9z9\u002fVXJx+2X3P39VcnH7Zfc\u002fvZ70LY8n9z+9nvQtjyf3P72e9C2PJ\u002fc\u002fvZ70LY8n9z+9nvQtjyf3P72e9C2PJ\u002fc\u002fvZ70LY8n9z+P87Cw3eb2P4\u002fzsLDd5vY\u002fj\u002fOwsN3m9j+P87Cw3eb2P4\u002fzsLDd5vY\u002fj\u002fOwsN3m9j+P87Cw3eb2P4\u002fzsLDd5vY\u002fj\u002fOwsN3m9j8v8uUXu6P2Py\u002fy5Re7o\u002fY\u002fL\u002fLlF7uj9j+UzMZX9l32P5TMxlf2XfY\u002flMzGV\u002fZd9j+UzMZX9l32P3liH2JYFfY\u002feWIfYlgV9j95Yh9iWBX2PwDEmiCjyfU\u002fAMSaIKPJ9T8AxJogo8n1PwDEmiCjyfU\u002fAMSaIKPJ9T8AxJogo8n1PwDEmiCjyfU\u002fttuENJB69T+QpGJrzyf1P5CkYmvPJ\u002fU\u002fkKRia88n9T+QpGJrzyf1P871PtQE0fQ\u002fzvU+1ATR9D\u002fO9T7UBNH0P871PtQE0fQ\u002f6P7+VcZ19D\u002fo\u002fv5VxnX0P+j+\u002flXGdfQ\u002f6P7+VcZ19D\u002fo\u002fv5VxnX0P+j+\u002flXGdfQ\u002f6P7+VcZ19D\u002fo\u002fv5VxnX0Px2wSJ+YFfQ\u002fHbBIn5gV9D8dsEifmBX0Px2wSJ+YFfQ\u002fHbBIn5gV9D\u002ftfjE166\u002fzP+1+MTXrr\u002fM\u002f7X4xNeuv8z\u002ftfjE166\u002fzPwgft1ATRPM\u002fCB+3UBNE8z8IH7dQE0TzPwgft1ATRPM\u002fCB+3UBNE8z8IH7dQE0TzP3JDaBFF0fI\u002fckNoEUXR8j9yQ2gRRdHyP3JDaBFF0fI\u002fckNoEUXR8j9yQ2gRRdHyP3JDaBFF0fI\u002fckNoEUXR8j9yQ2gRRdHyP3JDaBFF0fI\u002fckNoEUXR8j8eYfpZilbyPx5h+lmKVvI\u002fYfymQ7bS8T9h\u002fKZDttLxP2H8pkO20vE\u002fYfymQ7bS8T9h\u002fKZDttLxP6xs4I1TRPE\u002frGzgjVNE8T+sbOCNU0TxP6xs4I1TRPE\u002frGzgjVNE8T+sbOCNU0TxP6xs4I1TRPE\u002f6OVFYIup8D\u002fo5UVgi6nwP+jlRWCLqfA\u002fAAAAAAAA8D8AAAAAAADwPwAAAAAAAPA\u002fAAAAAAAA8D8AAAAAAADwPwAAAAAAAPA\u002fAAAAAAAA8D8AAAAAAADwPwAAAAAAAPA\u002fAAAAAAAA8D8AAAAAAADwP550E5Ynie4\u002fnnQTlieJ7j90UvD4HObsP3RS8Pgc5uw\u002fdFLw+Bzm7D+h1nYLCwvrP6HWdgsLC+s\u002fodZ2CwsL6z+h1nYLCwvrP7vtQnOd5ug\u002fZBSCV\u002fZd5j9kFIJX9l3mP2QUglf2XeY\u002f2GZyUBNE4z\u002fYZnJQE0TjP\u002f7knJYnid4\u002f\u002fuSclieJ3j\u002f+5JyWJ4neP\u002f7knJYnid4\u002fONf7UBNE0z8="},"type":"bar","xaxis":"x","yaxis":"y"},{"customdata":[["In-Datacenter Performance Analysis of a Tensor Processing Unit"],["EIE: efficient inference engine on compressed deep neural network"],["ISAAC: a convolutional neural network accelerator with in-situ analog arithmetic in crossbars"],["Eyeriss: a spatial architecture for energy-efficient dataflow for convolutional neural networks"],["PRIME: a novel processing-in-memory architecture for neural network computation in ReRAM-based main memory"],["SCNN: An Accelerator for Compressed-sparse Convolutional Neural Networks"],["ShiDianNao: shifting vision processing closer to the sensor"],["A scalable processing-in-memory accelerator for parallel graph processing"],["Cnvlutin: ineffectual-neuron-free deep neural network computing"],["Minerva: enabling low-power, highly-accurate deep neural network accelerators"],["Bit fusion: bit-level dynamically composable architecture for accelerating deep neural networks"],["A configurable cloud-scale DNN processor for real-time AI"],["Heracles: improving resource efficiency at scale"],["MLPerf inference benchmark"],["Profiling a warehouse-scale computer"],["PIM-enabled instructions: a low-overhead, locality-aware processing-in-memory architecture"],["Neurocube: a programmable digital neuromorphic architecture with high-density 3D memory"],["Ten lessons from three generations shaped Google's TPUv4i"],["Neural cache: bit-serial in-cache acceleration of deep neural networks"],["Scalpel: Customizing DNN Pruning to the Underlying Hardware Parallelism"],["Cambricon: an instruction set architecture for neural networks"],["Maximizing CNN Accelerator Efficiency Through Resource Partitioning"],["Accel-sim: an extensible simulation framework for validated GPU modeling"],["Plasticine: A Reconfigurable Architecture For Parallel Paterns"],["Firesim: FPGA-accelerated cycle-exact scale-out system simulation in the public cloud"],["Biscuit: a framework for near-data processing of big data workloads"],["Transparent offloading and mapping (TOM): enabling programmer-transparent near-data processing in GPU systems"],["Hardware architecture and software stack for PIM based on commercial DRAM technology"],["RecNMP: accelerating personalized recommendation with near-memory processing"],["UCNN: exploiting computational reuse in deep neural networks via weight repetition"],["MCM-GPU: Multi-Chip-Module GPUs for Continued Performance Scalability"],["ScaleDeep: A Scalable Compute Architecture for Learning and Evaluating Deep Networks"],["RedEye: analog ConvNet image sensor architecture for continuous mobile vision"],["Back to the future: leveraging Belady's algorithm for improved cache replacement"],["Data reorganization in memory using 3D-stacked DRAM"],["BlueDBM: an appliance for big data analytics"],["Pioneering chiplet technology and design for the AMD EPYC\u2122and Ryzen\u2122processor families"],["Stream-Dataflow Acceleration"],["Redundant memory mappings for fast access to large memories"],["CraterLake: a hardware accelerator for efficient unbounded computation on encrypted data"],["Revisiting RowHammer: an experimental analysis of modern DRAM devices and mitigation techniques"],["ACT: designing sustainable computer systems with an architectural carbon modeling tool"],["Sparse ReRAM engine: joint exploration of activation and weight sparsity in compressed neural networks"],["Energy-efficient neural network accelerator based on outlier-aware low-precision computation"],["DeepRecSys: a system for optimizing end-to-end at-scale neural recommendation inference"],["SnaPEA: predictive early activation for reducing computation in deep convolutional neural networks"],["Full-stack, real-system quantum computer studies: architectural comparisons and design insights"],["BTS: an accelerator for bootstrappable fully homomorphic encryption"],["ELSA: hardware-software co-design for efficient, lightweight self-attention mechanism in neural networks"],["Energy efficient architecture for graph analytics accelerators"],["New attacks and defense for encrypted-address cache"],["DjiNN and Tonic: DNN as a service and its implications for future warehouse scale computers"],["EDDIE: EM-Based Detection of Deviations in Program Execution"],["Regaining Lost Cycles with HotCalls: A Fast Interface for SGX Secure Enclaves"],["Dynamo: facebook's data center-wide power management system"],["Understanding and Optimizing Asynchronous Low-Precision Stochastic Gradient Descent"],["Rumba: an online quality management system for approximate computing"],["Asymptotic improvements to quantum circuits via qutrits"],["Statistical assertions for validating patterns and finding bugs in quantum programs"],["Gist: efficient data encoding for deep neural network training"],["Clank: Architectural Support for Intermittent Computation"],["Accelerating distributed reinforcement learning with in-switch computing"],["DSAGEN: synthesizing programmable spatial accelerators"],["CoSA: scheduling by constrained optimization for spatial accelerators"],["TWiCe: preventing row-hammering by exploiting time window counters"],["HeteroOS: OS Design for Heterogeneous Memory Management in Datacenter"],["The Reach Profiler (REAPER): Enabling the Mitigation of DRAM Retention Failures via Profiling at Aggressive Conditions"],["Duality cache for data parallel acceleration"],["ASIC clouds: specializing the datacenter"],["Secure Hierarchy-Aware Cache Replacement Policy (SHARP): Defending Against Cache-Based Side Channel Atacks"],["Architecting to achieve a billion requests per second throughput on a single key-value store server platform"],["Automatic generation of efficient accelerators for reconfigurable hardware"],["Modular routing design for chiplet-based systems"],["CoNDA: efficient cache coherence support for near-data accelerators"],["The Mondrian Data Engine"],["Efficient invisible speculative execution through selective delay and value prediction"],["SoftSKU: optimizing server architectures for microservice diversity @scale"],["MGPUSim: enabling multi-GPU performance modeling and optimization"],["Centaur: a chiplet-based, hybrid sparse-dense accelerator for personalized recommendations"],["Warped-compression: enabling power efficient GPUs through register compression"],["Warped-slicer: efficient intra-SM slicing through dynamic resource partitioning for GPU multiprogramming"],["Laconic deep learning inference acceleration"],["Semantic locality and context-based prefetching using reinforcement learning"],["Think fast: a tensor streaming processor (TSP) for accelerating deep learning workloads"],["A case for core-assisted bottleneck acceleration in GPUs: enabling flexible data compression with assist warps"],["Xuantie-910: a commercial multi-core 12-stage pipeline out-of-order 64-bit high performance RISC-V processor with vector extension"],["A fully associative, tagless DRAM cache"],["Accelerating dependent cache misses with an enhanced memory controller"],["GraFboost: using accelerated flash storage for external graph analytics"],["Perceptron-based prefetch filtering"],["Flexible software profiling of GPU architectures"],["A multi-neural network acceleration architecture"],["Flexible auto-refresh: enabling scalable and energy-efficient DRAM refresh reductions"],["Quantitative comparison of hardware transactional memory for Blue Gene\u002fQ, zEnterprise EC12, Intel Core, and POWER8"],["Bit-plane compression: transforming data for better compression in many-core architectures"],["SpinalFlow: an architecture and dataflow tailored for spiking neural networks"],["Sparsity-aware and re-configurable NPU architecture for samsung flagship mobile SoC"],["Language-level persistency"],["PACMAN: attacking ARM pointer authentication with speculative execution"],["CAWA: coordinated warp scheduling and cache prioritization for critical warp acceleration of GPGPU workloads"],["Genax: a genome sequencing accelerator"],["Enabling scientific computing on memristive accelerators"],["AsmDB: understanding and mitigating front-end stalls in warehouse-scale computers"],["Treadmill: attributing the source of tail latency through precise load testing and statistical inference"],["MuonTrap: preventing cross-domain spectre-like attacks by capturing speculative state"],["DRQ: dynamic region-based quantization for deep neural network acceleration"],["Euphrates: algorithm-SoC co-design for low-power mobile continuous vision"],["Mitigating wordline crosstalk using adaptive trees of counters"],["GANAX: a unified MIMD-SIMD acceleration for generative adversarial networks"],["FLIN: enabling fairness and enhancing performance in modern NVMe solid state drives"],["Dual-side sparse tensor core"],["CROW: a low-cost substrate for improving DRAM performance, energy efficiency, and reliability"],["MicroScope: enabling microarchitectural replay attacks"],["RaPiD: AI accelerator for ultra-low precision training and inference"],["Hybrid TLB Coalescing: Improving TLB Translation Coverage under Diverse Fragmented Memory Allocations"],["Computation reuse in DNNs by exploiting input similarity"],["EVA2: exploiting temporal redundancy in live computer vision"],["Bouquet of instruction pointers: instruction pointer classifier-based spatial hardware prefetching"],["Snafu: an ultra-low-power, energy-minimal CGRA-generation framework and architecture"],["ObfusMem: A Low-Overhead Access Obfuscation for Trusted Memories"],["Architecting noisy intermediate-scale trapped ion quantum computers"],["iPIM: programmable in-memory image processing accelerator using near-bank architecture"],["Interplay between hardware prefetcher and page eviction policy in CPU-GPU unified virtual memory"],["BEAR: techniques for mitigating bandwidth bloat in gigascale DRAM caches"],["PipeZK: accelerating zero-knowledge proof with a pipelined architecture"],["Hi-fi playback: tolerating position errors in shift operations of racetrack memory"],["XED: exposing on-die error detection information for strong memory reliability"],["Mellow writes: extending lifetime in resistive memories through selective slow write backs"],["Rethinking TLB Designs in Virtualized Environments: A Very Large Part-of-Memory TLB"],["Stash: have your scratchpad and cache it too"],["TIMELY: pushing data movements and interfaces in PIM accelerators towards local and in time domain"],["InvisiMem: Smart Memory Defenses for Memory Bus Side Channel"],["I see dead \u03bcops: leaking secrets via Intel\u002fAMD micro-op caches"],["APPROX-NoC: A Data Approximation Framework for Network-On-Chip Architectures"],["GraphSSD: graph semantics aware SSD"],["Evolution of the samsung exynos CPU microarchitecture"],["FORMS: fine-grained polarized ReRAM-based in-situ computation for mixed-signal DNN accelerator"],["Agile paging: exceeding the best of nested and shadow paging"],["Access Pattern-Aware Cache Management for Improving Data Utilization in GPU"],["Prediction based execution on deep neural networks"],["Hydra: enabling low-overhead mitigation of row-hammer at ultra-low thresholds via hybrid tracking"],["Triad-NVM: persistency for integrity-protected and encrypted non-volatile memories"],["GoSPA: an energy-efficient high-performance globally optimized sparse convolutional neural network accelerator"],["MnnFast: a fast and scalable system architecture for memory-augmented neural networks"],["TIE: energy-efficient tensor train-based inference engine for deep neural network"],["TENET: a framework for modeling tensor dataflow based on relation-centric notation"],["Morpheus: creating application objects efficiently for heterogeneous computing"],["HASCO: towards agile hardware and software co-design for tensor computation"],["Exploring the potential of heterogeneous von neumann\u002fdataflow execution models"],["Unified address translation for memory-mapped SSDs with FlashMap"],["Anubis: ultra-low overhead and recovery time for secure non-volatile memories"],["Eager pruning: algorithm and architecture support for fast training of deep neural networks"],["Jenga: Software-Defined Cache Hierarchies"],["RANA: towards efficient neural acceleration with refresh-optimized embedded DRAM"],["NN-baton: DNN workload orchestration and chiplet granularity exploration for multichip accelerators"],["Harmonia: balancing compute and memory power in high-performance GPUs"],["Dynamic thread block launch: a lightweight execution mechanism to support irregular applications on GPUs"],["DeepAttest: an end-to-end attestation framework for deep neural networks"],["QUAC-TRNG: high-throughput true random number generation using quadruple row activation in commodity DRAM chips"],["GaaS-X: graph analytics accelerator supporting sparse data representation using crossbar architectures"],["Translation ranger: operating system support for contiguity-aware TLBs"],["Do-It-Yourself Virtual Memory Translation"],["Scheduling page table walks for irregular GPU applications"],["PolyGraph: exposing the value of flexibility for graph processing accelerators"],["The load slice core microarchitecture"],["FASE: finding amplitude-modulated side-channel emanations"],["The locality descriptor: a holistic cross-layer abstraction to express data locality in GPUs"],["BioHD: an efficient genome sequence search platform using HyperDimensional memorization"],["An in-network architecture for accelerating shared-memory multiprocessor collectives"],["Demystifying the system vulnerability stack: transient fault effects across the layers"],["DHTM: durable hardware transactional memory"],["Energy-efficient video processing for virtual reality"],["Speculative data-oblivious execution: mobilizing safe prediction for safe and efficient speculative execution"],["ARM virtualization: performance and architectural implications"],["Density tradeoffs of non-volatile memory as a replacement for SRAM based last level cache"],["Janus: optimizing memory and storage support for non-volatile memory systems"],["Using multiple input, multiple output formal control to maximize resource efficiency in architectures"],["Efficient metadata management for irregular data prefetching"],["PROMISE: an end-to-end design of a programmable mixed-signal accelerator for machine-learning algorithms"],["Cost-efficient overclocking in immersion-cooled datacenters"],["Towards sustainable in-situ server systems in the big data era"],["HEB: deploying and managing hybrid energy buffers for improving datacenter efficiency and economy"],["CLR-DRAM: a low-cost DRAM architecture enabling dynamic capacity-latency trade-off"],["2QAN: a quantum compiler for 2-local qubit hamiltonian simulation algorithms"],["LaPerm: locality aware scheduler for dynamic parallelism on GPUs"],["Virtual thread: maximizing thread-level parallelism beyond GPU scheduling limit"],["Guaranteeing local differential privacy on ultra-low-power systems"],["A quantum computational compiler and design tool for technology-specific targets"],["SARA: scaling a reconfigurable dataflow accelerator"],["Rethinking belady's algorithm to accommodate prefetching"],["Hiding the Long Latency of Persist Barriers Using Speculative Execution"],["Quality of Service Support for Fine-Grained Sharing on GPUs"],["EQC: ensembled quantum computing for variational quantum algorithms"],["NEBULA: a neuromorphic spin-based ultra-low power architecture for SNNs and ANNs"],["Multiple clone row DRAM: a low latency and area optimized DRAM"],["Towards statistical guarantees in controlling quality tradeoffs for approximate acceleration"],["Generative and multi-phase learning for computer systems optimization"],["uGEMM: unary computing architecture for GEMM applications"],["DIMMining: pruning-efficient and parallel graph mining on near-memory-computing"],["MeRLiN: Exploiting Dynamic Instruction Behavior for Fast and Accurate Microarchitecture Level Reliability Assessment"],["Spandex: a flexible interface for efficient heterogeneous coherence"],["Buddy compression: enabling larger memory for deep learning and HPC workloads on GPUs"],["MITTS: memory inter-arrival time traffic shaping"],["Hardware Translation Coherence for Virtualized Systems"],["EbDa: A New Theory on Design and Verification of Deadlock-free Interconnection Networks"],["SeGraM: a universal hardware accelerator for genomic sequence-to-graph and sequence-to-sequence mapping"],["A software-defined tensor streaming multiprocessor for large-scale machine learning"],["Page overlays: an enhanced virtual memory framework to enable fine-grained memory management"],["Power attack defense: securing battery-backed data centers"],["To PIM or not for emerging general purpose processing in DDR memory systems"],["Stream-based memory access specialization for general purpose processors"],["Bonsai: high-performance adaptive merge tree sorting"],["Enhancing and exploiting contiguity for fast memory virtualization"],["A variable warp size architecture"],["PowerChief: Intelligent Power Allocation for Multi-Stage Applications to Improve Responsiveness on Power Constrained CMP"],["Chasing Away RAts: Semantics and Evaluation for Relaxed Atomics on Heterogeneous Systems"],["LogCA: A High-Level Performance Model for Hardware Accelerators"],["Don't forget the I\u002fO when allocating your LLC"],["Opening pandora's box: a systematic study of new ways microarchitecture can leak private data"],["Thermal time shifting: leveraging phase change materials to reduce cooling costs in warehouse-scale computers"],["ActivePointers: a case for software address translation on GPUs"],["PrORAM: dynamic prefetcher for oblivious RAM"],["Exploiting dynamic timing slack for energy efficiency in ultra-low-power embedded systems"],["2B-SSD: the case for dual, byte- and block-addressable solid-state drives"],["RegMutex: inter-warp GPU register time-sharing"],["Opportunistic computing in GPU architectures"],["NISQ+: boosting quantum computing power by approximating quantum error correction"],["SQUARE: strategic quantum ancilla reuse for modular quantum programs via cost-effective uncomputation"],["Flex: high-availability datacenters with zero reserved power"],["SPACE: locality-aware processing in heterogeneous memory for personalized recommendations"],["CloudMonatt: an architecture for security health monitoring and attestation of virtual machines in cloud computing"],["Parallel Automata Processor"],["Secure TLBs"],["The NeBuLa RPC-optimized architecture"],["Enabling compute-communication overlap in distributed deep learning training platforms"],["A stochastic-computing based deep learning framework using adiabatic quantum-flux-parametron superconducting technology"],["Hyper-AP: enhancing associative processing through a full-stack optimization"],["Ripple: profile-guided instruction cache replacement for data center applications"],["COP: to compress and protect main memory"],["SmartExchange: trading higher-cost memory storage\u002faccess for lower-cost computation"],["Leaky buddies: cross-component covert channels on integrated CPU-GPU systems"],["Sibyl: adaptive and extensible data placement in hybrid storage systems using online reinforcement learning"],["Geyser: a compilation framework for quantum computing with neutral atoms"],["Nested enclave: supporting fine-grained hierarchical isolation with SGX"],["Near data acceleration with concurrent host access"],["ABC-DIMM: alleviating the bottleneck of communication in DIMM-based near-memory processing with inter-DIMM broadcast"],["Exploiting long-distance interactions and tolerating atom loss in neutral atom quantum architectures"],["A case for richer cross-layer abstractions: bridging the semantic gap with expressive memory"],["SysScale: exploiting multi-domain dynamic voltage and frequency scaling for energy efficient mobile processors"],["A hardware accelerator for tracing garbage collection"],["Albireo: energy-efficient acceleration of convolutional neural networks via silicon photonics"],["Themis: a network bandwidth-aware collective scheduling policy for distributed training of DL models"],["Practical memory safety with REST"],["Synchronized progress in interconnection networks (SPIN): a new theory for deadlock freedom"],["Strober: fast and accurate sample-based energy simulation for arbitrary RTL"],["Designing vertical processors in monolithic 3D"],["Cryogenic computer architecture modeling with memory-side case studies"],["Axiomatic hardware-software contracts for security"],["DynaSpAM: dynamic spatial architecture mapping using out of order instruction schedules"],["Sieve: scalable in-situ DRAM-based accelerator designs for massively parallel k-mer matching"],["ArMOR: defending against memory consistency model mismatches in heterogeneous architectures"],["Probable cause: the deanonymizing effects of approximate DRAM"],["Fusion: design tradeoffs in coherent cache hierarchies for accelerators"],["Boosting access parallelism to PCM-based main memory"],["Lazy persistency: a high-performing and write-efficient software persistency technique"],["IntelliNoC: a holistic design framework for energy-efficient and reliable on-chip communication for manycores"],["Perforated page: supporting fragmented memory allocation for large pages"],["CODIC: a low-cost substrate for enabling custom in-DRAM functionalities and optimizations"],["MOESI-prime: preventing coherence-induced hammering in commodity workloads"],["Lukewarm serverless functions: characterization and optimization"],["Efficient execution of memory access phases using dataflow specialization"],["Peak efficiency aware scheduling for highly energy proportional servers"],["CASH: supporting IaaS customers with a sub-core configurable architecture"],["Bespoke Processors for Applications with Ultra-low Area and Power Constraints"],["Criticality aware tiered cache hierarchy: a fundamental relook at multi-level cache hierarchies"],["CryoCore: a fast and dense processor architecture for cryogenic computing"],["FlexMiner: a pattern-aware accelerator for graph pattern mining"],["APRES: improving cache efficiency by exploiting load characteristics on GPUs"],["Viyojit: Decoupling Battery and DRAM Capacities for Battery-Backed DRAM"],["Robox: an end-to-end solution to accelerate autonomous control in robotics"],["CHEx86: context-sensitive enforcement of memory safety via microcode-enabled capabilities"],["JPEG-ACT: accelerating deep learning via transform-based lossy compression"],["Echo: compiler-based GPU memory footprint reduction for LSTM RNN training"],["Confidential serverless made efficient with plug-in enclaves"],["Aggressive Pipelining of Irregular Applications on Reconfigurable Hardware"],["Genesis: a hardware acceleration framework for genomic data analysis"],["Gorgon: accelerating machine learning from relational data"],["No-FAT: architectural support for low overhead memory safety checks"],["Manycore network interfaces for in-memory rack-scale computing"],["Generic system calls for GPUs"],["Relaxed persist ordering using strand persistency"],["Communication algorithm-architecture co-design for distributed deep learning"],["There's always a bigger fish: a clarifying analysis of a machine-learning-assisted side-channel attack"],["Coherence protocol for transparent management of scratchpad memories in shared memory manycore architectures"],["All-inclusive ECC: thorough end-to-end protection for reliable computer memory"],["AccQOC: accelerating quantum optimal control based pulse generation"],["Hoop: efficient hardware-assisted out-of-place update for non-volatile memory"],["The anytime automaton"],["DICE: Compressing DRAM Caches for Bandwidth and Capacity"],["Software-hardware co-optimization for computational chemistry on superconducting quantum processors"],["A RISC-V in-network accelerator for flexible high-performance low-power packet processing"],["VIP: virtualizing IP chains on handheld platforms"],["Evaluation of an analog accelerator for linear algebra"],["Exploiting page table locality for agile TLB prefetching"],["Unlimited vector extension with data streaming support"],["Energy efficient data encoding in DRAM channels exploiting data value similarity"],["ACCORD: enabling associativity for gigascale DRAM caches by coordinating way-install and way-prediction"],["Stitch: fusible heterogeneous accelerators enmeshed with many-core architecture for wearables"],["Rebooting virtual memory with midgard"],["SNS's not a synthesizer: a deep-learning-based synthesis predictor"],["Training personalized recommendation systems from (GPU) scratch: look forward not backwards"],["LAP: loop-block aware inclusion properties for energy-efficient asymmetric last level caches"],["Scalable interconnects for reconfigurable spatial architectures"],["Printed microprocessors"],["D\u00e9j\u00e0 view: spatio-temporal compute reuse for energy-efficient 360\u00b0 VR video streaming"],["A specialized architecture for object serialization with applications to big data analytics"],["DRAF: a low-power DRAM-based reconfigurable acceleration fabric"],["Get out of the valley: power-efficient address mapping for GPUs"],["SecDir: a secure directory to defeat directory side-channel attacks"],["T4: compiling sequential code for effective speculative parallelization in hardware"],["Hardware-based domain virtualization for intra-process isolation of persistent memory objects"],["The virtual block interface: a flexible alternative to the conventional virtual memory framework"],["Designing calibration and expressivity-efficient instruction sets for quantum computing"],["NDMiner: accelerating graph pattern mining using near data processing"],["Fractal: An Execution Model for Fine-Grain Nested Speculative Parallelism"],["Bit-level perceptron prediction for indirect branches"],["Hardware-software co-design for brain-computer interfaces"],["CryoGuard: a near refresh-free robust DRAM design for cryogenic computing"],["MeNDA: a near-memory multi-way merge solution for sparse transposition and dataflows"],["Base-victim compression: an opportunistic cache compression architecture"],["Non-Speculative Load-Load Reordering in TSO"],["Division of labor: a more effective approach to prefetching"],["Flexon: a flexible digital neuron for efficient spiking neural network simulations"],["Tiny but mighty: designing and realizing scalable latency tolerance for manycore SoCs"],["Reducing world switches in virtualized environment with flexible cross-world calls"],["SEESAW: using superpages to improve VIPT caches"],["Adaptive memory-side last-level GPU caching"],["XPC: architectural support for secure and efficient cross process call"],["The dark side of DNN pruning"],["Divide and conquer frontend bottleneck"],["Accelerated seeding for genome sequence alignment with enumerated radix trees"],["Large-scale graph processing on FPGAs with caches for thousands of simultaneous misses"],["IntroSpectre: a pre-silicon framework for discovery and analysis of transient execution vulnerabilities"],["EDAM: edit distance tolerant approximate matching content addressable memory"],["Asymmetry-aware work-stealing runtimes"],["The IBM zl5 high frequency mainframe branch predictor"],["A case for hardware-based demand paging"],["SHRINK: Reducing the ISA complexity via instruction recycling"],["Callback: efficient synchronization without invalidation with a directory just for spin-waiting"],["ThermoGater: Thermally-Aware On-Chip Voltage Regulation"],["Tailored page sizes"],["A cost-effective entangling prefetcher for instructions"],["SpZip: architectural support for effective data compression in irregular applications"],["INSPIRE: in-storage private information retrieval via protocol and architecture co-design"],["Increasing ising machine capacity with multi-chip architectures"],["MGX: near-zero overhead memory protection for data-intensive accelerators"],["Accelerating asynchronous programs through event sneak peek"],["Hiding intermittent information leakage with architectural support for blinking"],["A bus authentication and anti-probing architecture extending hardware trusted computing base off CPU chips and beyond"],["ZnG: architecting GPU multi-processors with new flash for scalable data analysis"],["REDUCT: keep it close, keep it cool!: efficient scaling of DNN inference on multi-core CPUs with near-cache compute"],["Satori: efficient and fair resource partitioning by sacrificing short-term benefits for long-term gains"],["HiveMind: a hardware-software system stack for serverless edge swarms"],["Efficient synonym filtering and scalable delayed translation for hybrid virtual caching"],["Efficient synonym filtering and scalable delayed translation for hybrid virtual caching"],["Decoupled Affine Computation for SIMT GPUs"],["Exploring predictive replacement policies for instruction cache and branch target buffer"],["Fine-grained warm water cooling for improving datacenter economy"],["NvMR: non-volatile memory renaming for intermittent computing"],["Computer performance microscopy with Shim"],["SLIP: reducing wire energy in the memory hierarchy"],["MBus: an ultra-low power interconnect bus for next generation nanopower systems"],["Architectural Support for Server-Side PHP Processing"],["MorLog: morphable hardware logging for atomic persistence in non-volatile main memory"],["Hetero-ViTAL: a virtualization stack for heterogeneous FPGA clusters"],["Failure sentinels: ubiquitous just-in-time intermittent computation via low-cost hardware support for voltage monitoring"],["Cambricon-Q: a hybrid architecture for efficient training"],["ZeR\u00d8: zero-overhead resilient operation under pointer integrity attacks"],["t\u00e4k\u014d: a polymorphic cache hierarchy for general-purpose optimization of data movement"],["PPMLAC: high performance chipset architecture for secure multi-party computation"],["A synthesis framework for stitching surface code with superconducting quantum devices"],["XQsim: modeling cross-technology control processors for 10+K qubit quantum computers"],["Protogen: automatically generating directory cache coherence protocols from atomic specifications"],["SCU: a GPU stream compaction unit for graph processing"],["HALO: accelerating flow classification for scalable packet processing in NFV"],["Taming the zoo: the unified GraphIt compiler framework for novel architectures"],["MiSAR: minimalistic synchronization accelerator with resource overflow management"],["Rescuing uncorrectable fault patterns in on-chip memories through error pattern transformation"],["Post-silicon CPU adaptation made practical using machine learning"],["Compact leakage-free support for integrity and reliability"],["Vector runahead"],["IChannels: exploiting current management mechanisms to create covert channels in modern processors"],["RACOD: algorithm\u002fhardware co-design for mobile robot path planning"],["There and Back Again: Optimizing the Interconnect in Networks of Memory Cubes"],["Footprint: Regulating Routing Adaptiveness in Networks-on-Chip"],["Aurochs: an architecture for dataflow threads"],["Superconducting computing with alternating logic elements"],["TDGraph: a topology-driven accelerator for high-performance streaming graph processing"],["uBrain: a unary brain computer interface"],["Cascading structured pruning: enabling high data reuse for sparse DNN accelerators"],["A Programmable Galois Field Processor for the Internet of Things"],["Yukta: multilayer resource controllers to maximize efficiency"],["Nonblocking memory refresh"],["Mobilizing the micro-ops: exploiting context sensitive decoding for security and energy efficiency"],["OO- VR: NUMA friendly object-oriented VR rendering framework for future NUMA-based multi-GPU systems"],["InvisiPage: oblivious demand paging for secure enclaves"],["GraphABCD: scaling out graph analytics with asynchronous block coordinate descent"],["Packet chasing: spying on network packets over a cache side-channel"],["Thermometer: profile-guided btb replacement for data center applications"],["Emerald: graphics modeling for SoC systems"],["SOFF: an OpenCL high-level synthesis framework for FPGAs"],["GCoM: a detailed GPU core model for accurate analytical modeling of modern GPUs"],["Accelerating markov random field inference using molecular optical gibbs sampling units"],["Scaling datacenter accelerators with compute-reuse architectures"],["BabelFish: fusing address translations for containers"],["\u03b7-LSTM: co-designing highly-efficient large LSTM training via exploiting memory-saving and architectural design opportunities"],["A Programmable Hardware Accelerator for Simulating Dynamical Systems"],["Securing GPU via region-based bounds checking"],["FlexiCores: low footprint, high yield, field reprogrammable flexible microprocessors"],["Efficiently scaling out-of-order cores for simultaneous multithreading"],["XPro: A Cross-End Processing Architecture for Data Analytics in Wearables"],["Accelerating GPU Hardware Transactional Memory with Snapshot Isolation"],["Focused value prediction"],["Efficiently supporting dynamic task parallelism on heterogeneous cache-coherent systems"],["TimeCache: using time to eliminate cache side channels when sharing software"],["Accelerating database analytic query workloads using an associative processor"],["Master of none acceleration: a comparison of accelerator architectures for analytical query processing"],["Tvarak: software-managed hardware offload for redundancy in direct-access NVM storage"],["Efficient multi-GPU shared memory via automatic optimization of fine-grained transfers"],["BlockMaestro: enabling programmer-transparent task-based execution in GPU systems"],["Gearbox: a case for supporting accumulation dispatching and hybrid partitioning in PIM-based accelerators"],["RelaxFault memory repair"],["Architecting a stochastic computing unit with molecular optical devices"],["Linebacker: preserving victim cache lines in idle register files of GPUs"],["Free atomics: hardware atomic operations without fences"],["Clean: a race detector with cleaner semantics"],["CHARSTAR: Clock Hierarchy Aware Resource Scaling in Tiled ARchitectures"],["Non-speculative store coalescing in total store order"],["PES: proactive event scheduling for responsive and energy-efficient mobile web computing"],["AxMemo: hardware-compiler co-design for approximate code memoization"],["Using SMT to accelerate nested virtualization"],["Energy efficiency boost in the AI-infused POWER10 processor"],["SIMD2: a generalized matrix instruction set for accelerating tensor computation beyond GEMM"],["Cost-effective speculative scheduling in high performance processors"],["Heat to power: thermal energy harvesting and recycling for warm water-cooled datacenters"],["RingCNN: exploiting algebraically-sparse ring tensors for energy-efficient CNN-based computational imaging"],["X-cache: a modular architecture for domain-specific caches"],["EyeCoD: eye tracking system acceleration via flatcam-based algorithm & accelerator co-design"],["DCS-ctrl: a fast and flexible device-control mechanism for device-centric server architecture"],["PMNet: in-network data persistence"],["A scalable architecture for reprioritizing ordered parallelism"],["Dynamic global adaptive routing in high-radix networks"],["Opportunistic competition overhead reduction for expediting critical section in NoC based CMPs"],["MTraceCheck: Validating Non-Deterministic Behavior of Memory Consistency Models in Post-Silicon Validation"],["Redundant Memory Array Architecture for Efficient Selective Protection"],["Slipstream processors revisited: exploiting branch sets"],["Dv\u00e9: improving DRAM reliability and performance on-demand via coherent replication"],["CaSMap: agile mapper for reconfigurable spatial architectures by automatically clustering intermediate representations and scattering mapping process"],["Branch vanguard: decomposing branch functionality into prediction and resolution instructions"],["ShortCut: Architectural Support for Fast Object Access in Scripting Languages"],["Filter caching for free: the untapped potential of the store-buffer"],["High-performance deep-learning coprocessor integrated into x86 SoC with server-class CPUs"],["Mocktails: capturing the memory behaviour of proprietary mobile architectures"],["HieraGen: automated generation of concurrent, hierarchical cache coherence protocols"],["ScoRD: a scoped race detector for GPUs"],["NVOverlay: enabling efficient and scalable high-frequency snapshotting to NVM"],["PS-ORAM: efficient crash consistency support for oblivious RAM on NVM"],["ASAP: architecture support for asynchronous persistence"],["Register file prefetching"],["HetCore: TFET-CMOS hetero-device architecture for CPUs and GPUs"],["PF-DRAM: a precharge-free DRAM structure"],["FastTrack: leveraging heterogeneous FPGA wires to design low-cost high-performance soft NoCs"],["Time squeezing for tiny devices"],["Flick: fast and lightweight ISA-crossing call for heterogeneous-ISA environments"],["TransForm: formally specifying transistency models and synthesizing enhanced litmus tests"],["Managing reliability skew in DNA storage"],["Virtual melting temperature: managing server load to minimize cooling overhead with phase change materials"],["Hardware supported permission checks on persistent objects for performance and programmability"],["Retracted on May 10, 2023: TPShare: a time-space sharing scheduling abstraction for shared cloud via vertical labels"],["Quantifying server memory frequency margin and using it to improve performance in HPC systems"],["NASA: accelerating neural network design with a NAS processor"],["SoftVN: efficient memory protection via software-provided version numbers"],["Anticipating and eliminating redundant computations in accelerated sparse training"],["PowerChop: identifying and managing non-critical units in hybrid processor architectures"],["Future vector microprocessor extensions for data aggregations"],["Retracted on January 26, 2021: 3D-based video recognition acceleration by leveraging temporal locality"],["FaultHound: value-locality-based soft-fault tolerance"],["Short-circuit dispatch: accelerating virtual machine interpreters on embedded processors"],["TCEP: traffic consolidation for energy-proportional high-radix networks"],["Data compression accelerator on IBM POWER9 and z15 processors"],["Auto-predication of critical branches"],["Lelantus: fine-granularity copy-on-write operations for secure non-volatile memories"],["Check-in: in-storage checkpointing for key-value store system leveraging flash-based SSDs"],["Independent forward progress of work-groups"],["Supporting legacy libraries on non-volatile memory: a user-transparent approach"],["Revamping storage class memory with hardware automated memory-over-storage solution"],["NASGuard: a novel accelerator architecture for robust neural architecture search (NAS) networks"],["Charm: a language for closed-form high-level architecture modeling"],["FFCCD: fence-free crash-consistent concurrent defragmentation for persistent memory"],["HyperTRIO: hyper-tenant translation of I\u002fO addresses"],["Commutative data reordering: a new technique to reduce data movement energy on sparse inference workloads"],["Zero inclusion victim: isolating core caches from inclusive last-level cache evictions"],["LaZy superscalar"],["Lemonade from Lemons: Harnessing Device Wearout to Create Limited-Use Security Architectures"],["Maya: using formal control to obfuscate power side channels"],["LightPC: hardware and software co-design for energy-efficient full system persistence"],["Rethinking programmable earable processors"],["Production-run software failure diagnosis via Adaptive Communication Tracking"],["Bouncer: static program analysis in hardware"],["Execution dependence extension (EDE): isa support for eliminating fences"],["Decoupling loads for nano-instruction set computers"],["BOSS: bandwidth-optimized search accelerator for storage-class memory"],["Constructing a weak memory model"],["Dynamic memory dependence predication"],["Space-time algebra: a model for neocortical computation"],["Speculative vectorisation with selective replay"],["Ghost routing to enable oblivious computation on memory-centric networks"]],"hovertemplate":"\u003cb\u003e%{customdata[0]}\u003c\u002fb\u003e\u003cbr\u003eCumulative: %{y:.2%}\u003cextra\u003e\u003c\u002fextra\u003e","marker":{"color":"rgb(26, 118, 255)"},"mode":"lines+markers","name":"Cumulative %","x":["In-Datacenter Performance Analysis of a ...","EIE: efficient inference engine on compr...","ISAAC: a convolutional neural network ac...","Eyeriss: a spatial architecture for ener...","PRIME: a novel processing-in-memory arch...","SCNN: An Accelerator for Compressed-spar...","ShiDianNao: shifting vision processing c...","A scalable processing-in-memory accelera...","Cnvlutin: ineffectual-neuron-free deep n...","Minerva: enabling low-power, highly-accu...","Bit fusion: bit-level dynamically compos...","A configurable cloud-scale DNN processor...","Heracles: improving resource efficiency ...","MLPerf inference benchmark","Profiling a warehouse-scale computer","PIM-enabled instructions: a low-overhead...","Neurocube: a programmable digital neurom...","Ten lessons from three generations shape...","Neural cache: bit-serial in-cache accele...","Scalpel: Customizing DNN Pruning to the ...","Cambricon: an instruction set architectu...","Maximizing CNN Accelerator Efficiency Th...","Accel-sim: an extensible simulation fram...","Plasticine: A Reconfigurable Architectur...","Firesim: FPGA-accelerated cycle-exact sc...","Biscuit: a framework for near-data proce...","Transparent offloading and mapping (TOM)...","Hardware architecture and software stack...","RecNMP: accelerating personalized recomm...","UCNN: exploiting computational reuse in ...","MCM-GPU: Multi-Chip-Module GPUs for Cont...","ScaleDeep: A Scalable Compute Architectu...","RedEye: analog ConvNet image sensor arch...","Back to the future: leveraging Belady's ...","Data reorganization in memory using 3D-s...","BlueDBM: an appliance for big data analy...","Pioneering chiplet technology and design...","Stream-Dataflow Acceleration","Redundant memory mappings for fast acces...","CraterLake: a hardware accelerator for e...","Revisiting RowHammer: an experimental an...","ACT: designing sustainable computer syst...","Sparse ReRAM engine: joint exploration o...","Energy-efficient neural network accelera...","DeepRecSys: a system for optimizing end-...","SnaPEA: predictive early activation for ...","Full-stack, real-system quantum computer...","BTS: an accelerator for bootstrappable f...","ELSA: hardware-software co-design for ef...","Energy efficient architecture for graph ...","New attacks and defense for encrypted-ad...","DjiNN and Tonic: DNN as a service and it...","EDDIE: EM-Based Detection of Deviations ...","Regaining Lost Cycles with HotCalls: A F...","Dynamo: facebook's data center-wide powe...","Understanding and Optimizing Asynchronou...","Rumba: an online quality management syst...","Asymptotic improvements to quantum circu...","Statistical assertions for validating pa...","Gist: efficient data encoding for deep n...","Clank: Architectural Support for Intermi...","Accelerating distributed reinforcement l...","DSAGEN: synthesizing programmable spatia...","CoSA: scheduling by constrained optimiza...","TWiCe: preventing row-hammering by explo...","HeteroOS: OS Design for Heterogeneous Me...","The Reach Profiler (REAPER): Enabling th...","Duality cache for data parallel accelera...","ASIC clouds: specializing the datacenter","Secure Hierarchy-Aware Cache Replacement...","Architecting to achieve a billion reques...","Automatic generation of efficient accele...","Modular routing design for chiplet-based...","CoNDA: efficient cache coherence support...","The Mondrian Data Engine","Efficient invisible speculative executio...","SoftSKU: optimizing server architectures...","MGPUSim: enabling multi-GPU performance ...","Centaur: a chiplet-based, hybrid sparse-...","Warped-compression: enabling power effic...","Warped-slicer: efficient intra-SM slicin...","Laconic deep learning inference accelera...","Semantic locality and context-based pref...","Think fast: a tensor streaming processor...","A case for core-assisted bottleneck acce...","Xuantie-910: a commercial multi-core 12-...","A fully associative, tagless DRAM cache","Accelerating dependent cache misses with...","GraFboost: using accelerated flash stora...","Perceptron-based prefetch filtering","Flexible software profiling of GPU archi...","A multi-neural network acceleration arch...","Flexible auto-refresh: enabling scalable...","Quantitative comparison of hardware tran...","Bit-plane compression: transforming data...","SpinalFlow: an architecture and dataflow...","Sparsity-aware and re-configurable NPU a...","Language-level persistency","PACMAN: attacking ARM pointer authentica...","CAWA: coordinated warp scheduling and ca...","Genax: a genome sequencing accelerator","Enabling scientific computing on memrist...","AsmDB: understanding and mitigating fron...","Treadmill: attributing the source of tai...","MuonTrap: preventing cross-domain spectr...","DRQ: dynamic region-based quantization f...","Euphrates: algorithm-SoC co-design for l...","Mitigating wordline crosstalk using adap...","GANAX: a unified MIMD-SIMD acceleration ...","FLIN: enabling fairness and enhancing pe...","Dual-side sparse tensor core","CROW: a low-cost substrate for improving...","MicroScope: enabling microarchitectural ...","RaPiD: AI accelerator for ultra-low prec...","Hybrid TLB Coalescing: Improving TLB Tra...","Computation reuse in DNNs by exploiting ...","EVA2: exploiting temporal redundancy in ...","Bouquet of instruction pointers: instruc...","Snafu: an ultra-low-power, energy-minima...","ObfusMem: A Low-Overhead Access Obfuscat...","Architecting noisy intermediate-scale tr...","iPIM: programmable in-memory image proce...","Interplay between hardware prefetcher an...","BEAR: techniques for mitigating bandwidt...","PipeZK: accelerating zero-knowledge proo...","Hi-fi playback: tolerating position erro...","XED: exposing on-die error detection inf...","Mellow writes: extending lifetime in res...","Rethinking TLB Designs in Virtualized En...","Stash: have your scratchpad and cache it...","TIMELY: pushing data movements and inter...","InvisiMem: Smart Memory Defenses for Mem...","I see dead \u03bcops: leaking secrets via Int...","APPROX-NoC: A Data Approximation Framewo...","GraphSSD: graph semantics aware SSD","Evolution of the samsung exynos CPU micr...","FORMS: fine-grained polarized ReRAM-base...","Agile paging: exceeding the best of nest...","Access Pattern-Aware Cache Management fo...","Prediction based execution on deep neura...","Hydra: enabling low-overhead mitigation ...","Triad-NVM: persistency for integrity-pro...","GoSPA: an energy-efficient high-performa...","MnnFast: a fast and scalable system arch...","TIE: energy-efficient tensor train-based...","TENET: a framework for modeling tensor d...","Morpheus: creating application objects e...","HASCO: towards agile hardware and softwa...","Exploring the potential of heterogeneous...","Unified address translation for memory-m...","Anubis: ultra-low overhead and recovery ...","Eager pruning: algorithm and architectur...","Jenga: Software-Defined Cache Hierarchie...","RANA: towards efficient neural accelerat...","NN-baton: DNN workload orchestration and...","Harmonia: balancing compute and memory p...","Dynamic thread block launch: a lightweig...","DeepAttest: an end-to-end attestation fr...","QUAC-TRNG: high-throughput true random n...","GaaS-X: graph analytics accelerator supp...","Translation ranger: operating system sup...","Do-It-Yourself Virtual Memory Translatio...","Scheduling page table walks for irregula...","PolyGraph: exposing the value of flexibi...","The load slice core microarchitecture","FASE: finding amplitude-modulated side-c...","The locality descriptor: a holistic cros...","BioHD: an efficient genome sequence sear...","An in-network architecture for accelerat...","Demystifying the system vulnerability st...","DHTM: durable hardware transactional mem...","Energy-efficient video processing for vi...","Speculative data-oblivious execution: mo...","ARM virtualization: performance and arch...","Density tradeoffs of non-volatile memory...","Janus: optimizing memory and storage sup...","Using multiple input, multiple output fo...","Efficient metadata management for irregu...","PROMISE: an end-to-end design of a progr...","Cost-efficient overclocking in immersion...","Towards sustainable in-situ server syste...","HEB: deploying and managing hybrid energ...","CLR-DRAM: a low-cost DRAM architecture e...","2QAN: a quantum compiler for 2-local qub...","LaPerm: locality aware scheduler for dyn...","Virtual thread: maximizing thread-level ...","Guaranteeing local differential privacy ...","A quantum computational compiler and des...","SARA: scaling a reconfigurable dataflow ...","Rethinking belady's algorithm to accommo...","Hiding the Long Latency of Persist Barri...","Quality of Service Support for Fine-Grai...","EQC: ensembled quantum computing for var...","NEBULA: a neuromorphic spin-based ultra-...","Multiple clone row DRAM: a low latency a...","Towards statistical guarantees in contro...","Generative and multi-phase learning for ...","uGEMM: unary computing architecture for ...","DIMMining: pruning-efficient and paralle...","MeRLiN: Exploiting Dynamic Instruction B...","Spandex: a flexible interface for effici...","Buddy compression: enabling larger memor...","MITTS: memory inter-arrival time traffic...","Hardware Translation Coherence for Virtu...","EbDa: A New Theory on Design and Verific...","SeGraM: a universal hardware accelerator...","A software-defined tensor streaming mult...","Page overlays: an enhanced virtual memor...","Power attack defense: securing battery-b...","To PIM or not for emerging general purpo...","Stream-based memory access specializatio...","Bonsai: high-performance adaptive merge ...","Enhancing and exploiting contiguity for ...","A variable warp size architecture","PowerChief: Intelligent Power Allocation...","Chasing Away RAts: Semantics and Evaluat...","LogCA: A High-Level Performance Model fo...","Don't forget the I\u002fO when allocating you...","Opening pandora's box: a systematic stud...","Thermal time shifting: leveraging phase ...","ActivePointers: a case for software addr...","PrORAM: dynamic prefetcher for oblivious...","Exploiting dynamic timing slack for ener...","2B-SSD: the case for dual, byte- and blo...","RegMutex: inter-warp GPU register time-s...","Opportunistic computing in GPU architect...","NISQ+: boosting quantum computing power ...","SQUARE: strategic quantum ancilla reuse ...","Flex: high-availability datacenters with...","SPACE: locality-aware processing in hete...","CloudMonatt: an architecture for securit...","Parallel Automata Processor","Secure TLBs","The NeBuLa RPC-optimized architecture","Enabling compute-communication overlap i...","A stochastic-computing based deep learni...","Hyper-AP: enhancing associative processi...","Ripple: profile-guided instruction cache...","COP: to compress and protect main memory","SmartExchange: trading higher-cost memor...","Leaky buddies: cross-component covert ch...","Sibyl: adaptive and extensible data plac...","Geyser: a compilation framework for quan...","Nested enclave: supporting fine-grained ...","Near data acceleration with concurrent h...","ABC-DIMM: alleviating the bottleneck of ...","Exploiting long-distance interactions an...","A case for richer cross-layer abstractio...","SysScale: exploiting multi-domain dynami...","A hardware accelerator for tracing garba...","Albireo: energy-efficient acceleration o...","Themis: a network bandwidth-aware collec...","Practical memory safety with REST","Synchronized progress in interconnection...","Strober: fast and accurate sample-based ...","Designing vertical processors in monolit...","Cryogenic computer architecture modeling...","Axiomatic hardware-software contracts fo...","DynaSpAM: dynamic spatial architecture m...","Sieve: scalable in-situ DRAM-based accel...","ArMOR: defending against memory consiste...","Probable cause: the deanonymizing effect...","Fusion: design tradeoffs in coherent cac...","Boosting access parallelism to PCM-based...","Lazy persistency: a high-performing and ...","IntelliNoC: a holistic design framework ...","Perforated page: supporting fragmented m...","CODIC: a low-cost substrate for enabling...","MOESI-prime: preventing coherence-induce...","Lukewarm serverless functions: character...","Efficient execution of memory access pha...","Peak efficiency aware scheduling for hig...","CASH: supporting IaaS customers with a s...","Bespoke Processors for Applications with...","Criticality aware tiered cache hierarchy...","CryoCore: a fast and dense processor arc...","FlexMiner: a pattern-aware accelerator f...","APRES: improving cache efficiency by exp...","Viyojit: Decoupling Battery and DRAM Cap...","Robox: an end-to-end solution to acceler...","CHEx86: context-sensitive enforcement of...","JPEG-ACT: accelerating deep learning via...","Echo: compiler-based GPU memory footprin...","Confidential serverless made efficient w...","Aggressive Pipelining of Irregular Appli...","Genesis: a hardware acceleration framewo...","Gorgon: accelerating machine learning fr...","No-FAT: architectural support for low ov...","Manycore network interfaces for in-memor...","Generic system calls for GPUs","Relaxed persist ordering using strand pe...","Communication algorithm-architecture co-...","There's always a bigger fish: a clarifyi...","Coherence protocol for transparent manag...","All-inclusive ECC: thorough end-to-end p...","AccQOC: accelerating quantum optimal con...","Hoop: efficient hardware-assisted out-of...","The anytime automaton","DICE: Compressing DRAM Caches for Bandwi...","Software-hardware co-optimization for co...","A RISC-V in-network accelerator for flex...","VIP: virtualizing IP chains on handheld ...","Evaluation of an analog accelerator for ...","Exploiting page table locality for agile...","Unlimited vector extension with data str...","Energy efficient data encoding in DRAM c...","ACCORD: enabling associativity for gigas...","Stitch: fusible heterogeneous accelerato...","Rebooting virtual memory with midgard","SNS's not a synthesizer: a deep-learning...","Training personalized recommendation sys...","LAP: loop-block aware inclusion properti...","Scalable interconnects for reconfigurabl...","Printed microprocessors","D\u00e9j\u00e0 view: spatio-temporal compute reuse...","A specialized architecture for object se...","DRAF: a low-power DRAM-based reconfigura...","Get out of the valley: power-efficient a...","SecDir: a secure directory to defeat dir...","T4: compiling sequential code for effect...","Hardware-based domain virtualization for...","The virtual block interface: a flexible ...","Designing calibration and expressivity-e...","NDMiner: accelerating graph pattern mini...","Fractal: An Execution Model for Fine-Gra...","Bit-level perceptron prediction for indi...","Hardware-software co-design for brain-co...","CryoGuard: a near refresh-free robust DR...","MeNDA: a near-memory multi-way merge sol...","Base-victim compression: an opportunisti...","Non-Speculative Load-Load Reordering in ...","Division of labor: a more effective appr...","Flexon: a flexible digital neuron for ef...","Tiny but mighty: designing and realizing...","Reducing world switches in virtualized e...","SEESAW: using superpages to improve VIPT...","Adaptive memory-side last-level GPU cach...","XPC: architectural support for secure an...","The dark side of DNN pruning","Divide and conquer frontend bottleneck","Accelerated seeding for genome sequence ...","Large-scale graph processing on FPGAs wi...","IntroSpectre: a pre-silicon framework fo...","EDAM: edit distance tolerant approximate...","Asymmetry-aware work-stealing runtimes","The IBM zl5 high frequency mainframe bra...","A case for hardware-based demand paging","SHRINK: Reducing the ISA complexity via ...","Callback: efficient synchronization with...","ThermoGater: Thermally-Aware On-Chip Vol...","Tailored page sizes","A cost-effective entangling prefetcher f...","SpZip: architectural support for effecti...","INSPIRE: in-storage private information ...","Increasing ising machine capacity with m...","MGX: near-zero overhead memory protectio...","Accelerating asynchronous programs throu...","Hiding intermittent information leakage ...","A bus authentication and anti-probing ar...","ZnG: architecting GPU multi-processors w...","REDUCT: keep it close, keep it cool!: ef...","Satori: efficient and fair resource part...","HiveMind: a hardware-software system sta...","Efficient synonym filtering and scalable...","Efficient synonym filtering and scalable...","Decoupled Affine Computation for SIMT GP...","Exploring predictive replacement policie...","Fine-grained warm water cooling for impr...","NvMR: non-volatile memory renaming for i...","Computer performance microscopy with Shi...","SLIP: reducing wire energy in the memory...","MBus: an ultra-low power interconnect bu...","Architectural Support for Server-Side PH...","MorLog: morphable hardware logging for a...","Hetero-ViTAL: a virtualization stack for...","Failure sentinels: ubiquitous just-in-ti...","Cambricon-Q: a hybrid architecture for e...","ZeR\u00d8: zero-overhead resilient operation ...","t\u00e4k\u014d: a polymorphic cache hierarchy for ...","PPMLAC: high performance chipset archite...","A synthesis framework for stitching surf...","XQsim: modeling cross-technology control...","Protogen: automatically generating direc...","SCU: a GPU stream compaction unit for gr...","HALO: accelerating flow classification f...","Taming the zoo: the unified GraphIt comp...","MiSAR: minimalistic synchronization acce...","Rescuing uncorrectable fault patterns in...","Post-silicon CPU adaptation made practic...","Compact leakage-free support for integri...","Vector runahead","IChannels: exploiting current management...","RACOD: algorithm\u002fhardware co-design for ...","There and Back Again: Optimizing the Int...","Footprint: Regulating Routing Adaptivene...","Aurochs: an architecture for dataflow th...","Superconducting computing with alternati...","TDGraph: a topology-driven accelerator f...","uBrain: a unary brain computer interface","Cascading structured pruning: enabling h...","A Programmable Galois Field Processor fo...","Yukta: multilayer resource controllers t...","Nonblocking memory refresh","Mobilizing the micro-ops: exploiting con...","OO- VR: NUMA friendly object-oriented VR...","InvisiPage: oblivious demand paging for ...","GraphABCD: scaling out graph analytics w...","Packet chasing: spying on network packet...","Thermometer: profile-guided btb replacem...","Emerald: graphics modeling for SoC syste...","SOFF: an OpenCL high-level synthesis fra...","GCoM: a detailed GPU core model for accu...","Accelerating markov random field inferen...","Scaling datacenter accelerators with com...","BabelFish: fusing address translations f...","\u03b7-LSTM: co-designing highly-efficient la...","A Programmable Hardware Accelerator for ...","Securing GPU via region-based bounds che...","FlexiCores: low footprint, high yield, f...","Efficiently scaling out-of-order cores f...","XPro: A Cross-End Processing Architectur...","Accelerating GPU Hardware Transactional ...","Focused value prediction","Efficiently supporting dynamic task para...","TimeCache: using time to eliminate cache...","Accelerating database analytic query wor...","Master of none acceleration: a compariso...","Tvarak: software-managed hardware offloa...","Efficient multi-GPU shared memory via au...","BlockMaestro: enabling programmer-transp...","Gearbox: a case for supporting accumulat...","RelaxFault memory repair","Architecting a stochastic computing unit...","Linebacker: preserving victim cache line...","Free atomics: hardware atomic operations...","Clean: a race detector with cleaner sema...","CHARSTAR: Clock Hierarchy Aware Resource...","Non-speculative store coalescing in tota...","PES: proactive event scheduling for resp...","AxMemo: hardware-compiler co-design for ...","Using SMT to accelerate nested virtualiz...","Energy efficiency boost in the AI-infuse...","SIMD2: a generalized matrix instruction ...","Cost-effective speculative scheduling in...","Heat to power: thermal energy harvesting...","RingCNN: exploiting algebraically-sparse...","X-cache: a modular architecture for doma...","EyeCoD: eye tracking system acceleration...","DCS-ctrl: a fast and flexible device-con...","PMNet: in-network data persistence","A scalable architecture for reprioritizi...","Dynamic global adaptive routing in high-...","Opportunistic competition overhead reduc...","MTraceCheck: Validating Non-Deterministi...","Redundant Memory Array Architecture for ...","Slipstream processors revisited: exploit...","Dv\u00e9: improving DRAM reliability and perf...","CaSMap: agile mapper for reconfigurable ...","Branch vanguard: decomposing branch func...","ShortCut: Architectural Support for Fast...","Filter caching for free: the untapped po...","High-performance deep-learning coprocess...","Mocktails: capturing the memory behaviou...","HieraGen: automated generation of concur...","ScoRD: a scoped race detector for GPUs","NVOverlay: enabling efficient and scalab...","PS-ORAM: efficient crash consistency sup...","ASAP: architecture support for asynchron...","Register file prefetching","HetCore: TFET-CMOS hetero-device archite...","PF-DRAM: a precharge-free DRAM structure","FastTrack: leveraging heterogeneous FPGA...","Time squeezing for tiny devices","Flick: fast and lightweight ISA-crossing...","TransForm: formally specifying transiste...","Managing reliability skew in DNA storage","Virtual melting temperature: managing se...","Hardware supported permission checks on ...","Retracted on May 10, 2023: TPShare: a ti...","Quantifying server memory frequency marg...","NASA: accelerating neural network design...","SoftVN: efficient memory protection via ...","Anticipating and eliminating redundant c...","PowerChop: identifying and managing non-...","Future vector microprocessor extensions ...","Retracted on January 26, 2021: 3D-based ...","FaultHound: value-locality-based soft-fa...","Short-circuit dispatch: accelerating vir...","TCEP: traffic consolidation for energy-p...","Data compression accelerator on IBM POWE...","Auto-predication of critical branches","Lelantus: fine-granularity copy-on-write...","Check-in: in-storage checkpointing for k...","Independent forward progress of work-gro...","Supporting legacy libraries on non-volat...","Revamping storage class memory with hard...","NASGuard: a novel accelerator architectu...","Charm: a language for closed-form high-l...","FFCCD: fence-free crash-consistent concu...","HyperTRIO: hyper-tenant translation of I...","Commutative data reordering: a new techn...","Zero inclusion victim: isolating core ca...","LaZy superscalar","Lemonade from Lemons: Harnessing Device ...","Maya: using formal control to obfuscate ...","LightPC: hardware and software co-design...","Rethinking programmable earable processo...","Production-run software failure diagnosi...","Bouncer: static program analysis in hard...","Execution dependence extension (EDE): is...","Decoupling loads for nano-instruction se...","BOSS: bandwidth-optimized search acceler...","Constructing a weak memory model","Dynamic memory dependence predication","Space-time algebra: a model for neocorti...","Speculative vectorisation with selective...","Ghost routing to enable oblivious comput..."],"y":{"dtype":"f8","bdata":"ZwqTZWVUcT\u002f3zJIANbWAP3w7hwfUaIg\u002fcj4KEU\u002f6jz9CUszAT7yTPw9oRMQoY5c\u002f8FH7hB73mj\u002fH6sFqTW+eP1WaDkmf6aA\u002fxiHoVnKPoj8aCzi7HTCkPzibCMee0KU\u002ftTcttTRwpz9CfCgdIQ2pPyY+I4Nvqao\u002fsL5eunlFrD\u002fL5c9PItatP71EsAJgYq8\u002feRDsDBR3sD944cfLrDuxP4i5fqMY\u002frE\u002faPIMm3q9sj9\u002foq2ok3yzPxgGtaroObQ\u002fglL2jgH2tD\u002fukwFyUrC1P2qy8\u002fQZabY\u002f\u002fcodEsshtz98qaLvo9e3P+sFg5G1i7g\u002fFjNa+A4\u002fuT9J4zGyTfK5Pxw1+ULVo7o\u002fJvIGk0BVuz8YSrsLcwa8Pwdnwr+gtrw\u002f\u002fAB+b85mvT\u002f4xNJxSRa+P9WgzUmmxb4\u002f7u3icsZ0vz\u002fy7Y281BHAP3GjhD1GacA\u002ff7X7OSvAwD+fykhB4BbBPxu2ai2FbcE\u002fOhlUzw3DwT+t2y6hcxjCP51fL3XZbcI\u002fSw3SKxzDwj9\u002f\u002fNo3TRjDP03XCeujbMM\u002fw2ekC9XAwz9e73g\u002f4BTEP3k7oVvYaMQ\u002fX1UtUQy8xD9Zb3HgFw\u002fFP5Pu6RUPYsU\u002fNz6G0\u002fG0xT\u002flDacBwAfGPxolNr9kWsY\u002fTUKJmfSsxj\u002fjPs0kRf\u002fGP5UyBFuAUcc\u002fjJRH5nqjxz9\u002fGk3n8PTHPzZEVGJQRsg\u002ffLK7cIKXyD+LU08UWOjIP9FrAvjmOMk\u002flEXb3XWJyT91lkLo7NnJPy2IG8wzKso\u002fYjsasnp6yj8\u002fpzfbkMrKP5TjILF1Gss\u002f6R8Kh1pqyz+Px8o39LnLP2vajYp0Ccw\u002fxK523\u002fRYzD84g9b7QajMP47sqj1198w\u002fgo4mfI5GzT899aj1B5XNP8x5JDBm480\u002fUZ+YgFUxzj\u002fXxAzRRH\u002fOP4Peqe0Xzc4\u002fsTYhCOsazz\u002fgjpgivmjPPwD2iNN0ts8\u002f6gTHdAcC0D8WrrZ+1CjQPwK7WCCTT9A\u002fEznVPkN20D9m1j5c85zQPwHTvdyUw9A\u002f3u4pXDbq0D\u002fsta0hyRDRP3eK9xEvN9E\u002fyQK+jHZd0T\u002faW5cIvoPRP44nlhz2qdE\u002fpwx8Qv\u002fP0T8QjvmD+PXRPzfwicbxG9I\u002fXlIaCetB0j+C+gtH1GfSP6Wi\u002fYS9jdI\u002fyErvwqaz0j9uutqUb9nSPxMqxmY4\u002f9I\u002fH3MIls8k0z\u002fpnF3GZkrTP7TGsvb9b9M\u002fmadAVISV0z9+iM6xCrvTP2JpXA+R4NM\u002fL+iUstIF1D+6R+BWFCvUPyS8fGxEUNQ\u002f0E8GgXR11D87xKKWpJrUP4jQxBixv9Q\u002fHUIRYpnk1D9wlHCsgQnVP8Fs+JlXLtU\u002fyW7bARtT1T\u002fQcL5p3nfVP9dyodGhnNU\u002fU7\u002fGhlLB1T9ElgJb8OXVPxjFM+dnCtY\u002fLhNSct8u1j+6qwOSQ1PWP4ZjorCnd9Y\u002fCEtgZuSb1j+KMh4cIcDWPxBdL9FJ5NY\u002flodAhnII1z8cslE7myzXP2C9dfHDUNc\u002fMgdQcth01z\u002fCMT307JjXP4WN5W\u002fYvNc\u002fSOmN68Pg1z8LRTZnrwTYPzG6jgFxKNg\u002fFhD6nDJM2D+Cp2Dy3m\u002fYP+4+x0eLk9g\u002fW9YtnTe32D\u002feJ6dzztrYPw0p\u002f41P\u002ftg\u002f+gpqqdAh2T\u002fo7NTEUUXZPxZ06ua8aNk\u002fA9wSCiiM2T\u002fwQzstk6\u002fZP92rY1D+0tk\u002fXo8VPlP22T8PYdG0kRnaP8U1UHa5PNo\u002fewrPN+Ff2j9y\u002fjr4CIPaP0yH18AZpto\u002fZy9hiCrJ2j9BuP1QO+zaP1xghxhMD9s\u002fm8I2IS4y2z8KWW5N4FTbP4wsSBx6d9s\u002fDgAi6xOa2z+P0\u002fu5rbzbP02PO98u39s\u002fC0t7BLAB3D\u002f096swGCTcP\u002fWeWBRnRtw\u002f9kUF+LVo3D+CLpNB64rcP082Doogrdw\u002fHUSE4jvP3D\u002frUfo6V\u002fHcP3hAg5RyE90\u002fRk757I013T+NJoT\u002fjlfdPxYe\u002fBCQed0\u002fXPaGI5Gb3T+jzhE2kr3dPyzGiUeT390\u002faKr3uXkB3j8Y3BYyRSPePwktI6kQRd4\u002f+30vINxm3j\u002fi2m8+jIjeP8ZAQaMgqt4\u002f68X\u002fBrXL3j\u002fPK9FrSe3eP7KRotDdDt8\u002f1xZhNHIw3z+C4Od76lHfPy2qbsNic98\u002f13P1CtuU3z+9LqriGbbfP2HKcbtY198\u002fR4Umk5f43z92EPc16wzgP8eC3vV7HeA\u002fQ2P90f0t4D+\u002fQxyufz7gP5qUxIoBT+A\u002fceuoSXRf4D+oshYJ52\u002fgP38J+8dZgOA\u002fIv4fL72Q4D\u002fG8kSWIKHgPwp34PyDseA\u002frWsFZOfB4D9RYCrLStLgP\u002fRUTzKu4uA\u002fcqQzMPLy4D\u002fw8xcuNgPhP6DOyxdqE+E\u002f8Tj2AJ4j4T+hE6rq0TPhP\u002fF91NMFROE\u002fQuj+vDlU4T\u002fywrKmbWThP0It3Y+hdOE\u002f8geRedWE4T9DcrtiCZXhP0KJmfQspeE\u002f4y\u002fuhVC14T\u002fiRswXdMXhP+JdqqmX1eE\u002f4nSIO7vl4T8xKE8wzvXhP4HbFSXhBeI\u002f0Y7cGfQV4j9S7a0q9iXiP9RLfzv4NeI\u002f9znHS\u002fpF4j95mJhc\u002fFXiP\u002fv2aW3+ZeI\u002f9\u002fylT+914j9Sc2sy4IXiP055pxTRleI\u002fSX\u002fj9sGl4j9ISNdeobXiP+igQcaAxeI\u002f2DV4ZE7V4j9pWiUCHOXiP\u002fp+0p\u002fp9OI\u002fjkaeIaUE4z+BfvOjYBTjP+8dgbQJJOM\u002fkAhO\u002fJ8z4z\u002fSgpFDNkPjP3RtXovMUuM\u002fSCMTrk9i4z8d2cfQ0nHjP0moXW9CgeM\u002fdXfzDbKQ4z+hRomsIaDjP80VH0uRr+M\u002f+uS06QC\u002f4z8mtEqIcM7jP1KD4Cbg3eM\u002fflJ2xU\u002ft4z+qIQxkv\u002fzjP9bwoQIvDOQ\u002f4xRQuoob5D\u002fvOP5x5irkP\u002fxcrClCOuQ\u002fCYFa4Z1J5D8VpQiZ+VjkPyLJtlBVaOQ\u002fLu1kCLF35D+rHhdy+IbkPydQyds\u002fluQ\u002fo4F7RYel5D8fsy2vzrTkP5vk3xgWxOQ\u002fGBaSgl3T5D+UR0TspOLkPxW8mpzX8eQ\u002f9aB6TQoB5T92FdH9PBDlP1b6sK5vH+U\u002f1tSJNo0u5T9Vr2K+qj3lP3YZskXITOU\u002f9vOKzeVb5T91zmNVA2vlP9ddtT8LeuU\u002fl12QKhOJ5T\u002f47OEUG5jlP1l8M\u002f8ip+U\u002f4\u002fwx0xS25T8ODaemBsXlPzkdHHr40+U\u002fZC2RTeri5T8qtuyKxfHlP06v0cigAOY\u002fFDgtBnwP5j\u002fZwIhDVx7mP1FWL2YbLeY\u002fyevViN875j9BgXyro0rmP7kWI85nWeY\u002fMazJ8Cto5j+pQXAT8HbmPxXDy4+cheY\u002fgUQnDEmU5j\u002fuxYKI9aLmP7m3ZwWiseY\u002fJTnDgU7A5j9QQczF4s7mP9u5Xgp33eY\u002fZTLxTgvs5j\u002fwqoOTn\u002frmPxuzjNczCec\u002fpisfHMgX5z8wpLFgXCbnP7ocRKXwNOc\u002f+AGCGGxD5z8257+L51HnP3TM\u002ff5iYOc\u002fsrE7ct5u5z\u002fwlnnlWX3nP6Cp3Oa7i+c\u002fULw\u002f6B2a5z8Az6Lpf6jnP7DhBevhtuc\u002fAYTf60PF5z\u002fufkrSi9PnP3wJLLjT4ec\u002faQSXnhvw5z\u002f3jniEY\u002f7nPzlGJJ2QDOg\u002f9IeyLKIa6D9RWbe7syjoP60qvErFNug\u002faGxK2tZE6D\u002fFPU9p6FLoP+4vkqjdYOg\u002fFyLV59Ju6D\u002fho44myHzoP\u002fukm0Ogiug\u002fFqaoYHiY6D8wp7V9UKboP0qowpootOg\u002fZanPtwDC6D9\u002fqtzU2M\u002foP5qr6fGw3eg\u002ftKz2Donr6D\u002fOrQMsYfnoP2+TDUgbB+k\u002fEHkXZNUU6T+xXiGAjyLpP7G0tJxJMOk\u002fUZq+uAM+6T\u002fyf8jUvUvpP5Nl0vB3Wek\u002fYXwpIBNn6T\u002fQIvdOrnTpP0DJxH1Jguk\u002fDuAbreSP6T99hunbf53pP+wstwobq+k\u002fBiYHUZa46T8hH1eXEcbpP5qIMN6M0+k\u002ftIGAJAjh6T\u002fPetBqg+7pP+lzILH+++k\u002fA21w93kJ6j8dZsA99RbqPzhfEIRwJOo\u002fUlhgyusx6j\u002fLwTkRZz\u002fqP+W6iVfiTOo\u002fALTZnV1a6j8Z9sXvt2fqPzI4skESdeo\u002fS3qek2yC6j9lvIrlxo\u002fqPypR7CT\u002fnOo\u002f8OVNZDeq6j+1eq+jb7fqP3oPEeOnxOo\u002fQKRyIuDR6j9lqV1iGN\u002fqPyo+v6FQ7Oo\u002fhfGRnGX56j\u002ffpGSXegbrPzpYN5KPE+s\u002flAsKjaQg6z\u002fuvtyHuS3rP0lyr4LOOus\u002foyWCfeNH6z\u002fbUzXr01TrPxOC6FjEYes\u002fqSAlx7Ru6z\u002fhTtg0pXvrPxl9i6KViOs\u002fsBvIEIaV6z\u002fnSXt+dqLrPx94Luxmr+s\u002fVqbhWVe86z\u002f16rvZIcnrP5Mvllns1es\u002f0gPn2Lbi6z+otrrtWe\u002frP39pjgL9++s\u002f9qvYFqAI7D\u002fMXqwrQxXsPxZiKDm9Iew\u002fYWWkRjcu7D8K2alUsTrsPzX3IJwAR+w\u002fYBWY409T7D+LMw8rn1\u002fsP7ZRhnLua+w\u002f4G\u002f9uT147D8LjnQBjYTsPzas60jckOw\u002fxcNs4\u002f6c7D\u002f70+288qjsPzHkbpbmtOw\u002fZ\u002fTvb9rA7D+cBHFJzszsP6R41BmR2Ow\u002fTHyu6VPk7D9U8BG6FvDsP1tkdYrZ++w\u002fs318zWgH7T8Kl4MQ+BLtP8EgFFSHHu0\u002fGDoblxYq7T9wUyLapTXtP8dsKR01Qe0\u002fH4YwYMRM7T92nzejU1jtP\u002ftv64+sY+0\u002fIdAVfAVv7T9HMEBoXnrtP22QalS3he0\u002fkvCUQBCR7T92gMe6L5ztP\u002fufcDRPp+0\u002f3y+jrm6y7T\u002fDv9Uojr3tP\u002fz0yLRwyO0\u002fNSq8QFPT7T9tX6\u002fMNd7tP6aUolgY6e0\u002f38mV5Prz7T8Y\u002f4hw3f7tP\u002frPmh9\u002fCe4\u002ffjAjziAU7j9hATV9wh7uP0TSRixkKe4\u002fJ6NY2wU07j+qA+GJpz7uP43U8jhJSe4\u002fcKUE6OpT7j9TdhaXjF7uP9bWnkUuae4\u002fuaew9M9z7j\u002fd6ahMLH7uPwIsoaSIiO4\u002fCo8XgZqS7j8T8o1drJzuP3vFjTq+pu4\u002fhCgEF9Cw7j\u002fs+wP04bruPwCOpV6jxO4\u002fFCBHyWTO7j8osugzJtjuPzxEip7n4e4\u002f8WWiCKnr7j8F+ENzavXuPxmK5d0r\u002f+4\u002fcjCM1ZUI7z\u002fL1jLN\u002fxHvPyN92cRpG+8\u002flQ\u002fZ8XMk7z9mEmIffi3vPzcV60yINu8\u002fqafqeZI\u002f7z96qnOnnEjvP+w8c9SmUe8\u002fvT\u002f8AbFa7z+OQoUvu2PvPwDVhFzFbO8\u002f0dcNis917z9Dag232X7vPzNZXQB6h+8\u002fIkitSRqQ7z8lDMYyRJjvPyfQ3htuoO8\u002fyiNuBJio7z+49Iu5O7DvP0ZVIG7ft+8\u002fNCY+I4O\u002f7z\u002fChtLXJsfvP6CnmZ8vzu8\u002f3Mi+KYHU7z92Wm200trvP7J7kj4k4e8\u002ffs7QhJXm7z\u002frsIXKBuzvPzPw6O5W8O8\u002f2p\u002fVE6f07z8i3zg49\u002fjvP8qOJV1H\u002fe8\u002fAAAAAAAA8D8="},"type":"scatter","xaxis":"x","yaxis":"y2"}],                        {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermap":[{"type":"scattermap","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,0.94],"title":{"text":"Paper Title"},"tickangle":45},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"\u003cb\u003eCitation Count\u003c\u002fb\u003e"},"type":"log"},"yaxis2":{"anchor":"x","overlaying":"y","side":"right","title":{"text":"\u003cb\u003eCumulative %\u003c\u002fb\u003e"},"tickformat":".0%","showgrid":false},"title":{"text":"Citation Counts and Cumulative Distribution"},"legend":{"x":0.7,"y":0.9},"margin":{"b":150,"t":120},"hovermode":"closest","updatemenus":[{"buttons":[{"args":[{"yaxis.type":"linear"}],"label":"Linear Scale","method":"relayout"},{"args":[{"yaxis.type":"log"}],"label":"Log Scale","method":"relayout"}],"direction":"left","pad":{"r":10,"t":10},"showactive":true,"type":"buttons","x":0.0,"xanchor":"left","y":1.15,"yanchor":"top"}],"height":700},                        {"responsive": true}                    )                };            </script>        </div>
    </div>

    <hr>


<div style="margin: 20px; font-family: Arial, sans-serif;">
    <button id="toggle-btn" onclick="toggleList()" style="padding: 10px 20px; font-size: 16px; cursor: pointer;">Hide Paper List</button>
</div>


<div id="paper-list-container" style="display: block; font-family: Arial, sans-serif; margin: 20px;">

<h3>Stats by year</h3>

<table border="1">
  <thead>
    <tr style="text-align: right;">
      <th>year</th>
      <th>npapers</th>
      <th>% of citations</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>2015</td>
      <td>24</td>
      <td>10.72</td>
    </tr>
    <tr>
      <td>2016</td>
      <td>24</td>
      <td>22.42</td>
    </tr>
    <tr>
      <td>2017</td>
      <td>23</td>
      <td>17.83</td>
    </tr>
    <tr>
      <td>2018</td>
      <td>20</td>
      <td>6.75</td>
    </tr>
    <tr>
      <td>2019</td>
      <td>24</td>
      <td>4.95</td>
    </tr>
    <tr>
      <td>2020</td>
      <td>18</td>
      <td>5.06</td>
    </tr>
    <tr>
      <td>2021</td>
      <td>15</td>
      <td>3.68</td>
    </tr>
    <tr>
      <td>2022</td>
      <td>5</td>
      <td>1.35</td>
    </tr>
    <tr>
      <td>2023</td>
      <td>6</td>
      <td>1.72</td>
    </tr>
    <tr>
      <td>2024</td>
      <td>1</td>
      <td>0.62</td>
    </tr>
    <tr>
      <td>2025</td>
      <td>0</td>
      <td>0.00</td>
    </tr>
  </tbody>
</table>

    <h3>Paper List (Sorted by Citations)</h3>
    <ul>
        <li><a href="https://scholar.google.com/scholar?q=In-Datacenter%20Performance%20Analysis%20of%20a%20Tensor%20Processing%20Unit" target="_blank" style="text-decoration: none; color: #3366cc;">In-Datacenter Performance Analysis of a Tensor Processing Unit</a> - <b>3.834102656</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EIE%3A%20efficient%20inference%20engine%20on%20compressed%20deep%20neural%20network" target="_blank" style="text-decoration: none; color: #3366cc;">EIE: efficient inference engine on compressed deep neural network</a> - <b>3.558948446</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ISAAC%3A%20a%20convolutional%20neural%20network%20accelerator%20with%20in-situ%20analog%20arithmetic%20in%20crossbars" target="_blank" style="text-decoration: none; color: #3366cc;">ISAAC: a convolutional neural network accelerator with in-situ analog arithmetic in crossbars</a> - <b>3.40790054</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Eyeriss%3A%20a%20spatial%20architecture%20for%20energy-efficient%20dataflow%20for%20convolutional%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Eyeriss: a spatial architecture for energy-efficient dataflow for convolutional neural networks</a> - <b>3.348888723</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PRIME%3A%20a%20novel%20processing-in-memory%20architecture%20for%20neural%20network%20computation%20in%20ReRAM-based%20main%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">PRIME: a novel processing-in-memory architecture for neural network computation in ReRAM-based main memory</a> - <b>3.315760491</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SCNN%3A%20An%20Accelerator%20for%20Compressed-sparse%20Convolutional%20Neural%20Networks" target="_blank" style="text-decoration: none; color: #3366cc;">SCNN: An Accelerator for Compressed-sparse Convolutional Neural Networks</a> - <b>3.231724383</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ShiDianNao%3A%20shifting%20vision%20processing%20closer%20to%20the%20sensor" target="_blank" style="text-decoration: none; color: #3366cc;">ShiDianNao: shifting vision processing closer to the sensor</a> - <b>3.166430114</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20scalable%20processing-in-memory%20accelerator%20for%20parallel%20graph%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">A scalable processing-in-memory accelerator for parallel graph processing</a> - <b>3.070407322</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cnvlutin%3A%20ineffectual-neuron-free%20deep%20neural%20network%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">Cnvlutin: ineffectual-neuron-free deep neural network computing</a> - <b>3.000434077</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Minerva%3A%20enabling%20low-power%2C%20highly-accurate%20deep%20neural%20network%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">Minerva: enabling low-power, highly-accurate deep neural network accelerators</a> - <b>2.916453949</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bit%20fusion%3A%20bit-level%20dynamically%20composable%20architecture%20for%20accelerating%20deep%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Bit fusion: bit-level dynamically composable architecture for accelerating deep neural networks</a> - <b>2.880813592</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20configurable%20cloud-scale%20DNN%20processor%20for%20real-time%20AI" target="_blank" style="text-decoration: none; color: #3366cc;">A configurable cloud-scale DNN processor for real-time AI</a> - <b>2.879669206</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Heracles%3A%20improving%20resource%20efficiency%20at%20scale" target="_blank" style="text-decoration: none; color: #3366cc;">Heracles: improving resource efficiency at scale</a> - <b>2.873320602</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MLPerf%20inference%20benchmark" target="_blank" style="text-decoration: none; color: #3366cc;">MLPerf inference benchmark</a> - <b>2.854913022</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Profiling%20a%20warehouse-scale%20computer" target="_blank" style="text-decoration: none; color: #3366cc;">Profiling a warehouse-scale computer</a> - <b>2.850646235</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PIM-enabled%20instructions%3A%20a%20low-overhead%2C%20locality-aware%20processing-in-memory%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">PIM-enabled instructions: a low-overhead, locality-aware processing-in-memory architecture</a> - <b>2.848804701</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Neurocube%3A%20a%20programmable%20digital%20neuromorphic%20architecture%20with%20high-density%203D%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">Neurocube: a programmable digital neuromorphic architecture with high-density 3D memory</a> - <b>2.770115295</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Ten%20lessons%20from%20three%20generations%20shaped%20Google%27s%20TPUv4i" target="_blank" style="text-decoration: none; color: #3366cc;">Ten lessons from three generations shaped Google's TPUv4i</a> - <b>2.739572344</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Neural%20cache%3A%20bit-serial%20in-cache%20acceleration%20of%20deep%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Neural cache: bit-serial in-cache acceleration of deep neural networks</a> - <b>2.736396502</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Scalpel%3A%20Customizing%20DNN%20Pruning%20to%20the%20Underlying%20Hardware%20Parallelism" target="_blank" style="text-decoration: none; color: #3366cc;">Scalpel: Customizing DNN Pruning to the Underlying Hardware Parallelism</a> - <b>2.718501689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cambricon%3A%20an%20instruction%20set%20architecture%20for%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Cambricon: an instruction set architecture for neural networks</a> - <b>2.688419822</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Maximizing%20CNN%20Accelerator%20Efficiency%20Through%20Resource%20Partitioning" target="_blank" style="text-decoration: none; color: #3366cc;">Maximizing CNN Accelerator Efficiency Through Resource Partitioning</a> - <b>2.646403726</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accel-sim%3A%20an%20extensible%20simulation%20framework%20for%20validated%20GPU%20modeling" target="_blank" style="text-decoration: none; color: #3366cc;">Accel-sim: an extensible simulation framework for validated GPU modeling</a> - <b>2.64246452</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Plasticine%3A%20A%20Reconfigurable%20Architecture%20For%20Parallel%20Paterns" target="_blank" style="text-decoration: none; color: #3366cc;">Plasticine: A Reconfigurable Architecture For Parallel Paterns</a> - <b>2.618048097</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Firesim%3A%20FPGA-accelerated%20cycle-exact%20scale-out%20system%20simulation%20in%20the%20public%20cloud" target="_blank" style="text-decoration: none; color: #3366cc;">Firesim: FPGA-accelerated cycle-exact scale-out system simulation in the public cloud</a> - <b>2.600972896</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Biscuit%3A%20a%20framework%20for%20near-data%20processing%20of%20big%20data%20workloads" target="_blank" style="text-decoration: none; color: #3366cc;">Biscuit: a framework for near-data processing of big data workloads</a> - <b>2.57634135</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Transparent%20offloading%20and%20mapping%20%28TOM%29%3A%20enabling%20programmer-transparent%20near-data%20processing%20in%20GPU%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Transparent offloading and mapping (TOM): enabling programmer-transparent near-data processing in GPU systems</a> - <b>2.555094449</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hardware%20architecture%20and%20software%20stack%20for%20PIM%20based%20on%20commercial%20DRAM%20technology" target="_blank" style="text-decoration: none; color: #3366cc;">Hardware architecture and software stack for PIM based on commercial DRAM technology</a> - <b>2.553883027</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RecNMP%3A%20accelerating%20personalized%20recommendation%20with%20near-memory%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">RecNMP: accelerating personalized recommendation with near-memory processing</a> - <b>2.514547753</b> citations</li><li><a href="https://scholar.google.com/scholar?q=UCNN%3A%20exploiting%20computational%20reuse%20in%20deep%20neural%20networks%20via%20weight%20repetition" target="_blank" style="text-decoration: none; color: #3366cc;">UCNN: exploiting computational reuse in deep neural networks via weight repetition</a> - <b>2.489958479</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MCM-GPU%3A%20Multi-Chip-Module%20GPUs%20for%20Continued%20Performance%20Scalability" target="_blank" style="text-decoration: none; color: #3366cc;">MCM-GPU: Multi-Chip-Module GPUs for Continued Performance Scalability</a> - <b>2.480006943</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ScaleDeep%3A%20A%20Scalable%20Compute%20Architecture%20for%20Learning%20and%20Evaluating%20Deep%20Networks" target="_blank" style="text-decoration: none; color: #3366cc;">ScaleDeep: A Scalable Compute Architecture for Learning and Evaluating Deep Networks</a> - <b>2.478566496</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RedEye%3A%20analog%20ConvNet%20image%20sensor%20architecture%20for%20continuous%20mobile%20vision" target="_blank" style="text-decoration: none; color: #3366cc;">RedEye: analog ConvNet image sensor architecture for continuous mobile vision</a> - <b>2.45484486</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Back%20to%20the%20future%3A%20leveraging%20Belady%27s%20algorithm%20for%20improved%20cache%20replacement" target="_blank" style="text-decoration: none; color: #3366cc;">Back to the future: leveraging Belady's algorithm for improved cache replacement</a> - <b>2.45331834</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Data%20reorganization%20in%20memory%20using%203D-stacked%20DRAM" target="_blank" style="text-decoration: none; color: #3366cc;">Data reorganization in memory using 3D-stacked DRAM</a> - <b>2.450249108</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BlueDBM%3A%20an%20appliance%20for%20big%20data%20analytics" target="_blank" style="text-decoration: none; color: #3366cc;">BlueDBM: an appliance for big data analytics</a> - <b>2.436162647</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Pioneering%20chiplet%20technology%20and%20design%20for%20the%20AMD%20EPYC%E2%84%A2and%20Ryzen%E2%84%A2processor%20families" target="_blank" style="text-decoration: none; color: #3366cc;">Pioneering chiplet technology and design for the AMD EPYC™and Ryzen™processor families</a> - <b>2.436162647</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Stream-Dataflow%20Acceleration" target="_blank" style="text-decoration: none; color: #3366cc;">Stream-Dataflow Acceleration</a> - <b>2.426511261</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Redundant%20memory%20mappings%20for%20fast%20access%20to%20large%20memories" target="_blank" style="text-decoration: none; color: #3366cc;">Redundant memory mappings for fast access to large memories</a> - <b>2.424881637</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CraterLake%3A%20a%20hardware%20accelerator%20for%20efficient%20unbounded%20computation%20on%20encrypted%20data" target="_blank" style="text-decoration: none; color: #3366cc;">CraterLake: a hardware accelerator for efficient unbounded computation on encrypted data</a> - <b>2.421603927</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Revisiting%20RowHammer%3A%20an%20experimental%20analysis%20of%20modern%20DRAM%20devices%20and%20mitigation%20techniques" target="_blank" style="text-decoration: none; color: #3366cc;">Revisiting RowHammer: an experimental analysis of modern DRAM devices and mitigation techniques</a> - <b>2.418301291</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ACT%3A%20designing%20sustainable%20computer%20systems%20with%20an%20architectural%20carbon%20modeling%20tool" target="_blank" style="text-decoration: none; color: #3366cc;">ACT: designing sustainable computer systems with an architectural carbon modeling tool</a> - <b>2.418301291</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Sparse%20ReRAM%20engine%3A%20joint%20exploration%20of%20activation%20and%20weight%20sparsity%20in%20compressed%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Sparse ReRAM engine: joint exploration of activation and weight sparsity in compressed neural networks</a> - <b>2.403120521</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Energy-efficient%20neural%20network%20accelerator%20based%20on%20outlier-aware%20low-precision%20computation" target="_blank" style="text-decoration: none; color: #3366cc;">Energy-efficient neural network accelerator based on outlier-aware low-precision computation</a> - <b>2.397940009</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DeepRecSys%3A%20a%20system%20for%20optimizing%20end-to-end%20at-scale%20neural%20recommendation%20inference" target="_blank" style="text-decoration: none; color: #3366cc;">DeepRecSys: a system for optimizing end-to-end at-scale neural recommendation inference</a> - <b>2.396199347</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SnaPEA%3A%20predictive%20early%20activation%20for%20reducing%20computation%20in%20deep%20convolutional%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">SnaPEA: predictive early activation for reducing computation in deep convolutional neural networks</a> - <b>2.365487985</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Full-stack%2C%20real-system%20quantum%20computer%20studies%3A%20architectural%20comparisons%20and%20design%20insights" target="_blank" style="text-decoration: none; color: #3366cc;">Full-stack, real-system quantum computer studies: architectural comparisons and design insights</a> - <b>2.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BTS%3A%20an%20accelerator%20for%20bootstrappable%20fully%20homomorphic%20encryption" target="_blank" style="text-decoration: none; color: #3366cc;">BTS: an accelerator for bootstrappable fully homomorphic encryption</a> - <b>2.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ELSA%3A%20hardware-software%20co-design%20for%20efficient%2C%20lightweight%20self-attention%20mechanism%20in%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">ELSA: hardware-software co-design for efficient, lightweight self-attention mechanism in neural networks</a> - <b>2.357934847</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Energy%20efficient%20architecture%20for%20graph%20analytics%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">Energy efficient architecture for graph analytics accelerators</a> - <b>2.356025857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=New%20attacks%20and%20defense%20for%20encrypted-address%20cache" target="_blank" style="text-decoration: none; color: #3366cc;">New attacks and defense for encrypted-address cache</a> - <b>2.33243846</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DjiNN%20and%20Tonic%3A%20DNN%20as%20a%20service%20and%20its%20implications%20for%20future%20warehouse%20scale%20computers" target="_blank" style="text-decoration: none; color: #3366cc;">DjiNN and Tonic: DNN as a service and its implications for future warehouse scale computers</a> - <b>2.328379603</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EDDIE%3A%20EM-Based%20Detection%20of%20Deviations%20in%20Program%20Execution" target="_blank" style="text-decoration: none; color: #3366cc;">EDDIE: EM-Based Detection of Deviations in Program Execution</a> - <b>2.324282455</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Regaining%20Lost%20Cycles%20with%20HotCalls%3A%20A%20Fast%20Interface%20for%20SGX%20Secure%20Enclaves" target="_blank" style="text-decoration: none; color: #3366cc;">Regaining Lost Cycles with HotCalls: A Fast Interface for SGX Secure Enclaves</a> - <b>2.322219295</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Dynamo%3A%20facebook%27s%20data%20center-wide%20power%20management%20system" target="_blank" style="text-decoration: none; color: #3366cc;">Dynamo: facebook's data center-wide power management system</a> - <b>2.301029996</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Understanding%20and%20Optimizing%20Asynchronous%20Low-Precision%20Stochastic%20Gradient%20Descent" target="_blank" style="text-decoration: none; color: #3366cc;">Understanding and Optimizing Asynchronous Low-Precision Stochastic Gradient Descent</a> - <b>2.29666519</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Rumba%3A%20an%20online%20quality%20management%20system%20for%20approximate%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">Rumba: an online quality management system for approximate computing</a> - <b>2.294466226</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Asymptotic%20improvements%20to%20quantum%20circuits%20via%20qutrits" target="_blank" style="text-decoration: none; color: #3366cc;">Asymptotic improvements to quantum circuits via qutrits</a> - <b>2.292256071</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Statistical%20assertions%20for%20validating%20patterns%20and%20finding%20bugs%20in%20quantum%20programs" target="_blank" style="text-decoration: none; color: #3366cc;">Statistical assertions for validating patterns and finding bugs in quantum programs</a> - <b>2.290034611</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Gist%3A%20efficient%20data%20encoding%20for%20deep%20neural%20network%20training" target="_blank" style="text-decoration: none; color: #3366cc;">Gist: efficient data encoding for deep neural network training</a> - <b>2.285557309</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Clank%3A%20Architectural%20Support%20for%20Intermittent%20Computation" target="_blank" style="text-decoration: none; color: #3366cc;">Clank: Architectural Support for Intermittent Computation</a> - <b>2.283301229</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerating%20distributed%20reinforcement%20learning%20with%20in-switch%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerating distributed reinforcement learning with in-switch computing</a> - <b>2.276461804</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DSAGEN%3A%20synthesizing%20programmable%20spatial%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">DSAGEN: synthesizing programmable spatial accelerators</a> - <b>2.274157849</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CoSA%3A%20scheduling%20by%20constrained%20optimization%20for%20spatial%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">CoSA: scheduling by constrained optimization for spatial accelerators</a> - <b>2.267171728</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TWiCe%3A%20preventing%20row-hammering%20by%20exploiting%20time%20window%20counters" target="_blank" style="text-decoration: none; color: #3366cc;">TWiCe: preventing row-hammering by exploiting time window counters</a> - <b>2.252853031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HeteroOS%3A%20OS%20Design%20for%20Heterogeneous%20Memory%20Management%20in%20Datacenter" target="_blank" style="text-decoration: none; color: #3366cc;">HeteroOS: OS Design for Heterogeneous Memory Management in Datacenter</a> - <b>2.250420002</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20Reach%20Profiler%20%28REAPER%29%3A%20Enabling%20the%20Mitigation%20of%20DRAM%20Retention%20Failures%20via%20Profiling%20at%20Aggressive%20Conditions" target="_blank" style="text-decoration: none; color: #3366cc;">The Reach Profiler (REAPER): Enabling the Mitigation of DRAM Retention Failures via Profiling at Aggressive Conditions</a> - <b>2.245512668</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Duality%20cache%20for%20data%20parallel%20acceleration" target="_blank" style="text-decoration: none; color: #3366cc;">Duality cache for data parallel acceleration</a> - <b>2.235528447</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ASIC%20clouds%3A%20specializing%20the%20datacenter" target="_blank" style="text-decoration: none; color: #3366cc;">ASIC clouds: specializing the datacenter</a> - <b>2.227886705</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Secure%20Hierarchy-Aware%20Cache%20Replacement%20Policy%20%28SHARP%29%3A%20Defending%20Against%20Cache-Based%20Side%20Channel%20Atacks" target="_blank" style="text-decoration: none; color: #3366cc;">Secure Hierarchy-Aware Cache Replacement Policy (SHARP): Defending Against Cache-Based Side Channel Atacks</a> - <b>2.227886705</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Architecting%20to%20achieve%20a%20billion%20requests%20per%20second%20throughput%20on%20a%20single%20key-value%20store%20server%20platform" target="_blank" style="text-decoration: none; color: #3366cc;">Architecting to achieve a billion requests per second throughput on a single key-value store server platform</a> - <b>2.225309282</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Automatic%20generation%20of%20efficient%20accelerators%20for%20reconfigurable%20hardware" target="_blank" style="text-decoration: none; color: #3366cc;">Automatic generation of efficient accelerators for reconfigurable hardware</a> - <b>2.220108088</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Modular%20routing%20design%20for%20chiplet-based%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Modular routing design for chiplet-based systems</a> - <b>2.220108088</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CoNDA%3A%20efficient%20cache%20coherence%20support%20for%20near-data%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">CoNDA: efficient cache coherence support for near-data accelerators</a> - <b>2.214843848</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20Mondrian%20Data%20Engine" target="_blank" style="text-decoration: none; color: #3366cc;">The Mondrian Data Engine</a> - <b>2.209515015</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficient%20invisible%20speculative%20execution%20through%20selective%20delay%20and%20value%20prediction" target="_blank" style="text-decoration: none; color: #3366cc;">Efficient invisible speculative execution through selective delay and value prediction</a> - <b>2.209515015</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SoftSKU%3A%20optimizing%20server%20architectures%20for%20microservice%20diversity%20%40scale" target="_blank" style="text-decoration: none; color: #3366cc;">SoftSKU: optimizing server architectures for microservice diversity @scale</a> - <b>2.201397124</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MGPUSim%3A%20enabling%20multi-GPU%20performance%20modeling%20and%20optimization" target="_blank" style="text-decoration: none; color: #3366cc;">MGPUSim: enabling multi-GPU performance modeling and optimization</a> - <b>2.198657087</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Centaur%3A%20a%20chiplet-based%2C%20hybrid%20sparse-dense%20accelerator%20for%20personalized%20recommendations" target="_blank" style="text-decoration: none; color: #3366cc;">Centaur: a chiplet-based, hybrid sparse-dense accelerator for personalized recommendations</a> - <b>2.198657087</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Warped-compression%3A%20enabling%20power%20efficient%20GPUs%20through%20register%20compression" target="_blank" style="text-decoration: none; color: #3366cc;">Warped-compression: enabling power efficient GPUs through register compression</a> - <b>2.193124598</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Warped-slicer%3A%20efficient%20intra-SM%20slicing%20through%20dynamic%20resource%20partitioning%20for%20GPU%20multiprogramming" target="_blank" style="text-decoration: none; color: #3366cc;">Warped-slicer: efficient intra-SM slicing through dynamic resource partitioning for GPU multiprogramming</a> - <b>2.190331698</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Laconic%20deep%20learning%20inference%20acceleration" target="_blank" style="text-decoration: none; color: #3366cc;">Laconic deep learning inference acceleration</a> - <b>2.187520721</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Semantic%20locality%20and%20context-based%20prefetching%20using%20reinforcement%20learning" target="_blank" style="text-decoration: none; color: #3366cc;">Semantic locality and context-based prefetching using reinforcement learning</a> - <b>2.170261715</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Think%20fast%3A%20a%20tensor%20streaming%20processor%20%28TSP%29%20for%20accelerating%20deep%20learning%20workloads" target="_blank" style="text-decoration: none; color: #3366cc;">Think fast: a tensor streaming processor (TSP) for accelerating deep learning workloads</a> - <b>2.167317335</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20case%20for%20core-assisted%20bottleneck%20acceleration%20in%20GPUs%3A%20enabling%20flexible%20data%20compression%20with%20assist%20warps" target="_blank" style="text-decoration: none; color: #3366cc;">A case for core-assisted bottleneck acceleration in GPUs: enabling flexible data compression with assist warps</a> - <b>2.155336037</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Xuantie-910%3A%20a%20commercial%20multi-core%2012-stage%20pipeline%20out-of-order%2064-bit%20high%20performance%20RISC-V%20processor%20with%20vector%20extension" target="_blank" style="text-decoration: none; color: #3366cc;">Xuantie-910: a commercial multi-core 12-stage pipeline out-of-order 64-bit high performance RISC-V processor with vector extension</a> - <b>2.155336037</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20fully%20associative%2C%20tagless%20DRAM%20cache" target="_blank" style="text-decoration: none; color: #3366cc;">A fully associative, tagless DRAM cache</a> - <b>2.152288344</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerating%20dependent%20cache%20misses%20with%20an%20enhanced%20memory%20controller" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerating dependent cache misses with an enhanced memory controller</a> - <b>2.152288344</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GraFboost%3A%20using%20accelerated%20flash%20storage%20for%20external%20graph%20analytics" target="_blank" style="text-decoration: none; color: #3366cc;">GraFboost: using accelerated flash storage for external graph analytics</a> - <b>2.152288344</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Perceptron-based%20prefetch%20filtering" target="_blank" style="text-decoration: none; color: #3366cc;">Perceptron-based prefetch filtering</a> - <b>2.149219113</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Flexible%20software%20profiling%20of%20GPU%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Flexible software profiling of GPU architectures</a> - <b>2.146128036</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20multi-neural%20network%20acceleration%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">A multi-neural network acceleration architecture</a> - <b>2.146128036</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Flexible%20auto-refresh%3A%20enabling%20scalable%20and%20energy-efficient%20DRAM%20refresh%20reductions" target="_blank" style="text-decoration: none; color: #3366cc;">Flexible auto-refresh: enabling scalable and energy-efficient DRAM refresh reductions</a> - <b>2.1430148</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Quantitative%20comparison%20of%20hardware%20transactional%20memory%20for%20Blue%20Gene/Q%2C%20zEnterprise%20EC12%2C%20Intel%20Core%2C%20and%20POWER8" target="_blank" style="text-decoration: none; color: #3366cc;">Quantitative comparison of hardware transactional memory for Blue Gene/Q, zEnterprise EC12, Intel Core, and POWER8</a> - <b>2.139879086</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bit-plane%20compression%3A%20transforming%20data%20for%20better%20compression%20in%20many-core%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Bit-plane compression: transforming data for better compression in many-core architectures</a> - <b>2.139879086</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SpinalFlow%3A%20an%20architecture%20and%20dataflow%20tailored%20for%20spiking%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">SpinalFlow: an architecture and dataflow tailored for spiking neural networks</a> - <b>2.136720567</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Sparsity-aware%20and%20re-configurable%20NPU%20architecture%20for%20samsung%20flagship%20mobile%20SoC" target="_blank" style="text-decoration: none; color: #3366cc;">Sparsity-aware and re-configurable NPU architecture for samsung flagship mobile SoC</a> - <b>2.136720567</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Language-level%20persistency" target="_blank" style="text-decoration: none; color: #3366cc;">Language-level persistency</a> - <b>2.133538908</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PACMAN%3A%20attacking%20ARM%20pointer%20authentication%20with%20speculative%20execution" target="_blank" style="text-decoration: none; color: #3366cc;">PACMAN: attacking ARM pointer authentication with speculative execution</a> - <b>2.123851641</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CAWA%3A%20coordinated%20warp%20scheduling%20and%20cache%20prioritization%20for%20critical%20warp%20acceleration%20of%20GPGPU%20workloads" target="_blank" style="text-decoration: none; color: #3366cc;">CAWA: coordinated warp scheduling and cache prioritization for critical warp acceleration of GPGPU workloads</a> - <b>2.117271296</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Genax%3A%20a%20genome%20sequencing%20accelerator" target="_blank" style="text-decoration: none; color: #3366cc;">Genax: a genome sequencing accelerator</a> - <b>2.117271296</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Enabling%20scientific%20computing%20on%20memristive%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">Enabling scientific computing on memristive accelerators</a> - <b>2.113943352</b> citations</li><li><a href="https://scholar.google.com/scholar?q=AsmDB%3A%20understanding%20and%20mitigating%20front-end%20stalls%20in%20warehouse-scale%20computers" target="_blank" style="text-decoration: none; color: #3366cc;">AsmDB: understanding and mitigating front-end stalls in warehouse-scale computers</a> - <b>2.103803721</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Treadmill%3A%20attributing%20the%20source%20of%20tail%20latency%20through%20precise%20load%20testing%20and%20statistical%20inference" target="_blank" style="text-decoration: none; color: #3366cc;">Treadmill: attributing the source of tail latency through precise load testing and statistical inference</a> - <b>2.100370545</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MuonTrap%3A%20preventing%20cross-domain%20spectre-like%20attacks%20by%20capturing%20speculative%20state" target="_blank" style="text-decoration: none; color: #3366cc;">MuonTrap: preventing cross-domain spectre-like attacks by capturing speculative state</a> - <b>2.100370545</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DRQ%3A%20dynamic%20region-based%20quantization%20for%20deep%20neural%20network%20acceleration" target="_blank" style="text-decoration: none; color: #3366cc;">DRQ: dynamic region-based quantization for deep neural network acceleration</a> - <b>2.100370545</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Euphrates%3A%20algorithm-SoC%20co-design%20for%20low-power%20mobile%20continuous%20vision" target="_blank" style="text-decoration: none; color: #3366cc;">Euphrates: algorithm-SoC co-design for low-power mobile continuous vision</a> - <b>2.096910013</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Mitigating%20wordline%20crosstalk%20using%20adaptive%20trees%20of%20counters" target="_blank" style="text-decoration: none; color: #3366cc;">Mitigating wordline crosstalk using adaptive trees of counters</a> - <b>2.096910013</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GANAX%3A%20a%20unified%20MIMD-SIMD%20acceleration%20for%20generative%20adversarial%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">GANAX: a unified MIMD-SIMD acceleration for generative adversarial networks</a> - <b>2.096910013</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FLIN%3A%20enabling%20fairness%20and%20enhancing%20performance%20in%20modern%20NVMe%20solid%20state%20drives" target="_blank" style="text-decoration: none; color: #3366cc;">FLIN: enabling fairness and enhancing performance in modern NVMe solid state drives</a> - <b>2.089905111</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Dual-side%20sparse%20tensor%20core" target="_blank" style="text-decoration: none; color: #3366cc;">Dual-side sparse tensor core</a> - <b>2.089905111</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CROW%3A%20a%20low-cost%20substrate%20for%20improving%20DRAM%20performance%2C%20energy%20efficiency%2C%20and%20reliability" target="_blank" style="text-decoration: none; color: #3366cc;">CROW: a low-cost substrate for improving DRAM performance, energy efficiency, and reliability</a> - <b>2.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MicroScope%3A%20enabling%20microarchitectural%20replay%20attacks" target="_blank" style="text-decoration: none; color: #3366cc;">MicroScope: enabling microarchitectural replay attacks</a> - <b>2.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RaPiD%3A%20AI%20accelerator%20for%20ultra-low%20precision%20training%20and%20inference" target="_blank" style="text-decoration: none; color: #3366cc;">RaPiD: AI accelerator for ultra-low precision training and inference</a> - <b>2.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hybrid%20TLB%20Coalescing%3A%20Improving%20TLB%20Translation%20Coverage%20under%20Diverse%20Fragmented%20Memory%20Allocations" target="_blank" style="text-decoration: none; color: #3366cc;">Hybrid TLB Coalescing: Improving TLB Translation Coverage under Diverse Fragmented Memory Allocations</a> - <b>2.075546961</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Computation%20reuse%20in%20DNNs%20by%20exploiting%20input%20similarity" target="_blank" style="text-decoration: none; color: #3366cc;">Computation reuse in DNNs by exploiting input similarity</a> - <b>2.075546961</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EVA2%3A%20exploiting%20temporal%20redundancy%20in%20live%20computer%20vision" target="_blank" style="text-decoration: none; color: #3366cc;">EVA2: exploiting temporal redundancy in live computer vision</a> - <b>2.075546961</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bouquet%20of%20instruction%20pointers%3A%20instruction%20pointer%20classifier-based%20spatial%20hardware%20prefetching" target="_blank" style="text-decoration: none; color: #3366cc;">Bouquet of instruction pointers: instruction pointer classifier-based spatial hardware prefetching</a> - <b>2.06069784</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Snafu%3A%20an%20ultra-low-power%2C%20energy-minimal%20CGRA-generation%20framework%20and%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">Snafu: an ultra-low-power, energy-minimal CGRA-generation framework and architecture</a> - <b>2.06069784</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ObfusMem%3A%20A%20Low-Overhead%20Access%20Obfuscation%20for%20Trusted%20Memories" target="_blank" style="text-decoration: none; color: #3366cc;">ObfusMem: A Low-Overhead Access Obfuscation for Trusted Memories</a> - <b>2.056904851</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Architecting%20noisy%20intermediate-scale%20trapped%20ion%20quantum%20computers" target="_blank" style="text-decoration: none; color: #3366cc;">Architecting noisy intermediate-scale trapped ion quantum computers</a> - <b>2.056904851</b> citations</li><li><a href="https://scholar.google.com/scholar?q=iPIM%3A%20programmable%20in-memory%20image%20processing%20accelerator%20using%20near-bank%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">iPIM: programmable in-memory image processing accelerator using near-bank architecture</a> - <b>2.056904851</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Interplay%20between%20hardware%20prefetcher%20and%20page%20eviction%20policy%20in%20CPU-GPU%20unified%20virtual%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">Interplay between hardware prefetcher and page eviction policy in CPU-GPU unified virtual memory</a> - <b>2.049218023</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BEAR%3A%20techniques%20for%20mitigating%20bandwidth%20bloat%20in%20gigascale%20DRAM%20caches" target="_blank" style="text-decoration: none; color: #3366cc;">BEAR: techniques for mitigating bandwidth bloat in gigascale DRAM caches</a> - <b>2.041392685</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PipeZK%3A%20accelerating%20zero-knowledge%20proof%20with%20a%20pipelined%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">PipeZK: accelerating zero-knowledge proof with a pipelined architecture</a> - <b>2.041392685</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hi-fi%20playback%3A%20tolerating%20position%20errors%20in%20shift%20operations%20of%20racetrack%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">Hi-fi playback: tolerating position errors in shift operations of racetrack memory</a> - <b>2.037426498</b> citations</li><li><a href="https://scholar.google.com/scholar?q=XED%3A%20exposing%20on-die%20error%20detection%20information%20for%20strong%20memory%20reliability" target="_blank" style="text-decoration: none; color: #3366cc;">XED: exposing on-die error detection information for strong memory reliability</a> - <b>2.033423755</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Mellow%20writes%3A%20extending%20lifetime%20in%20resistive%20memories%20through%20selective%20slow%20write%20backs" target="_blank" style="text-decoration: none; color: #3366cc;">Mellow writes: extending lifetime in resistive memories through selective slow write backs</a> - <b>2.033423755</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Rethinking%20TLB%20Designs%20in%20Virtualized%20Environments%3A%20A%20Very%20Large%20Part-of-Memory%20TLB" target="_blank" style="text-decoration: none; color: #3366cc;">Rethinking TLB Designs in Virtualized Environments: A Very Large Part-of-Memory TLB</a> - <b>2.033423755</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Stash%3A%20have%20your%20scratchpad%20and%20cache%20it%20too" target="_blank" style="text-decoration: none; color: #3366cc;">Stash: have your scratchpad and cache it too</a> - <b>2.029383778</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TIMELY%3A%20pushing%20data%20movements%20and%20interfaces%20in%20PIM%20accelerators%20towards%20local%20and%20in%20time%20domain" target="_blank" style="text-decoration: none; color: #3366cc;">TIMELY: pushing data movements and interfaces in PIM accelerators towards local and in time domain</a> - <b>2.025305865</b> citations</li><li><a href="https://scholar.google.com/scholar?q=InvisiMem%3A%20Smart%20Memory%20Defenses%20for%20Memory%20Bus%20Side%20Channel" target="_blank" style="text-decoration: none; color: #3366cc;">InvisiMem: Smart Memory Defenses for Memory Bus Side Channel</a> - <b>2.017033339</b> citations</li><li><a href="https://scholar.google.com/scholar?q=I%20see%20dead%20%CE%BCops%3A%20leaking%20secrets%20via%20Intel/AMD%20micro-op%20caches" target="_blank" style="text-decoration: none; color: #3366cc;">I see dead μops: leaking secrets via Intel/AMD micro-op caches</a> - <b>2.017033339</b> citations</li><li><a href="https://scholar.google.com/scholar?q=APPROX-NoC%3A%20A%20Data%20Approximation%20Framework%20for%20Network-On-Chip%20Architectures" target="_blank" style="text-decoration: none; color: #3366cc;">APPROX-NoC: A Data Approximation Framework for Network-On-Chip Architectures</a> - <b>2.012837225</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GraphSSD%3A%20graph%20semantics%20aware%20SSD" target="_blank" style="text-decoration: none; color: #3366cc;">GraphSSD: graph semantics aware SSD</a> - <b>2.012837225</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Evolution%20of%20the%20samsung%20exynos%20CPU%20microarchitecture" target="_blank" style="text-decoration: none; color: #3366cc;">Evolution of the samsung exynos CPU microarchitecture</a> - <b>2.004321374</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FORMS%3A%20fine-grained%20polarized%20ReRAM-based%20in-situ%20computation%20for%20mixed-signal%20DNN%20accelerator" target="_blank" style="text-decoration: none; color: #3366cc;">FORMS: fine-grained polarized ReRAM-based in-situ computation for mixed-signal DNN accelerator</a> - <b>2.004321374</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Agile%20paging%3A%20exceeding%20the%20best%20of%20nested%20and%20shadow%20paging" target="_blank" style="text-decoration: none; color: #3366cc;">Agile paging: exceeding the best of nested and shadow paging</a> - <b>2.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Access%20Pattern-Aware%20Cache%20Management%20for%20Improving%20Data%20Utilization%20in%20GPU" target="_blank" style="text-decoration: none; color: #3366cc;">Access Pattern-Aware Cache Management for Improving Data Utilization in GPU</a> - <b>2.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Prediction%20based%20execution%20on%20deep%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Prediction based execution on deep neural networks</a> - <b>2.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hydra%3A%20enabling%20low-overhead%20mitigation%20of%20row-hammer%20at%20ultra-low%20thresholds%20via%20hybrid%20tracking" target="_blank" style="text-decoration: none; color: #3366cc;">Hydra: enabling low-overhead mitigation of row-hammer at ultra-low thresholds via hybrid tracking</a> - <b>2.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Triad-NVM%3A%20persistency%20for%20integrity-protected%20and%20encrypted%20non-volatile%20memories" target="_blank" style="text-decoration: none; color: #3366cc;">Triad-NVM: persistency for integrity-protected and encrypted non-volatile memories</a> - <b>1.995635195</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GoSPA%3A%20an%20energy-efficient%20high-performance%20globally%20optimized%20sparse%20convolutional%20neural%20network%20accelerator" target="_blank" style="text-decoration: none; color: #3366cc;">GoSPA: an energy-efficient high-performance globally optimized sparse convolutional neural network accelerator</a> - <b>1.995635195</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MnnFast%3A%20a%20fast%20and%20scalable%20system%20architecture%20for%20memory-augmented%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">MnnFast: a fast and scalable system architecture for memory-augmented neural networks</a> - <b>1.986771734</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TIE%3A%20energy-efficient%20tensor%20train-based%20inference%20engine%20for%20deep%20neural%20network" target="_blank" style="text-decoration: none; color: #3366cc;">TIE: energy-efficient tensor train-based inference engine for deep neural network</a> - <b>1.986771734</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TENET%3A%20a%20framework%20for%20modeling%20tensor%20dataflow%20based%20on%20relation-centric%20notation" target="_blank" style="text-decoration: none; color: #3366cc;">TENET: a framework for modeling tensor dataflow based on relation-centric notation</a> - <b>1.986771734</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Morpheus%3A%20creating%20application%20objects%20efficiently%20for%20heterogeneous%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">Morpheus: creating application objects efficiently for heterogeneous computing</a> - <b>1.977723605</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HASCO%3A%20towards%20agile%20hardware%20and%20software%20co-design%20for%20tensor%20computation" target="_blank" style="text-decoration: none; color: #3366cc;">HASCO: towards agile hardware and software co-design for tensor computation</a> - <b>1.977723605</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Exploring%20the%20potential%20of%20heterogeneous%20von%20neumann/dataflow%20execution%20models" target="_blank" style="text-decoration: none; color: #3366cc;">Exploring the potential of heterogeneous von neumann/dataflow execution models</a> - <b>1.973127854</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Unified%20address%20translation%20for%20memory-mapped%20SSDs%20with%20FlashMap" target="_blank" style="text-decoration: none; color: #3366cc;">Unified address translation for memory-mapped SSDs with FlashMap</a> - <b>1.973127854</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Anubis%3A%20ultra-low%20overhead%20and%20recovery%20time%20for%20secure%20non-volatile%20memories" target="_blank" style="text-decoration: none; color: #3366cc;">Anubis: ultra-low overhead and recovery time for secure non-volatile memories</a> - <b>1.973127854</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Eager%20pruning%3A%20algorithm%20and%20architecture%20support%20for%20fast%20training%20of%20deep%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Eager pruning: algorithm and architecture support for fast training of deep neural networks</a> - <b>1.968482949</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Jenga%3A%20Software-Defined%20Cache%20Hierarchies" target="_blank" style="text-decoration: none; color: #3366cc;">Jenga: Software-Defined Cache Hierarchies</a> - <b>1.963787827</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RANA%3A%20towards%20efficient%20neural%20acceleration%20with%20refresh-optimized%20embedded%20DRAM" target="_blank" style="text-decoration: none; color: #3366cc;">RANA: towards efficient neural acceleration with refresh-optimized embedded DRAM</a> - <b>1.963787827</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NN-baton%3A%20DNN%20workload%20orchestration%20and%20chiplet%20granularity%20exploration%20for%20multichip%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">NN-baton: DNN workload orchestration and chiplet granularity exploration for multichip accelerators</a> - <b>1.963787827</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Harmonia%3A%20balancing%20compute%20and%20memory%20power%20in%20high-performance%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Harmonia: balancing compute and memory power in high-performance GPUs</a> - <b>1.959041392</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Dynamic%20thread%20block%20launch%3A%20a%20lightweight%20execution%20mechanism%20to%20support%20irregular%20applications%20on%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Dynamic thread block launch: a lightweight execution mechanism to support irregular applications on GPUs</a> - <b>1.959041392</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DeepAttest%3A%20an%20end-to-end%20attestation%20framework%20for%20deep%20neural%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">DeepAttest: an end-to-end attestation framework for deep neural networks</a> - <b>1.959041392</b> citations</li><li><a href="https://scholar.google.com/scholar?q=QUAC-TRNG%3A%20high-throughput%20true%20random%20number%20generation%20using%20quadruple%20row%20activation%20in%20commodity%20DRAM%20chips" target="_blank" style="text-decoration: none; color: #3366cc;">QUAC-TRNG: high-throughput true random number generation using quadruple row activation in commodity DRAM chips</a> - <b>1.959041392</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GaaS-X%3A%20graph%20analytics%20accelerator%20supporting%20sparse%20data%20representation%20using%20crossbar%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">GaaS-X: graph analytics accelerator supporting sparse data representation using crossbar architectures</a> - <b>1.954242509</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Translation%20ranger%3A%20operating%20system%20support%20for%20contiguity-aware%20TLBs" target="_blank" style="text-decoration: none; color: #3366cc;">Translation ranger: operating system support for contiguity-aware TLBs</a> - <b>1.949390007</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Do-It-Yourself%20Virtual%20Memory%20Translation" target="_blank" style="text-decoration: none; color: #3366cc;">Do-It-Yourself Virtual Memory Translation</a> - <b>1.944482672</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Scheduling%20page%20table%20walks%20for%20irregular%20GPU%20applications" target="_blank" style="text-decoration: none; color: #3366cc;">Scheduling page table walks for irregular GPU applications</a> - <b>1.944482672</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PolyGraph%3A%20exposing%20the%20value%20of%20flexibility%20for%20graph%20processing%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">PolyGraph: exposing the value of flexibility for graph processing accelerators</a> - <b>1.944482672</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20load%20slice%20core%20microarchitecture" target="_blank" style="text-decoration: none; color: #3366cc;">The load slice core microarchitecture</a> - <b>1.939519253</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FASE%3A%20finding%20amplitude-modulated%20side-channel%20emanations" target="_blank" style="text-decoration: none; color: #3366cc;">FASE: finding amplitude-modulated side-channel emanations</a> - <b>1.939519253</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20locality%20descriptor%3A%20a%20holistic%20cross-layer%20abstraction%20to%20express%20data%20locality%20in%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">The locality descriptor: a holistic cross-layer abstraction to express data locality in GPUs</a> - <b>1.939519253</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BioHD%3A%20an%20efficient%20genome%20sequence%20search%20platform%20using%20HyperDimensional%20memorization" target="_blank" style="text-decoration: none; color: #3366cc;">BioHD: an efficient genome sequence search platform using HyperDimensional memorization</a> - <b>1.939519253</b> citations</li><li><a href="https://scholar.google.com/scholar?q=An%20in-network%20architecture%20for%20accelerating%20shared-memory%20multiprocessor%20collectives" target="_blank" style="text-decoration: none; color: #3366cc;">An in-network architecture for accelerating shared-memory multiprocessor collectives</a> - <b>1.929418926</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Demystifying%20the%20system%20vulnerability%20stack%3A%20transient%20fault%20effects%20across%20the%20layers" target="_blank" style="text-decoration: none; color: #3366cc;">Demystifying the system vulnerability stack: transient fault effects across the layers</a> - <b>1.919078092</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DHTM%3A%20durable%20hardware%20transactional%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">DHTM: durable hardware transactional memory</a> - <b>1.913813852</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Energy-efficient%20video%20processing%20for%20virtual%20reality" target="_blank" style="text-decoration: none; color: #3366cc;">Energy-efficient video processing for virtual reality</a> - <b>1.913813852</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Speculative%20data-oblivious%20execution%3A%20mobilizing%20safe%20prediction%20for%20safe%20and%20efficient%20speculative%20execution" target="_blank" style="text-decoration: none; color: #3366cc;">Speculative data-oblivious execution: mobilizing safe prediction for safe and efficient speculative execution</a> - <b>1.913813852</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ARM%20virtualization%3A%20performance%20and%20architectural%20implications" target="_blank" style="text-decoration: none; color: #3366cc;">ARM virtualization: performance and architectural implications</a> - <b>1.908485019</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Density%20tradeoffs%20of%20non-volatile%20memory%20as%20a%20replacement%20for%20SRAM%20based%20last%20level%20cache" target="_blank" style="text-decoration: none; color: #3366cc;">Density tradeoffs of non-volatile memory as a replacement for SRAM based last level cache</a> - <b>1.908485019</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Janus%3A%20optimizing%20memory%20and%20storage%20support%20for%20non-volatile%20memory%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Janus: optimizing memory and storage support for non-volatile memory systems</a> - <b>1.903089987</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Using%20multiple%20input%2C%20multiple%20output%20formal%20control%20to%20maximize%20resource%20efficiency%20in%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Using multiple input, multiple output formal control to maximize resource efficiency in architectures</a> - <b>1.897627091</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficient%20metadata%20management%20for%20irregular%20data%20prefetching" target="_blank" style="text-decoration: none; color: #3366cc;">Efficient metadata management for irregular data prefetching</a> - <b>1.897627091</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PROMISE%3A%20an%20end-to-end%20design%20of%20a%20programmable%20mixed-signal%20accelerator%20for%20machine-learning%20algorithms" target="_blank" style="text-decoration: none; color: #3366cc;">PROMISE: an end-to-end design of a programmable mixed-signal accelerator for machine-learning algorithms</a> - <b>1.892094603</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cost-efficient%20overclocking%20in%20immersion-cooled%20datacenters" target="_blank" style="text-decoration: none; color: #3366cc;">Cost-efficient overclocking in immersion-cooled datacenters</a> - <b>1.892094603</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Towards%20sustainable%20in-situ%20server%20systems%20in%20the%20big%20data%20era" target="_blank" style="text-decoration: none; color: #3366cc;">Towards sustainable in-situ server systems in the big data era</a> - <b>1.886490725</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HEB%3A%20deploying%20and%20managing%20hybrid%20energy%20buffers%20for%20improving%20datacenter%20efficiency%20and%20economy" target="_blank" style="text-decoration: none; color: #3366cc;">HEB: deploying and managing hybrid energy buffers for improving datacenter efficiency and economy</a> - <b>1.886490725</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CLR-DRAM%3A%20a%20low-cost%20DRAM%20architecture%20enabling%20dynamic%20capacity-latency%20trade-off" target="_blank" style="text-decoration: none; color: #3366cc;">CLR-DRAM: a low-cost DRAM architecture enabling dynamic capacity-latency trade-off</a> - <b>1.886490725</b> citations</li><li><a href="https://scholar.google.com/scholar?q=2QAN%3A%20a%20quantum%20compiler%20for%202-local%20qubit%20hamiltonian%20simulation%20algorithms" target="_blank" style="text-decoration: none; color: #3366cc;">2QAN: a quantum compiler for 2-local qubit hamiltonian simulation algorithms</a> - <b>1.886490725</b> citations</li><li><a href="https://scholar.google.com/scholar?q=LaPerm%3A%20locality%20aware%20scheduler%20for%20dynamic%20parallelism%20on%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">LaPerm: locality aware scheduler for dynamic parallelism on GPUs</a> - <b>1.880813592</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Virtual%20thread%3A%20maximizing%20thread-level%20parallelism%20beyond%20GPU%20scheduling%20limit" target="_blank" style="text-decoration: none; color: #3366cc;">Virtual thread: maximizing thread-level parallelism beyond GPU scheduling limit</a> - <b>1.880813592</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Guaranteeing%20local%20differential%20privacy%20on%20ultra-low-power%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Guaranteeing local differential privacy on ultra-low-power systems</a> - <b>1.880813592</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20quantum%20computational%20compiler%20and%20design%20tool%20for%20technology-specific%20targets" target="_blank" style="text-decoration: none; color: #3366cc;">A quantum computational compiler and design tool for technology-specific targets</a> - <b>1.880813592</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SARA%3A%20scaling%20a%20reconfigurable%20dataflow%20accelerator" target="_blank" style="text-decoration: none; color: #3366cc;">SARA: scaling a reconfigurable dataflow accelerator</a> - <b>1.880813592</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Rethinking%20belady%27s%20algorithm%20to%20accommodate%20prefetching" target="_blank" style="text-decoration: none; color: #3366cc;">Rethinking belady's algorithm to accommodate prefetching</a> - <b>1.875061263</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hiding%20the%20Long%20Latency%20of%20Persist%20Barriers%20Using%20Speculative%20Execution" target="_blank" style="text-decoration: none; color: #3366cc;">Hiding the Long Latency of Persist Barriers Using Speculative Execution</a> - <b>1.86923172</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Quality%20of%20Service%20Support%20for%20Fine-Grained%20Sharing%20on%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Quality of Service Support for Fine-Grained Sharing on GPUs</a> - <b>1.86923172</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EQC%3A%20ensembled%20quantum%20computing%20for%20variational%20quantum%20algorithms" target="_blank" style="text-decoration: none; color: #3366cc;">EQC: ensembled quantum computing for variational quantum algorithms</a> - <b>1.86923172</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NEBULA%3A%20a%20neuromorphic%20spin-based%20ultra-low%20power%20architecture%20for%20SNNs%20and%20ANNs" target="_blank" style="text-decoration: none; color: #3366cc;">NEBULA: a neuromorphic spin-based ultra-low power architecture for SNNs and ANNs</a> - <b>1.86332286</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Multiple%20clone%20row%20DRAM%3A%20a%20low%20latency%20and%20area%20optimized%20DRAM" target="_blank" style="text-decoration: none; color: #3366cc;">Multiple clone row DRAM: a low latency and area optimized DRAM</a> - <b>1.857332496</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Towards%20statistical%20guarantees%20in%20controlling%20quality%20tradeoffs%20for%20approximate%20acceleration" target="_blank" style="text-decoration: none; color: #3366cc;">Towards statistical guarantees in controlling quality tradeoffs for approximate acceleration</a> - <b>1.857332496</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Generative%20and%20multi-phase%20learning%20for%20computer%20systems%20optimization" target="_blank" style="text-decoration: none; color: #3366cc;">Generative and multi-phase learning for computer systems optimization</a> - <b>1.857332496</b> citations</li><li><a href="https://scholar.google.com/scholar?q=uGEMM%3A%20unary%20computing%20architecture%20for%20GEMM%20applications" target="_blank" style="text-decoration: none; color: #3366cc;">uGEMM: unary computing architecture for GEMM applications</a> - <b>1.857332496</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DIMMining%3A%20pruning-efficient%20and%20parallel%20graph%20mining%20on%20near-memory-computing" target="_blank" style="text-decoration: none; color: #3366cc;">DIMMining: pruning-efficient and parallel graph mining on near-memory-computing</a> - <b>1.857332496</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MeRLiN%3A%20Exploiting%20Dynamic%20Instruction%20Behavior%20for%20Fast%20and%20Accurate%20Microarchitecture%20Level%20Reliability%20Assessment" target="_blank" style="text-decoration: none; color: #3366cc;">MeRLiN: Exploiting Dynamic Instruction Behavior for Fast and Accurate Microarchitecture Level Reliability Assessment</a> - <b>1.851258349</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Spandex%3A%20a%20flexible%20interface%20for%20efficient%20heterogeneous%20coherence" target="_blank" style="text-decoration: none; color: #3366cc;">Spandex: a flexible interface for efficient heterogeneous coherence</a> - <b>1.851258349</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Buddy%20compression%3A%20enabling%20larger%20memory%20for%20deep%20learning%20and%20HPC%20workloads%20on%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Buddy compression: enabling larger memory for deep learning and HPC workloads on GPUs</a> - <b>1.851258349</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MITTS%3A%20memory%20inter-arrival%20time%20traffic%20shaping" target="_blank" style="text-decoration: none; color: #3366cc;">MITTS: memory inter-arrival time traffic shaping</a> - <b>1.838849091</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hardware%20Translation%20Coherence%20for%20Virtualized%20Systems" target="_blank" style="text-decoration: none; color: #3366cc;">Hardware Translation Coherence for Virtualized Systems</a> - <b>1.838849091</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EbDa%3A%20A%20New%20Theory%20on%20Design%20and%20Verification%20of%20Deadlock-free%20Interconnection%20Networks" target="_blank" style="text-decoration: none; color: #3366cc;">EbDa: A New Theory on Design and Verification of Deadlock-free Interconnection Networks</a> - <b>1.838849091</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SeGraM%3A%20a%20universal%20hardware%20accelerator%20for%20genomic%20sequence-to-graph%20and%20sequence-to-sequence%20mapping" target="_blank" style="text-decoration: none; color: #3366cc;">SeGraM: a universal hardware accelerator for genomic sequence-to-graph and sequence-to-sequence mapping</a> - <b>1.838849091</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20software-defined%20tensor%20streaming%20multiprocessor%20for%20large-scale%20machine%20learning" target="_blank" style="text-decoration: none; color: #3366cc;">A software-defined tensor streaming multiprocessor for large-scale machine learning</a> - <b>1.832508913</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Page%20overlays%3A%20an%20enhanced%20virtual%20memory%20framework%20to%20enable%20fine-grained%20memory%20management" target="_blank" style="text-decoration: none; color: #3366cc;">Page overlays: an enhanced virtual memory framework to enable fine-grained memory management</a> - <b>1.826074803</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Power%20attack%20defense%3A%20securing%20battery-backed%20data%20centers" target="_blank" style="text-decoration: none; color: #3366cc;">Power attack defense: securing battery-backed data centers</a> - <b>1.826074803</b> citations</li><li><a href="https://scholar.google.com/scholar?q=To%20PIM%20or%20not%20for%20emerging%20general%20purpose%20processing%20in%20DDR%20memory%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">To PIM or not for emerging general purpose processing in DDR memory systems</a> - <b>1.826074803</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Stream-based%20memory%20access%20specialization%20for%20general%20purpose%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">Stream-based memory access specialization for general purpose processors</a> - <b>1.819543936</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bonsai%3A%20high-performance%20adaptive%20merge%20tree%20sorting" target="_blank" style="text-decoration: none; color: #3366cc;">Bonsai: high-performance adaptive merge tree sorting</a> - <b>1.819543936</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Enhancing%20and%20exploiting%20contiguity%20for%20fast%20memory%20virtualization" target="_blank" style="text-decoration: none; color: #3366cc;">Enhancing and exploiting contiguity for fast memory virtualization</a> - <b>1.819543936</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20variable%20warp%20size%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">A variable warp size architecture</a> - <b>1.812913357</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PowerChief%3A%20Intelligent%20Power%20Allocation%20for%20Multi-Stage%20Applications%20to%20Improve%20Responsiveness%20on%20Power%20Constrained%20CMP" target="_blank" style="text-decoration: none; color: #3366cc;">PowerChief: Intelligent Power Allocation for Multi-Stage Applications to Improve Responsiveness on Power Constrained CMP</a> - <b>1.812913357</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Chasing%20Away%20RAts%3A%20Semantics%20and%20Evaluation%20for%20Relaxed%20Atomics%20on%20Heterogeneous%20Systems" target="_blank" style="text-decoration: none; color: #3366cc;">Chasing Away RAts: Semantics and Evaluation for Relaxed Atomics on Heterogeneous Systems</a> - <b>1.812913357</b> citations</li><li><a href="https://scholar.google.com/scholar?q=LogCA%3A%20A%20High-Level%20Performance%20Model%20for%20Hardware%20Accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">LogCA: A High-Level Performance Model for Hardware Accelerators</a> - <b>1.812913357</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Don%27t%20forget%20the%20I/O%20when%20allocating%20your%20LLC" target="_blank" style="text-decoration: none; color: #3366cc;">Don't forget the I/O when allocating your LLC</a> - <b>1.812913357</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Opening%20pandora%27s%20box%3A%20a%20systematic%20study%20of%20new%20ways%20microarchitecture%20can%20leak%20private%20data" target="_blank" style="text-decoration: none; color: #3366cc;">Opening pandora's box: a systematic study of new ways microarchitecture can leak private data</a> - <b>1.812913357</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Thermal%20time%20shifting%3A%20leveraging%20phase%20change%20materials%20to%20reduce%20cooling%20costs%20in%20warehouse-scale%20computers" target="_blank" style="text-decoration: none; color: #3366cc;">Thermal time shifting: leveraging phase change materials to reduce cooling costs in warehouse-scale computers</a> - <b>1.799340549</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ActivePointers%3A%20a%20case%20for%20software%20address%20translation%20on%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">ActivePointers: a case for software address translation on GPUs</a> - <b>1.799340549</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PrORAM%3A%20dynamic%20prefetcher%20for%20oblivious%20RAM" target="_blank" style="text-decoration: none; color: #3366cc;">PrORAM: dynamic prefetcher for oblivious RAM</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Exploiting%20dynamic%20timing%20slack%20for%20energy%20efficiency%20in%20ultra-low-power%20embedded%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Exploiting dynamic timing slack for energy efficiency in ultra-low-power embedded systems</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=2B-SSD%3A%20the%20case%20for%20dual%2C%20byte-%20and%20block-addressable%20solid-state%20drives" target="_blank" style="text-decoration: none; color: #3366cc;">2B-SSD: the case for dual, byte- and block-addressable solid-state drives</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RegMutex%3A%20inter-warp%20GPU%20register%20time-sharing" target="_blank" style="text-decoration: none; color: #3366cc;">RegMutex: inter-warp GPU register time-sharing</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Opportunistic%20computing%20in%20GPU%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Opportunistic computing in GPU architectures</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NISQ%2B%3A%20boosting%20quantum%20computing%20power%20by%20approximating%20quantum%20error%20correction" target="_blank" style="text-decoration: none; color: #3366cc;">NISQ+: boosting quantum computing power by approximating quantum error correction</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SQUARE%3A%20strategic%20quantum%20ancilla%20reuse%20for%20modular%20quantum%20programs%20via%20cost-effective%20uncomputation" target="_blank" style="text-decoration: none; color: #3366cc;">SQUARE: strategic quantum ancilla reuse for modular quantum programs via cost-effective uncomputation</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Flex%3A%20high-availability%20datacenters%20with%20zero%20reserved%20power" target="_blank" style="text-decoration: none; color: #3366cc;">Flex: high-availability datacenters with zero reserved power</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SPACE%3A%20locality-aware%20processing%20in%20heterogeneous%20memory%20for%20personalized%20recommendations" target="_blank" style="text-decoration: none; color: #3366cc;">SPACE: locality-aware processing in heterogeneous memory for personalized recommendations</a> - <b>1.792391689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CloudMonatt%3A%20an%20architecture%20for%20security%20health%20monitoring%20and%20attestation%20of%20virtual%20machines%20in%20cloud%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">CloudMonatt: an architecture for security health monitoring and attestation of virtual machines in cloud computing</a> - <b>1.785329835</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Parallel%20Automata%20Processor" target="_blank" style="text-decoration: none; color: #3366cc;">Parallel Automata Processor</a> - <b>1.785329835</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Secure%20TLBs" target="_blank" style="text-decoration: none; color: #3366cc;">Secure TLBs</a> - <b>1.785329835</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20NeBuLa%20RPC-optimized%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">The NeBuLa RPC-optimized architecture</a> - <b>1.785329835</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Enabling%20compute-communication%20overlap%20in%20distributed%20deep%20learning%20training%20platforms" target="_blank" style="text-decoration: none; color: #3366cc;">Enabling compute-communication overlap in distributed deep learning training platforms</a> - <b>1.785329835</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20stochastic-computing%20based%20deep%20learning%20framework%20using%20adiabatic%20quantum-flux-parametron%20superconducting%20technology" target="_blank" style="text-decoration: none; color: #3366cc;">A stochastic-computing based deep learning framework using adiabatic quantum-flux-parametron superconducting technology</a> - <b>1.77815125</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hyper-AP%3A%20enhancing%20associative%20processing%20through%20a%20full-stack%20optimization" target="_blank" style="text-decoration: none; color: #3366cc;">Hyper-AP: enhancing associative processing through a full-stack optimization</a> - <b>1.77815125</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Ripple%3A%20profile-guided%20instruction%20cache%20replacement%20for%20data%20center%20applications" target="_blank" style="text-decoration: none; color: #3366cc;">Ripple: profile-guided instruction cache replacement for data center applications</a> - <b>1.77815125</b> citations</li><li><a href="https://scholar.google.com/scholar?q=COP%3A%20to%20compress%20and%20protect%20main%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">COP: to compress and protect main memory</a> - <b>1.770852012</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SmartExchange%3A%20trading%20higher-cost%20memory%20storage/access%20for%20lower-cost%20computation" target="_blank" style="text-decoration: none; color: #3366cc;">SmartExchange: trading higher-cost memory storage/access for lower-cost computation</a> - <b>1.770852012</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Leaky%20buddies%3A%20cross-component%20covert%20channels%20on%20integrated%20CPU-GPU%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Leaky buddies: cross-component covert channels on integrated CPU-GPU systems</a> - <b>1.770852012</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Sibyl%3A%20adaptive%20and%20extensible%20data%20placement%20in%20hybrid%20storage%20systems%20using%20online%20reinforcement%20learning" target="_blank" style="text-decoration: none; color: #3366cc;">Sibyl: adaptive and extensible data placement in hybrid storage systems using online reinforcement learning</a> - <b>1.770852012</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Geyser%3A%20a%20compilation%20framework%20for%20quantum%20computing%20with%20neutral%20atoms" target="_blank" style="text-decoration: none; color: #3366cc;">Geyser: a compilation framework for quantum computing with neutral atoms</a> - <b>1.770852012</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Nested%20enclave%3A%20supporting%20fine-grained%20hierarchical%20isolation%20with%20SGX" target="_blank" style="text-decoration: none; color: #3366cc;">Nested enclave: supporting fine-grained hierarchical isolation with SGX</a> - <b>1.763427994</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Near%20data%20acceleration%20with%20concurrent%20host%20access" target="_blank" style="text-decoration: none; color: #3366cc;">Near data acceleration with concurrent host access</a> - <b>1.763427994</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ABC-DIMM%3A%20alleviating%20the%20bottleneck%20of%20communication%20in%20DIMM-based%20near-memory%20processing%20with%20inter-DIMM%20broadcast" target="_blank" style="text-decoration: none; color: #3366cc;">ABC-DIMM: alleviating the bottleneck of communication in DIMM-based near-memory processing with inter-DIMM broadcast</a> - <b>1.763427994</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Exploiting%20long-distance%20interactions%20and%20tolerating%20atom%20loss%20in%20neutral%20atom%20quantum%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Exploiting long-distance interactions and tolerating atom loss in neutral atom quantum architectures</a> - <b>1.763427994</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20case%20for%20richer%20cross-layer%20abstractions%3A%20bridging%20the%20semantic%20gap%20with%20expressive%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">A case for richer cross-layer abstractions: bridging the semantic gap with expressive memory</a> - <b>1.755874856</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SysScale%3A%20exploiting%20multi-domain%20dynamic%20voltage%20and%20frequency%20scaling%20for%20energy%20efficient%20mobile%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">SysScale: exploiting multi-domain dynamic voltage and frequency scaling for energy efficient mobile processors</a> - <b>1.755874856</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20hardware%20accelerator%20for%20tracing%20garbage%20collection" target="_blank" style="text-decoration: none; color: #3366cc;">A hardware accelerator for tracing garbage collection</a> - <b>1.748188027</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Albireo%3A%20energy-efficient%20acceleration%20of%20convolutional%20neural%20networks%20via%20silicon%20photonics" target="_blank" style="text-decoration: none; color: #3366cc;">Albireo: energy-efficient acceleration of convolutional neural networks via silicon photonics</a> - <b>1.748188027</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Themis%3A%20a%20network%20bandwidth-aware%20collective%20scheduling%20policy%20for%20distributed%20training%20of%20DL%20models" target="_blank" style="text-decoration: none; color: #3366cc;">Themis: a network bandwidth-aware collective scheduling policy for distributed training of DL models</a> - <b>1.748188027</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Practical%20memory%20safety%20with%20REST" target="_blank" style="text-decoration: none; color: #3366cc;">Practical memory safety with REST</a> - <b>1.740362689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Synchronized%20progress%20in%20interconnection%20networks%20%28SPIN%29%3A%20a%20new%20theory%20for%20deadlock%20freedom" target="_blank" style="text-decoration: none; color: #3366cc;">Synchronized progress in interconnection networks (SPIN): a new theory for deadlock freedom</a> - <b>1.740362689</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Strober%3A%20fast%20and%20accurate%20sample-based%20energy%20simulation%20for%20arbitrary%20RTL" target="_blank" style="text-decoration: none; color: #3366cc;">Strober: fast and accurate sample-based energy simulation for arbitrary RTL</a> - <b>1.73239376</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Designing%20vertical%20processors%20in%20monolithic%203D" target="_blank" style="text-decoration: none; color: #3366cc;">Designing vertical processors in monolithic 3D</a> - <b>1.72427587</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cryogenic%20computer%20architecture%20modeling%20with%20memory-side%20case%20studies" target="_blank" style="text-decoration: none; color: #3366cc;">Cryogenic computer architecture modeling with memory-side case studies</a> - <b>1.72427587</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Axiomatic%20hardware-software%20contracts%20for%20security" target="_blank" style="text-decoration: none; color: #3366cc;">Axiomatic hardware-software contracts for security</a> - <b>1.72427587</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DynaSpAM%3A%20dynamic%20spatial%20architecture%20mapping%20using%20out%20of%20order%20instruction%20schedules" target="_blank" style="text-decoration: none; color: #3366cc;">DynaSpAM: dynamic spatial architecture mapping using out of order instruction schedules</a> - <b>1.716003344</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Sieve%3A%20scalable%20in-situ%20DRAM-based%20accelerator%20designs%20for%20massively%20parallel%20k-mer%20matching" target="_blank" style="text-decoration: none; color: #3366cc;">Sieve: scalable in-situ DRAM-based accelerator designs for massively parallel k-mer matching</a> - <b>1.716003344</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ArMOR%3A%20defending%20against%20memory%20consistency%20model%20mismatches%20in%20heterogeneous%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">ArMOR: defending against memory consistency model mismatches in heterogeneous architectures</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Probable%20cause%3A%20the%20deanonymizing%20effects%20of%20approximate%20DRAM" target="_blank" style="text-decoration: none; color: #3366cc;">Probable cause: the deanonymizing effects of approximate DRAM</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Fusion%3A%20design%20tradeoffs%20in%20coherent%20cache%20hierarchies%20for%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">Fusion: design tradeoffs in coherent cache hierarchies for accelerators</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Boosting%20access%20parallelism%20to%20PCM-based%20main%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">Boosting access parallelism to PCM-based main memory</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Lazy%20persistency%3A%20a%20high-performing%20and%20write-efficient%20software%20persistency%20technique" target="_blank" style="text-decoration: none; color: #3366cc;">Lazy persistency: a high-performing and write-efficient software persistency technique</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=IntelliNoC%3A%20a%20holistic%20design%20framework%20for%20energy-efficient%20and%20reliable%20on-chip%20communication%20for%20manycores" target="_blank" style="text-decoration: none; color: #3366cc;">IntelliNoC: a holistic design framework for energy-efficient and reliable on-chip communication for manycores</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Perforated%20page%3A%20supporting%20fragmented%20memory%20allocation%20for%20large%20pages" target="_blank" style="text-decoration: none; color: #3366cc;">Perforated page: supporting fragmented memory allocation for large pages</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CODIC%3A%20a%20low-cost%20substrate%20for%20enabling%20custom%20in-DRAM%20functionalities%20and%20optimizations" target="_blank" style="text-decoration: none; color: #3366cc;">CODIC: a low-cost substrate for enabling custom in-DRAM functionalities and optimizations</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MOESI-prime%3A%20preventing%20coherence-induced%20hammering%20in%20commodity%20workloads" target="_blank" style="text-decoration: none; color: #3366cc;">MOESI-prime: preventing coherence-induced hammering in commodity workloads</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Lukewarm%20serverless%20functions%3A%20characterization%20and%20optimization" target="_blank" style="text-decoration: none; color: #3366cc;">Lukewarm serverless functions: characterization and optimization</a> - <b>1.707570176</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficient%20execution%20of%20memory%20access%20phases%20using%20dataflow%20specialization" target="_blank" style="text-decoration: none; color: #3366cc;">Efficient execution of memory access phases using dataflow specialization</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Peak%20efficiency%20aware%20scheduling%20for%20highly%20energy%20proportional%20servers" target="_blank" style="text-decoration: none; color: #3366cc;">Peak efficiency aware scheduling for highly energy proportional servers</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CASH%3A%20supporting%20IaaS%20customers%20with%20a%20sub-core%20configurable%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">CASH: supporting IaaS customers with a sub-core configurable architecture</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bespoke%20Processors%20for%20Applications%20with%20Ultra-low%20Area%20and%20Power%20Constraints" target="_blank" style="text-decoration: none; color: #3366cc;">Bespoke Processors for Applications with Ultra-low Area and Power Constraints</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Criticality%20aware%20tiered%20cache%20hierarchy%3A%20a%20fundamental%20relook%20at%20multi-level%20cache%20hierarchies" target="_blank" style="text-decoration: none; color: #3366cc;">Criticality aware tiered cache hierarchy: a fundamental relook at multi-level cache hierarchies</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CryoCore%3A%20a%20fast%20and%20dense%20processor%20architecture%20for%20cryogenic%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">CryoCore: a fast and dense processor architecture for cryogenic computing</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FlexMiner%3A%20a%20pattern-aware%20accelerator%20for%20graph%20pattern%20mining" target="_blank" style="text-decoration: none; color: #3366cc;">FlexMiner: a pattern-aware accelerator for graph pattern mining</a> - <b>1.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=APRES%3A%20improving%20cache%20efficiency%20by%20exploiting%20load%20characteristics%20on%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">APRES: improving cache efficiency by exploiting load characteristics on GPUs</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Viyojit%3A%20Decoupling%20Battery%20and%20DRAM%20Capacities%20for%20Battery-Backed%20DRAM" target="_blank" style="text-decoration: none; color: #3366cc;">Viyojit: Decoupling Battery and DRAM Capacities for Battery-Backed DRAM</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Robox%3A%20an%20end-to-end%20solution%20to%20accelerate%20autonomous%20control%20in%20robotics" target="_blank" style="text-decoration: none; color: #3366cc;">Robox: an end-to-end solution to accelerate autonomous control in robotics</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CHEx86%3A%20context-sensitive%20enforcement%20of%20memory%20safety%20via%20microcode-enabled%20capabilities" target="_blank" style="text-decoration: none; color: #3366cc;">CHEx86: context-sensitive enforcement of memory safety via microcode-enabled capabilities</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=JPEG-ACT%3A%20accelerating%20deep%20learning%20via%20transform-based%20lossy%20compression" target="_blank" style="text-decoration: none; color: #3366cc;">JPEG-ACT: accelerating deep learning via transform-based lossy compression</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Echo%3A%20compiler-based%20GPU%20memory%20footprint%20reduction%20for%20LSTM%20RNN%20training" target="_blank" style="text-decoration: none; color: #3366cc;">Echo: compiler-based GPU memory footprint reduction for LSTM RNN training</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Confidential%20serverless%20made%20efficient%20with%20plug-in%20enclaves" target="_blank" style="text-decoration: none; color: #3366cc;">Confidential serverless made efficient with plug-in enclaves</a> - <b>1.69019608</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Aggressive%20Pipelining%20of%20Irregular%20Applications%20on%20Reconfigurable%20Hardware" target="_blank" style="text-decoration: none; color: #3366cc;">Aggressive Pipelining of Irregular Applications on Reconfigurable Hardware</a> - <b>1.681241237</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Genesis%3A%20a%20hardware%20acceleration%20framework%20for%20genomic%20data%20analysis" target="_blank" style="text-decoration: none; color: #3366cc;">Genesis: a hardware acceleration framework for genomic data analysis</a> - <b>1.681241237</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Gorgon%3A%20accelerating%20machine%20learning%20from%20relational%20data" target="_blank" style="text-decoration: none; color: #3366cc;">Gorgon: accelerating machine learning from relational data</a> - <b>1.681241237</b> citations</li><li><a href="https://scholar.google.com/scholar?q=No-FAT%3A%20architectural%20support%20for%20low%20overhead%20memory%20safety%20checks" target="_blank" style="text-decoration: none; color: #3366cc;">No-FAT: architectural support for low overhead memory safety checks</a> - <b>1.681241237</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Manycore%20network%20interfaces%20for%20in-memory%20rack-scale%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">Manycore network interfaces for in-memory rack-scale computing</a> - <b>1.672097858</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Generic%20system%20calls%20for%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Generic system calls for GPUs</a> - <b>1.672097858</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Relaxed%20persist%20ordering%20using%20strand%20persistency" target="_blank" style="text-decoration: none; color: #3366cc;">Relaxed persist ordering using strand persistency</a> - <b>1.672097858</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Communication%20algorithm-architecture%20co-design%20for%20distributed%20deep%20learning" target="_blank" style="text-decoration: none; color: #3366cc;">Communication algorithm-architecture co-design for distributed deep learning</a> - <b>1.672097858</b> citations</li><li><a href="https://scholar.google.com/scholar?q=There%27s%20always%20a%20bigger%20fish%3A%20a%20clarifying%20analysis%20of%20a%20machine-learning-assisted%20side-channel%20attack" target="_blank" style="text-decoration: none; color: #3366cc;">There's always a bigger fish: a clarifying analysis of a machine-learning-assisted side-channel attack</a> - <b>1.672097858</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Coherence%20protocol%20for%20transparent%20management%20of%20scratchpad%20memories%20in%20shared%20memory%20manycore%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Coherence protocol for transparent management of scratchpad memories in shared memory manycore architectures</a> - <b>1.662757832</b> citations</li><li><a href="https://scholar.google.com/scholar?q=All-inclusive%20ECC%3A%20thorough%20end-to-end%20protection%20for%20reliable%20computer%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">All-inclusive ECC: thorough end-to-end protection for reliable computer memory</a> - <b>1.662757832</b> citations</li><li><a href="https://scholar.google.com/scholar?q=AccQOC%3A%20accelerating%20quantum%20optimal%20control%20based%20pulse%20generation" target="_blank" style="text-decoration: none; color: #3366cc;">AccQOC: accelerating quantum optimal control based pulse generation</a> - <b>1.662757832</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hoop%3A%20efficient%20hardware-assisted%20out-of-place%20update%20for%20non-volatile%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">Hoop: efficient hardware-assisted out-of-place update for non-volatile memory</a> - <b>1.662757832</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20anytime%20automaton" target="_blank" style="text-decoration: none; color: #3366cc;">The anytime automaton</a> - <b>1.653212514</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DICE%3A%20Compressing%20DRAM%20Caches%20for%20Bandwidth%20and%20Capacity" target="_blank" style="text-decoration: none; color: #3366cc;">DICE: Compressing DRAM Caches for Bandwidth and Capacity</a> - <b>1.653212514</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Software-hardware%20co-optimization%20for%20computational%20chemistry%20on%20superconducting%20quantum%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">Software-hardware co-optimization for computational chemistry on superconducting quantum processors</a> - <b>1.653212514</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20RISC-V%20in-network%20accelerator%20for%20flexible%20high-performance%20low-power%20packet%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">A RISC-V in-network accelerator for flexible high-performance low-power packet processing</a> - <b>1.653212514</b> citations</li><li><a href="https://scholar.google.com/scholar?q=VIP%3A%20virtualizing%20IP%20chains%20on%20handheld%20platforms" target="_blank" style="text-decoration: none; color: #3366cc;">VIP: virtualizing IP chains on handheld platforms</a> - <b>1.643452676</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Evaluation%20of%20an%20analog%20accelerator%20for%20linear%20algebra" target="_blank" style="text-decoration: none; color: #3366cc;">Evaluation of an analog accelerator for linear algebra</a> - <b>1.643452676</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Exploiting%20page%20table%20locality%20for%20agile%20TLB%20prefetching" target="_blank" style="text-decoration: none; color: #3366cc;">Exploiting page table locality for agile TLB prefetching</a> - <b>1.643452676</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Unlimited%20vector%20extension%20with%20data%20streaming%20support" target="_blank" style="text-decoration: none; color: #3366cc;">Unlimited vector extension with data streaming support</a> - <b>1.643452676</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Energy%20efficient%20data%20encoding%20in%20DRAM%20channels%20exploiting%20data%20value%20similarity" target="_blank" style="text-decoration: none; color: #3366cc;">Energy efficient data encoding in DRAM channels exploiting data value similarity</a> - <b>1.633468456</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ACCORD%3A%20enabling%20associativity%20for%20gigascale%20DRAM%20caches%20by%20coordinating%20way-install%20and%20way-prediction" target="_blank" style="text-decoration: none; color: #3366cc;">ACCORD: enabling associativity for gigascale DRAM caches by coordinating way-install and way-prediction</a> - <b>1.633468456</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Stitch%3A%20fusible%20heterogeneous%20accelerators%20enmeshed%20with%20many-core%20architecture%20for%20wearables" target="_blank" style="text-decoration: none; color: #3366cc;">Stitch: fusible heterogeneous accelerators enmeshed with many-core architecture for wearables</a> - <b>1.633468456</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Rebooting%20virtual%20memory%20with%20midgard" target="_blank" style="text-decoration: none; color: #3366cc;">Rebooting virtual memory with midgard</a> - <b>1.633468456</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SNS%27s%20not%20a%20synthesizer%3A%20a%20deep-learning-based%20synthesis%20predictor" target="_blank" style="text-decoration: none; color: #3366cc;">SNS's not a synthesizer: a deep-learning-based synthesis predictor</a> - <b>1.633468456</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Training%20personalized%20recommendation%20systems%20from%20%28GPU%29%20scratch%3A%20look%20forward%20not%20backwards" target="_blank" style="text-decoration: none; color: #3366cc;">Training personalized recommendation systems from (GPU) scratch: look forward not backwards</a> - <b>1.633468456</b> citations</li><li><a href="https://scholar.google.com/scholar?q=LAP%3A%20loop-block%20aware%20inclusion%20properties%20for%20energy-efficient%20asymmetric%20last%20level%20caches" target="_blank" style="text-decoration: none; color: #3366cc;">LAP: loop-block aware inclusion properties for energy-efficient asymmetric last level caches</a> - <b>1.62324929</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Scalable%20interconnects%20for%20reconfigurable%20spatial%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Scalable interconnects for reconfigurable spatial architectures</a> - <b>1.62324929</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Printed%20microprocessors" target="_blank" style="text-decoration: none; color: #3366cc;">Printed microprocessors</a> - <b>1.62324929</b> citations</li><li><a href="https://scholar.google.com/scholar?q=D%C3%A9j%C3%A0%20view%3A%20spatio-temporal%20compute%20reuse%20for%20energy-efficient%20360%C2%B0%20VR%20video%20streaming" target="_blank" style="text-decoration: none; color: #3366cc;">Déjà view: spatio-temporal compute reuse for energy-efficient 360° VR video streaming</a> - <b>1.62324929</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20specialized%20architecture%20for%20object%20serialization%20with%20applications%20to%20big%20data%20analytics" target="_blank" style="text-decoration: none; color: #3366cc;">A specialized architecture for object serialization with applications to big data analytics</a> - <b>1.62324929</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DRAF%3A%20a%20low-power%20DRAM-based%20reconfigurable%20acceleration%20fabric" target="_blank" style="text-decoration: none; color: #3366cc;">DRAF: a low-power DRAM-based reconfigurable acceleration fabric</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Get%20out%20of%20the%20valley%3A%20power-efficient%20address%20mapping%20for%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Get out of the valley: power-efficient address mapping for GPUs</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SecDir%3A%20a%20secure%20directory%20to%20defeat%20directory%20side-channel%20attacks" target="_blank" style="text-decoration: none; color: #3366cc;">SecDir: a secure directory to defeat directory side-channel attacks</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=T4%3A%20compiling%20sequential%20code%20for%20effective%20speculative%20parallelization%20in%20hardware" target="_blank" style="text-decoration: none; color: #3366cc;">T4: compiling sequential code for effective speculative parallelization in hardware</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hardware-based%20domain%20virtualization%20for%20intra-process%20isolation%20of%20persistent%20memory%20objects" target="_blank" style="text-decoration: none; color: #3366cc;">Hardware-based domain virtualization for intra-process isolation of persistent memory objects</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20virtual%20block%20interface%3A%20a%20flexible%20alternative%20to%20the%20conventional%20virtual%20memory%20framework" target="_blank" style="text-decoration: none; color: #3366cc;">The virtual block interface: a flexible alternative to the conventional virtual memory framework</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Designing%20calibration%20and%20expressivity-efficient%20instruction%20sets%20for%20quantum%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">Designing calibration and expressivity-efficient instruction sets for quantum computing</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NDMiner%3A%20accelerating%20graph%20pattern%20mining%20using%20near%20data%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">NDMiner: accelerating graph pattern mining using near data processing</a> - <b>1.612783857</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Fractal%3A%20An%20Execution%20Model%20for%20Fine-Grain%20Nested%20Speculative%20Parallelism" target="_blank" style="text-decoration: none; color: #3366cc;">Fractal: An Execution Model for Fine-Grain Nested Speculative Parallelism</a> - <b>1.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bit-level%20perceptron%20prediction%20for%20indirect%20branches" target="_blank" style="text-decoration: none; color: #3366cc;">Bit-level perceptron prediction for indirect branches</a> - <b>1.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hardware-software%20co-design%20for%20brain-computer%20interfaces" target="_blank" style="text-decoration: none; color: #3366cc;">Hardware-software co-design for brain-computer interfaces</a> - <b>1.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CryoGuard%3A%20a%20near%20refresh-free%20robust%20DRAM%20design%20for%20cryogenic%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">CryoGuard: a near refresh-free robust DRAM design for cryogenic computing</a> - <b>1.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MeNDA%3A%20a%20near-memory%20multi-way%20merge%20solution%20for%20sparse%20transposition%20and%20dataflows" target="_blank" style="text-decoration: none; color: #3366cc;">MeNDA: a near-memory multi-way merge solution for sparse transposition and dataflows</a> - <b>1.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Base-victim%20compression%3A%20an%20opportunistic%20cache%20compression%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">Base-victim compression: an opportunistic cache compression architecture</a> - <b>1.591064607</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Non-Speculative%20Load-Load%20Reordering%20in%20TSO" target="_blank" style="text-decoration: none; color: #3366cc;">Non-Speculative Load-Load Reordering in TSO</a> - <b>1.591064607</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Division%20of%20labor%3A%20a%20more%20effective%20approach%20to%20prefetching" target="_blank" style="text-decoration: none; color: #3366cc;">Division of labor: a more effective approach to prefetching</a> - <b>1.591064607</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Flexon%3A%20a%20flexible%20digital%20neuron%20for%20efficient%20spiking%20neural%20network%20simulations" target="_blank" style="text-decoration: none; color: #3366cc;">Flexon: a flexible digital neuron for efficient spiking neural network simulations</a> - <b>1.591064607</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Tiny%20but%20mighty%3A%20designing%20and%20realizing%20scalable%20latency%20tolerance%20for%20manycore%20SoCs" target="_blank" style="text-decoration: none; color: #3366cc;">Tiny but mighty: designing and realizing scalable latency tolerance for manycore SoCs</a> - <b>1.591064607</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Reducing%20world%20switches%20in%20virtualized%20environment%20with%20flexible%20cross-world%20calls" target="_blank" style="text-decoration: none; color: #3366cc;">Reducing world switches in virtualized environment with flexible cross-world calls</a> - <b>1.579783597</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SEESAW%3A%20using%20superpages%20to%20improve%20VIPT%20caches" target="_blank" style="text-decoration: none; color: #3366cc;">SEESAW: using superpages to improve VIPT caches</a> - <b>1.579783597</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Adaptive%20memory-side%20last-level%20GPU%20caching" target="_blank" style="text-decoration: none; color: #3366cc;">Adaptive memory-side last-level GPU caching</a> - <b>1.579783597</b> citations</li><li><a href="https://scholar.google.com/scholar?q=XPC%3A%20architectural%20support%20for%20secure%20and%20efficient%20cross%20process%20call" target="_blank" style="text-decoration: none; color: #3366cc;">XPC: architectural support for secure and efficient cross process call</a> - <b>1.579783597</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20dark%20side%20of%20DNN%20pruning" target="_blank" style="text-decoration: none; color: #3366cc;">The dark side of DNN pruning</a> - <b>1.568201724</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Divide%20and%20conquer%20frontend%20bottleneck" target="_blank" style="text-decoration: none; color: #3366cc;">Divide and conquer frontend bottleneck</a> - <b>1.556302501</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerated%20seeding%20for%20genome%20sequence%20alignment%20with%20enumerated%20radix%20trees" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerated seeding for genome sequence alignment with enumerated radix trees</a> - <b>1.556302501</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Large-scale%20graph%20processing%20on%20FPGAs%20with%20caches%20for%20thousands%20of%20simultaneous%20misses" target="_blank" style="text-decoration: none; color: #3366cc;">Large-scale graph processing on FPGAs with caches for thousands of simultaneous misses</a> - <b>1.556302501</b> citations</li><li><a href="https://scholar.google.com/scholar?q=IntroSpectre%3A%20a%20pre-silicon%20framework%20for%20discovery%20and%20analysis%20of%20transient%20execution%20vulnerabilities" target="_blank" style="text-decoration: none; color: #3366cc;">IntroSpectre: a pre-silicon framework for discovery and analysis of transient execution vulnerabilities</a> - <b>1.556302501</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EDAM%3A%20edit%20distance%20tolerant%20approximate%20matching%20content%20addressable%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">EDAM: edit distance tolerant approximate matching content addressable memory</a> - <b>1.556302501</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Asymmetry-aware%20work-stealing%20runtimes" target="_blank" style="text-decoration: none; color: #3366cc;">Asymmetry-aware work-stealing runtimes</a> - <b>1.544068044</b> citations</li><li><a href="https://scholar.google.com/scholar?q=The%20IBM%20zl5%20high%20frequency%20mainframe%20branch%20predictor" target="_blank" style="text-decoration: none; color: #3366cc;">The IBM zl5 high frequency mainframe branch predictor</a> - <b>1.544068044</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20case%20for%20hardware-based%20demand%20paging" target="_blank" style="text-decoration: none; color: #3366cc;">A case for hardware-based demand paging</a> - <b>1.544068044</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SHRINK%3A%20Reducing%20the%20ISA%20complexity%20via%20instruction%20recycling" target="_blank" style="text-decoration: none; color: #3366cc;">SHRINK: Reducing the ISA complexity via instruction recycling</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Callback%3A%20efficient%20synchronization%20without%20invalidation%20with%20a%20directory%20just%20for%20spin-waiting" target="_blank" style="text-decoration: none; color: #3366cc;">Callback: efficient synchronization without invalidation with a directory just for spin-waiting</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ThermoGater%3A%20Thermally-Aware%20On-Chip%20Voltage%20Regulation" target="_blank" style="text-decoration: none; color: #3366cc;">ThermoGater: Thermally-Aware On-Chip Voltage Regulation</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Tailored%20page%20sizes" target="_blank" style="text-decoration: none; color: #3366cc;">Tailored page sizes</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20cost-effective%20entangling%20prefetcher%20for%20instructions" target="_blank" style="text-decoration: none; color: #3366cc;">A cost-effective entangling prefetcher for instructions</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SpZip%3A%20architectural%20support%20for%20effective%20data%20compression%20in%20irregular%20applications" target="_blank" style="text-decoration: none; color: #3366cc;">SpZip: architectural support for effective data compression in irregular applications</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=INSPIRE%3A%20in-storage%20private%20information%20retrieval%20via%20protocol%20and%20architecture%20co-design" target="_blank" style="text-decoration: none; color: #3366cc;">INSPIRE: in-storage private information retrieval via protocol and architecture co-design</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Increasing%20ising%20machine%20capacity%20with%20multi-chip%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Increasing ising machine capacity with multi-chip architectures</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MGX%3A%20near-zero%20overhead%20memory%20protection%20for%20data-intensive%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">MGX: near-zero overhead memory protection for data-intensive accelerators</a> - <b>1.531478917</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerating%20asynchronous%20programs%20through%20event%20sneak%20peek" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerating asynchronous programs through event sneak peek</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hiding%20intermittent%20information%20leakage%20with%20architectural%20support%20for%20blinking" target="_blank" style="text-decoration: none; color: #3366cc;">Hiding intermittent information leakage with architectural support for blinking</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20bus%20authentication%20and%20anti-probing%20architecture%20extending%20hardware%20trusted%20computing%20base%20off%20CPU%20chips%20and%20beyond" target="_blank" style="text-decoration: none; color: #3366cc;">A bus authentication and anti-probing architecture extending hardware trusted computing base off CPU chips and beyond</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ZnG%3A%20architecting%20GPU%20multi-processors%20with%20new%20flash%20for%20scalable%20data%20analysis" target="_blank" style="text-decoration: none; color: #3366cc;">ZnG: architecting GPU multi-processors with new flash for scalable data analysis</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=REDUCT%3A%20keep%20it%20close%2C%20keep%20it%20cool%21%3A%20efficient%20scaling%20of%20DNN%20inference%20on%20multi-core%20CPUs%20with%20near-cache%20compute" target="_blank" style="text-decoration: none; color: #3366cc;">REDUCT: keep it close, keep it cool!: efficient scaling of DNN inference on multi-core CPUs with near-cache compute</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Satori%3A%20efficient%20and%20fair%20resource%20partitioning%20by%20sacrificing%20short-term%20benefits%20for%20long-term%20gains" target="_blank" style="text-decoration: none; color: #3366cc;">Satori: efficient and fair resource partitioning by sacrificing short-term benefits for long-term gains</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HiveMind%3A%20a%20hardware-software%20system%20stack%20for%20serverless%20edge%20swarms" target="_blank" style="text-decoration: none; color: #3366cc;">HiveMind: a hardware-software system stack for serverless edge swarms</a> - <b>1.51851394</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficient%20synonym%20filtering%20and%20scalable%20delayed%20translation%20for%20hybrid%20virtual%20caching" target="_blank" style="text-decoration: none; color: #3366cc;">Efficient synonym filtering and scalable delayed translation for hybrid virtual caching</a> - <b>1.505149978</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficient%20synonym%20filtering%20and%20scalable%20delayed%20translation%20for%20hybrid%20virtual%20caching" target="_blank" style="text-decoration: none; color: #3366cc;">Efficient synonym filtering and scalable delayed translation for hybrid virtual caching</a> - <b>1.505149978</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Decoupled%20Affine%20Computation%20for%20SIMT%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Decoupled Affine Computation for SIMT GPUs</a> - <b>1.505149978</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Exploring%20predictive%20replacement%20policies%20for%20instruction%20cache%20and%20branch%20target%20buffer" target="_blank" style="text-decoration: none; color: #3366cc;">Exploring predictive replacement policies for instruction cache and branch target buffer</a> - <b>1.505149978</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Fine-grained%20warm%20water%20cooling%20for%20improving%20datacenter%20economy" target="_blank" style="text-decoration: none; color: #3366cc;">Fine-grained warm water cooling for improving datacenter economy</a> - <b>1.505149978</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NvMR%3A%20non-volatile%20memory%20renaming%20for%20intermittent%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">NvMR: non-volatile memory renaming for intermittent computing</a> - <b>1.505149978</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Computer%20performance%20microscopy%20with%20Shim" target="_blank" style="text-decoration: none; color: #3366cc;">Computer performance microscopy with Shim</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SLIP%3A%20reducing%20wire%20energy%20in%20the%20memory%20hierarchy" target="_blank" style="text-decoration: none; color: #3366cc;">SLIP: reducing wire energy in the memory hierarchy</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MBus%3A%20an%20ultra-low%20power%20interconnect%20bus%20for%20next%20generation%20nanopower%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">MBus: an ultra-low power interconnect bus for next generation nanopower systems</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Architectural%20Support%20for%20Server-Side%20PHP%20Processing" target="_blank" style="text-decoration: none; color: #3366cc;">Architectural Support for Server-Side PHP Processing</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MorLog%3A%20morphable%20hardware%20logging%20for%20atomic%20persistence%20in%20non-volatile%20main%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">MorLog: morphable hardware logging for atomic persistence in non-volatile main memory</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hetero-ViTAL%3A%20a%20virtualization%20stack%20for%20heterogeneous%20FPGA%20clusters" target="_blank" style="text-decoration: none; color: #3366cc;">Hetero-ViTAL: a virtualization stack for heterogeneous FPGA clusters</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Failure%20sentinels%3A%20ubiquitous%20just-in-time%20intermittent%20computation%20via%20low-cost%20hardware%20support%20for%20voltage%20monitoring" target="_blank" style="text-decoration: none; color: #3366cc;">Failure sentinels: ubiquitous just-in-time intermittent computation via low-cost hardware support for voltage monitoring</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cambricon-Q%3A%20a%20hybrid%20architecture%20for%20efficient%20training" target="_blank" style="text-decoration: none; color: #3366cc;">Cambricon-Q: a hybrid architecture for efficient training</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ZeR%C3%98%3A%20zero-overhead%20resilient%20operation%20under%20pointer%20integrity%20attacks" target="_blank" style="text-decoration: none; color: #3366cc;">ZeRØ: zero-overhead resilient operation under pointer integrity attacks</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=t%C3%A4k%C5%8D%3A%20a%20polymorphic%20cache%20hierarchy%20for%20general-purpose%20optimization%20of%20data%20movement" target="_blank" style="text-decoration: none; color: #3366cc;">täkō: a polymorphic cache hierarchy for general-purpose optimization of data movement</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PPMLAC%3A%20high%20performance%20chipset%20architecture%20for%20secure%20multi-party%20computation" target="_blank" style="text-decoration: none; color: #3366cc;">PPMLAC: high performance chipset architecture for secure multi-party computation</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20synthesis%20framework%20for%20stitching%20surface%20code%20with%20superconducting%20quantum%20devices" target="_blank" style="text-decoration: none; color: #3366cc;">A synthesis framework for stitching surface code with superconducting quantum devices</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=XQsim%3A%20modeling%20cross-technology%20control%20processors%20for%2010%2BK%20qubit%20quantum%20computers" target="_blank" style="text-decoration: none; color: #3366cc;">XQsim: modeling cross-technology control processors for 10+K qubit quantum computers</a> - <b>1.491361694</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Protogen%3A%20automatically%20generating%20directory%20cache%20coherence%20protocols%20from%20atomic%20specifications" target="_blank" style="text-decoration: none; color: #3366cc;">Protogen: automatically generating directory cache coherence protocols from atomic specifications</a> - <b>1.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SCU%3A%20a%20GPU%20stream%20compaction%20unit%20for%20graph%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">SCU: a GPU stream compaction unit for graph processing</a> - <b>1.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HALO%3A%20accelerating%20flow%20classification%20for%20scalable%20packet%20processing%20in%20NFV" target="_blank" style="text-decoration: none; color: #3366cc;">HALO: accelerating flow classification for scalable packet processing in NFV</a> - <b>1.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Taming%20the%20zoo%3A%20the%20unified%20GraphIt%20compiler%20framework%20for%20novel%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Taming the zoo: the unified GraphIt compiler framework for novel architectures</a> - <b>1.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MiSAR%3A%20minimalistic%20synchronization%20accelerator%20with%20resource%20overflow%20management" target="_blank" style="text-decoration: none; color: #3366cc;">MiSAR: minimalistic synchronization accelerator with resource overflow management</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Rescuing%20uncorrectable%20fault%20patterns%20in%20on-chip%20memories%20through%20error%20pattern%20transformation" target="_blank" style="text-decoration: none; color: #3366cc;">Rescuing uncorrectable fault patterns in on-chip memories through error pattern transformation</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Post-silicon%20CPU%20adaptation%20made%20practical%20using%20machine%20learning" target="_blank" style="text-decoration: none; color: #3366cc;">Post-silicon CPU adaptation made practical using machine learning</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Compact%20leakage-free%20support%20for%20integrity%20and%20reliability" target="_blank" style="text-decoration: none; color: #3366cc;">Compact leakage-free support for integrity and reliability</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Vector%20runahead" target="_blank" style="text-decoration: none; color: #3366cc;">Vector runahead</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=IChannels%3A%20exploiting%20current%20management%20mechanisms%20to%20create%20covert%20channels%20in%20modern%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">IChannels: exploiting current management mechanisms to create covert channels in modern processors</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RACOD%3A%20algorithm/hardware%20co-design%20for%20mobile%20robot%20path%20planning" target="_blank" style="text-decoration: none; color: #3366cc;">RACOD: algorithm/hardware co-design for mobile robot path planning</a> - <b>1.462397998</b> citations</li><li><a href="https://scholar.google.com/scholar?q=There%20and%20Back%20Again%3A%20Optimizing%20the%20Interconnect%20in%20Networks%20of%20Memory%20Cubes" target="_blank" style="text-decoration: none; color: #3366cc;">There and Back Again: Optimizing the Interconnect in Networks of Memory Cubes</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Footprint%3A%20Regulating%20Routing%20Adaptiveness%20in%20Networks-on-Chip" target="_blank" style="text-decoration: none; color: #3366cc;">Footprint: Regulating Routing Adaptiveness in Networks-on-Chip</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Aurochs%3A%20an%20architecture%20for%20dataflow%20threads" target="_blank" style="text-decoration: none; color: #3366cc;">Aurochs: an architecture for dataflow threads</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Superconducting%20computing%20with%20alternating%20logic%20elements" target="_blank" style="text-decoration: none; color: #3366cc;">Superconducting computing with alternating logic elements</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TDGraph%3A%20a%20topology-driven%20accelerator%20for%20high-performance%20streaming%20graph%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">TDGraph: a topology-driven accelerator for high-performance streaming graph processing</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=uBrain%3A%20a%20unary%20brain%20computer%20interface" target="_blank" style="text-decoration: none; color: #3366cc;">uBrain: a unary brain computer interface</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cascading%20structured%20pruning%3A%20enabling%20high%20data%20reuse%20for%20sparse%20DNN%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">Cascading structured pruning: enabling high data reuse for sparse DNN accelerators</a> - <b>1.447158031</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20Programmable%20Galois%20Field%20Processor%20for%20the%20Internet%20of%20Things" target="_blank" style="text-decoration: none; color: #3366cc;">A Programmable Galois Field Processor for the Internet of Things</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Yukta%3A%20multilayer%20resource%20controllers%20to%20maximize%20efficiency" target="_blank" style="text-decoration: none; color: #3366cc;">Yukta: multilayer resource controllers to maximize efficiency</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Nonblocking%20memory%20refresh" target="_blank" style="text-decoration: none; color: #3366cc;">Nonblocking memory refresh</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Mobilizing%20the%20micro-ops%3A%20exploiting%20context%20sensitive%20decoding%20for%20security%20and%20energy%20efficiency" target="_blank" style="text-decoration: none; color: #3366cc;">Mobilizing the micro-ops: exploiting context sensitive decoding for security and energy efficiency</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=OO-%20VR%3A%20NUMA%20friendly%20object-oriented%20VR%20rendering%20framework%20for%20future%20NUMA-based%20multi-GPU%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">OO- VR: NUMA friendly object-oriented VR rendering framework for future NUMA-based multi-GPU systems</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=InvisiPage%3A%20oblivious%20demand%20paging%20for%20secure%20enclaves" target="_blank" style="text-decoration: none; color: #3366cc;">InvisiPage: oblivious demand paging for secure enclaves</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GraphABCD%3A%20scaling%20out%20graph%20analytics%20with%20asynchronous%20block%20coordinate%20descent" target="_blank" style="text-decoration: none; color: #3366cc;">GraphABCD: scaling out graph analytics with asynchronous block coordinate descent</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Packet%20chasing%3A%20spying%20on%20network%20packets%20over%20a%20cache%20side-channel" target="_blank" style="text-decoration: none; color: #3366cc;">Packet chasing: spying on network packets over a cache side-channel</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Thermometer%3A%20profile-guided%20btb%20replacement%20for%20data%20center%20applications" target="_blank" style="text-decoration: none; color: #3366cc;">Thermometer: profile-guided btb replacement for data center applications</a> - <b>1.431363764</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Emerald%3A%20graphics%20modeling%20for%20SoC%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Emerald: graphics modeling for SoC systems</a> - <b>1.414973348</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SOFF%3A%20an%20OpenCL%20high-level%20synthesis%20framework%20for%20FPGAs" target="_blank" style="text-decoration: none; color: #3366cc;">SOFF: an OpenCL high-level synthesis framework for FPGAs</a> - <b>1.414973348</b> citations</li><li><a href="https://scholar.google.com/scholar?q=GCoM%3A%20a%20detailed%20GPU%20core%20model%20for%20accurate%20analytical%20modeling%20of%20modern%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">GCoM: a detailed GPU core model for accurate analytical modeling of modern GPUs</a> - <b>1.414973348</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerating%20markov%20random%20field%20inference%20using%20molecular%20optical%20gibbs%20sampling%20units" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerating markov random field inference using molecular optical gibbs sampling units</a> - <b>1.397940009</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Scaling%20datacenter%20accelerators%20with%20compute-reuse%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Scaling datacenter accelerators with compute-reuse architectures</a> - <b>1.397940009</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BabelFish%3A%20fusing%20address%20translations%20for%20containers" target="_blank" style="text-decoration: none; color: #3366cc;">BabelFish: fusing address translations for containers</a> - <b>1.397940009</b> citations</li><li><a href="https://scholar.google.com/scholar?q=%CE%B7-LSTM%3A%20co-designing%20highly-efficient%20large%20LSTM%20training%20via%20exploiting%20memory-saving%20and%20architectural%20design%20opportunities" target="_blank" style="text-decoration: none; color: #3366cc;">η-LSTM: co-designing highly-efficient large LSTM training via exploiting memory-saving and architectural design opportunities</a> - <b>1.397940009</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20Programmable%20Hardware%20Accelerator%20for%20Simulating%20Dynamical%20Systems" target="_blank" style="text-decoration: none; color: #3366cc;">A Programmable Hardware Accelerator for Simulating Dynamical Systems</a> - <b>1.380211242</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Securing%20GPU%20via%20region-based%20bounds%20checking" target="_blank" style="text-decoration: none; color: #3366cc;">Securing GPU via region-based bounds checking</a> - <b>1.380211242</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FlexiCores%3A%20low%20footprint%2C%20high%20yield%2C%20field%20reprogrammable%20flexible%20microprocessors" target="_blank" style="text-decoration: none; color: #3366cc;">FlexiCores: low footprint, high yield, field reprogrammable flexible microprocessors</a> - <b>1.380211242</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficiently%20scaling%20out-of-order%20cores%20for%20simultaneous%20multithreading" target="_blank" style="text-decoration: none; color: #3366cc;">Efficiently scaling out-of-order cores for simultaneous multithreading</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=XPro%3A%20A%20Cross-End%20Processing%20Architecture%20for%20Data%20Analytics%20in%20Wearables" target="_blank" style="text-decoration: none; color: #3366cc;">XPro: A Cross-End Processing Architecture for Data Analytics in Wearables</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerating%20GPU%20Hardware%20Transactional%20Memory%20with%20Snapshot%20Isolation" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerating GPU Hardware Transactional Memory with Snapshot Isolation</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Focused%20value%20prediction" target="_blank" style="text-decoration: none; color: #3366cc;">Focused value prediction</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficiently%20supporting%20dynamic%20task%20parallelism%20on%20heterogeneous%20cache-coherent%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Efficiently supporting dynamic task parallelism on heterogeneous cache-coherent systems</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TimeCache%3A%20using%20time%20to%20eliminate%20cache%20side%20channels%20when%20sharing%20software" target="_blank" style="text-decoration: none; color: #3366cc;">TimeCache: using time to eliminate cache side channels when sharing software</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Accelerating%20database%20analytic%20query%20workloads%20using%20an%20associative%20processor" target="_blank" style="text-decoration: none; color: #3366cc;">Accelerating database analytic query workloads using an associative processor</a> - <b>1.361727836</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Master%20of%20none%20acceleration%3A%20a%20comparison%20of%20accelerator%20architectures%20for%20analytical%20query%20processing" target="_blank" style="text-decoration: none; color: #3366cc;">Master of none acceleration: a comparison of accelerator architectures for analytical query processing</a> - <b>1.342422681</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Tvarak%3A%20software-managed%20hardware%20offload%20for%20redundancy%20in%20direct-access%20NVM%20storage" target="_blank" style="text-decoration: none; color: #3366cc;">Tvarak: software-managed hardware offload for redundancy in direct-access NVM storage</a> - <b>1.322219295</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Efficient%20multi-GPU%20shared%20memory%20via%20automatic%20optimization%20of%20fine-grained%20transfers" target="_blank" style="text-decoration: none; color: #3366cc;">Efficient multi-GPU shared memory via automatic optimization of fine-grained transfers</a> - <b>1.322219295</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BlockMaestro%3A%20enabling%20programmer-transparent%20task-based%20execution%20in%20GPU%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">BlockMaestro: enabling programmer-transparent task-based execution in GPU systems</a> - <b>1.322219295</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Gearbox%3A%20a%20case%20for%20supporting%20accumulation%20dispatching%20and%20hybrid%20partitioning%20in%20PIM-based%20accelerators" target="_blank" style="text-decoration: none; color: #3366cc;">Gearbox: a case for supporting accumulation dispatching and hybrid partitioning in PIM-based accelerators</a> - <b>1.322219295</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RelaxFault%20memory%20repair" target="_blank" style="text-decoration: none; color: #3366cc;">RelaxFault memory repair</a> - <b>1.301029996</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Architecting%20a%20stochastic%20computing%20unit%20with%20molecular%20optical%20devices" target="_blank" style="text-decoration: none; color: #3366cc;">Architecting a stochastic computing unit with molecular optical devices</a> - <b>1.301029996</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Linebacker%3A%20preserving%20victim%20cache%20lines%20in%20idle%20register%20files%20of%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">Linebacker: preserving victim cache lines in idle register files of GPUs</a> - <b>1.301029996</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Free%20atomics%3A%20hardware%20atomic%20operations%20without%20fences" target="_blank" style="text-decoration: none; color: #3366cc;">Free atomics: hardware atomic operations without fences</a> - <b>1.301029996</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Clean%3A%20a%20race%20detector%20with%20cleaner%20semantics" target="_blank" style="text-decoration: none; color: #3366cc;">Clean: a race detector with cleaner semantics</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CHARSTAR%3A%20Clock%20Hierarchy%20Aware%20Resource%20Scaling%20in%20Tiled%20ARchitectures" target="_blank" style="text-decoration: none; color: #3366cc;">CHARSTAR: Clock Hierarchy Aware Resource Scaling in Tiled ARchitectures</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Non-speculative%20store%20coalescing%20in%20total%20store%20order" target="_blank" style="text-decoration: none; color: #3366cc;">Non-speculative store coalescing in total store order</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PES%3A%20proactive%20event%20scheduling%20for%20responsive%20and%20energy-efficient%20mobile%20web%20computing" target="_blank" style="text-decoration: none; color: #3366cc;">PES: proactive event scheduling for responsive and energy-efficient mobile web computing</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=AxMemo%3A%20hardware-compiler%20co-design%20for%20approximate%20code%20memoization" target="_blank" style="text-decoration: none; color: #3366cc;">AxMemo: hardware-compiler co-design for approximate code memoization</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Using%20SMT%20to%20accelerate%20nested%20virtualization" target="_blank" style="text-decoration: none; color: #3366cc;">Using SMT to accelerate nested virtualization</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Energy%20efficiency%20boost%20in%20the%20AI-infused%20POWER10%20processor" target="_blank" style="text-decoration: none; color: #3366cc;">Energy efficiency boost in the AI-infused POWER10 processor</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SIMD2%3A%20a%20generalized%20matrix%20instruction%20set%20for%20accelerating%20tensor%20computation%20beyond%20GEMM" target="_blank" style="text-decoration: none; color: #3366cc;">SIMD2: a generalized matrix instruction set for accelerating tensor computation beyond GEMM</a> - <b>1.278753601</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Cost-effective%20speculative%20scheduling%20in%20high%20performance%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">Cost-effective speculative scheduling in high performance processors</a> - <b>1.255272505</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Heat%20to%20power%3A%20thermal%20energy%20harvesting%20and%20recycling%20for%20warm%20water-cooled%20datacenters" target="_blank" style="text-decoration: none; color: #3366cc;">Heat to power: thermal energy harvesting and recycling for warm water-cooled datacenters</a> - <b>1.255272505</b> citations</li><li><a href="https://scholar.google.com/scholar?q=RingCNN%3A%20exploiting%20algebraically-sparse%20ring%20tensors%20for%20energy-efficient%20CNN-based%20computational%20imaging" target="_blank" style="text-decoration: none; color: #3366cc;">RingCNN: exploiting algebraically-sparse ring tensors for energy-efficient CNN-based computational imaging</a> - <b>1.255272505</b> citations</li><li><a href="https://scholar.google.com/scholar?q=X-cache%3A%20a%20modular%20architecture%20for%20domain-specific%20caches" target="_blank" style="text-decoration: none; color: #3366cc;">X-cache: a modular architecture for domain-specific caches</a> - <b>1.255272505</b> citations</li><li><a href="https://scholar.google.com/scholar?q=EyeCoD%3A%20eye%20tracking%20system%20acceleration%20via%20flatcam-based%20algorithm%20%26%20accelerator%20co-design" target="_blank" style="text-decoration: none; color: #3366cc;">EyeCoD: eye tracking system acceleration via flatcam-based algorithm & accelerator co-design</a> - <b>1.255272505</b> citations</li><li><a href="https://scholar.google.com/scholar?q=DCS-ctrl%3A%20a%20fast%20and%20flexible%20device-control%20mechanism%20for%20device-centric%20server%20architecture" target="_blank" style="text-decoration: none; color: #3366cc;">DCS-ctrl: a fast and flexible device-control mechanism for device-centric server architecture</a> - <b>1.230448921</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PMNet%3A%20in-network%20data%20persistence" target="_blank" style="text-decoration: none; color: #3366cc;">PMNet: in-network data persistence</a> - <b>1.230448921</b> citations</li><li><a href="https://scholar.google.com/scholar?q=A%20scalable%20architecture%20for%20reprioritizing%20ordered%20parallelism" target="_blank" style="text-decoration: none; color: #3366cc;">A scalable architecture for reprioritizing ordered parallelism</a> - <b>1.230448921</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Dynamic%20global%20adaptive%20routing%20in%20high-radix%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Dynamic global adaptive routing in high-radix networks</a> - <b>1.230448921</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Opportunistic%20competition%20overhead%20reduction%20for%20expediting%20critical%20section%20in%20NoC%20based%20CMPs" target="_blank" style="text-decoration: none; color: #3366cc;">Opportunistic competition overhead reduction for expediting critical section in NoC based CMPs</a> - <b>1.204119983</b> citations</li><li><a href="https://scholar.google.com/scholar?q=MTraceCheck%3A%20Validating%20Non-Deterministic%20Behavior%20of%20Memory%20Consistency%20Models%20in%20Post-Silicon%20Validation" target="_blank" style="text-decoration: none; color: #3366cc;">MTraceCheck: Validating Non-Deterministic Behavior of Memory Consistency Models in Post-Silicon Validation</a> - <b>1.204119983</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Redundant%20Memory%20Array%20Architecture%20for%20Efficient%20Selective%20Protection" target="_blank" style="text-decoration: none; color: #3366cc;">Redundant Memory Array Architecture for Efficient Selective Protection</a> - <b>1.204119983</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Slipstream%20processors%20revisited%3A%20exploiting%20branch%20sets" target="_blank" style="text-decoration: none; color: #3366cc;">Slipstream processors revisited: exploiting branch sets</a> - <b>1.204119983</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Dv%C3%A9%3A%20improving%20DRAM%20reliability%20and%20performance%20on-demand%20via%20coherent%20replication" target="_blank" style="text-decoration: none; color: #3366cc;">Dvé: improving DRAM reliability and performance on-demand via coherent replication</a> - <b>1.204119983</b> citations</li><li><a href="https://scholar.google.com/scholar?q=CaSMap%3A%20agile%20mapper%20for%20reconfigurable%20spatial%20architectures%20by%20automatically%20clustering%20intermediate%20representations%20and%20scattering%20mapping%20process" target="_blank" style="text-decoration: none; color: #3366cc;">CaSMap: agile mapper for reconfigurable spatial architectures by automatically clustering intermediate representations and scattering mapping process</a> - <b>1.204119983</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Branch%20vanguard%3A%20decomposing%20branch%20functionality%20into%20prediction%20and%20resolution%20instructions" target="_blank" style="text-decoration: none; color: #3366cc;">Branch vanguard: decomposing branch functionality into prediction and resolution instructions</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ShortCut%3A%20Architectural%20Support%20for%20Fast%20Object%20Access%20in%20Scripting%20Languages" target="_blank" style="text-decoration: none; color: #3366cc;">ShortCut: Architectural Support for Fast Object Access in Scripting Languages</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Filter%20caching%20for%20free%3A%20the%20untapped%20potential%20of%20the%20store-buffer" target="_blank" style="text-decoration: none; color: #3366cc;">Filter caching for free: the untapped potential of the store-buffer</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=High-performance%20deep-learning%20coprocessor%20integrated%20into%20x86%20SoC%20with%20server-class%20CPUs" target="_blank" style="text-decoration: none; color: #3366cc;">High-performance deep-learning coprocessor integrated into x86 SoC with server-class CPUs</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Mocktails%3A%20capturing%20the%20memory%20behaviour%20of%20proprietary%20mobile%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Mocktails: capturing the memory behaviour of proprietary mobile architectures</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HieraGen%3A%20automated%20generation%20of%20concurrent%2C%20hierarchical%20cache%20coherence%20protocols" target="_blank" style="text-decoration: none; color: #3366cc;">HieraGen: automated generation of concurrent, hierarchical cache coherence protocols</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ScoRD%3A%20a%20scoped%20race%20detector%20for%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">ScoRD: a scoped race detector for GPUs</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NVOverlay%3A%20enabling%20efficient%20and%20scalable%20high-frequency%20snapshotting%20to%20NVM" target="_blank" style="text-decoration: none; color: #3366cc;">NVOverlay: enabling efficient and scalable high-frequency snapshotting to NVM</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PS-ORAM%3A%20efficient%20crash%20consistency%20support%20for%20oblivious%20RAM%20on%20NVM" target="_blank" style="text-decoration: none; color: #3366cc;">PS-ORAM: efficient crash consistency support for oblivious RAM on NVM</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=ASAP%3A%20architecture%20support%20for%20asynchronous%20persistence" target="_blank" style="text-decoration: none; color: #3366cc;">ASAP: architecture support for asynchronous persistence</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Register%20file%20prefetching" target="_blank" style="text-decoration: none; color: #3366cc;">Register file prefetching</a> - <b>1.176091259</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HetCore%3A%20TFET-CMOS%20hetero-device%20architecture%20for%20CPUs%20and%20GPUs" target="_blank" style="text-decoration: none; color: #3366cc;">HetCore: TFET-CMOS hetero-device architecture for CPUs and GPUs</a> - <b>1.146128036</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PF-DRAM%3A%20a%20precharge-free%20DRAM%20structure" target="_blank" style="text-decoration: none; color: #3366cc;">PF-DRAM: a precharge-free DRAM structure</a> - <b>1.146128036</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FastTrack%3A%20leveraging%20heterogeneous%20FPGA%20wires%20to%20design%20low-cost%20high-performance%20soft%20NoCs" target="_blank" style="text-decoration: none; color: #3366cc;">FastTrack: leveraging heterogeneous FPGA wires to design low-cost high-performance soft NoCs</a> - <b>1.113943352</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Time%20squeezing%20for%20tiny%20devices" target="_blank" style="text-decoration: none; color: #3366cc;">Time squeezing for tiny devices</a> - <b>1.113943352</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Flick%3A%20fast%20and%20lightweight%20ISA-crossing%20call%20for%20heterogeneous-ISA%20environments" target="_blank" style="text-decoration: none; color: #3366cc;">Flick: fast and lightweight ISA-crossing call for heterogeneous-ISA environments</a> - <b>1.113943352</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TransForm%3A%20formally%20specifying%20transistency%20models%20and%20synthesizing%20enhanced%20litmus%20tests" target="_blank" style="text-decoration: none; color: #3366cc;">TransForm: formally specifying transistency models and synthesizing enhanced litmus tests</a> - <b>1.113943352</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Managing%20reliability%20skew%20in%20DNA%20storage" target="_blank" style="text-decoration: none; color: #3366cc;">Managing reliability skew in DNA storage</a> - <b>1.113943352</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Virtual%20melting%20temperature%3A%20managing%20server%20load%20to%20minimize%20cooling%20overhead%20with%20phase%20change%20materials" target="_blank" style="text-decoration: none; color: #3366cc;">Virtual melting temperature: managing server load to minimize cooling overhead with phase change materials</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Hardware%20supported%20permission%20checks%20on%20persistent%20objects%20for%20performance%20and%20programmability" target="_blank" style="text-decoration: none; color: #3366cc;">Hardware supported permission checks on persistent objects for performance and programmability</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Retracted%20on%20May%2010%2C%202023%3A%20TPShare%3A%20a%20time-space%20sharing%20scheduling%20abstraction%20for%20shared%20cloud%20via%20vertical%20labels" target="_blank" style="text-decoration: none; color: #3366cc;">Retracted on May 10, 2023: TPShare: a time-space sharing scheduling abstraction for shared cloud via vertical labels</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Quantifying%20server%20memory%20frequency%20margin%20and%20using%20it%20to%20improve%20performance%20in%20HPC%20systems" target="_blank" style="text-decoration: none; color: #3366cc;">Quantifying server memory frequency margin and using it to improve performance in HPC systems</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NASA%3A%20accelerating%20neural%20network%20design%20with%20a%20NAS%20processor" target="_blank" style="text-decoration: none; color: #3366cc;">NASA: accelerating neural network design with a NAS processor</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=SoftVN%3A%20efficient%20memory%20protection%20via%20software-provided%20version%20numbers" target="_blank" style="text-decoration: none; color: #3366cc;">SoftVN: efficient memory protection via software-provided version numbers</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Anticipating%20and%20eliminating%20redundant%20computations%20in%20accelerated%20sparse%20training" target="_blank" style="text-decoration: none; color: #3366cc;">Anticipating and eliminating redundant computations in accelerated sparse training</a> - <b>1.079181246</b> citations</li><li><a href="https://scholar.google.com/scholar?q=PowerChop%3A%20identifying%20and%20managing%20non-critical%20units%20in%20hybrid%20processor%20architectures" target="_blank" style="text-decoration: none; color: #3366cc;">PowerChop: identifying and managing non-critical units in hybrid processor architectures</a> - <b>1.041392685</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Future%20vector%20microprocessor%20extensions%20for%20data%20aggregations" target="_blank" style="text-decoration: none; color: #3366cc;">Future vector microprocessor extensions for data aggregations</a> - <b>1.041392685</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Retracted%20on%20January%2026%2C%202021%3A%203D-based%20video%20recognition%20acceleration%20by%20leveraging%20temporal%20locality" target="_blank" style="text-decoration: none; color: #3366cc;">Retracted on January 26, 2021: 3D-based video recognition acceleration by leveraging temporal locality</a> - <b>1.041392685</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FaultHound%3A%20value-locality-based%20soft-fault%20tolerance" target="_blank" style="text-decoration: none; color: #3366cc;">FaultHound: value-locality-based soft-fault tolerance</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Short-circuit%20dispatch%3A%20accelerating%20virtual%20machine%20interpreters%20on%20embedded%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">Short-circuit dispatch: accelerating virtual machine interpreters on embedded processors</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=TCEP%3A%20traffic%20consolidation%20for%20energy-proportional%20high-radix%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">TCEP: traffic consolidation for energy-proportional high-radix networks</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Data%20compression%20accelerator%20on%20IBM%20POWER9%20and%20z15%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">Data compression accelerator on IBM POWER9 and z15 processors</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Auto-predication%20of%20critical%20branches" target="_blank" style="text-decoration: none; color: #3366cc;">Auto-predication of critical branches</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Lelantus%3A%20fine-granularity%20copy-on-write%20operations%20for%20secure%20non-volatile%20memories" target="_blank" style="text-decoration: none; color: #3366cc;">Lelantus: fine-granularity copy-on-write operations for secure non-volatile memories</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Check-in%3A%20in-storage%20checkpointing%20for%20key-value%20store%20system%20leveraging%20flash-based%20SSDs" target="_blank" style="text-decoration: none; color: #3366cc;">Check-in: in-storage checkpointing for key-value store system leveraging flash-based SSDs</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Independent%20forward%20progress%20of%20work-groups" target="_blank" style="text-decoration: none; color: #3366cc;">Independent forward progress of work-groups</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Supporting%20legacy%20libraries%20on%20non-volatile%20memory%3A%20a%20user-transparent%20approach" target="_blank" style="text-decoration: none; color: #3366cc;">Supporting legacy libraries on non-volatile memory: a user-transparent approach</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Revamping%20storage%20class%20memory%20with%20hardware%20automated%20memory-over-storage%20solution" target="_blank" style="text-decoration: none; color: #3366cc;">Revamping storage class memory with hardware automated memory-over-storage solution</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=NASGuard%3A%20a%20novel%20accelerator%20architecture%20for%20robust%20neural%20architecture%20search%20%28NAS%29%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">NASGuard: a novel accelerator architecture for robust neural architecture search (NAS) networks</a> - <b>1.0</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Charm%3A%20a%20language%20for%20closed-form%20high-level%20architecture%20modeling" target="_blank" style="text-decoration: none; color: #3366cc;">Charm: a language for closed-form high-level architecture modeling</a> - <b>0.954242509</b> citations</li><li><a href="https://scholar.google.com/scholar?q=FFCCD%3A%20fence-free%20crash-consistent%20concurrent%20defragmentation%20for%20persistent%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">FFCCD: fence-free crash-consistent concurrent defragmentation for persistent memory</a> - <b>0.954242509</b> citations</li><li><a href="https://scholar.google.com/scholar?q=HyperTRIO%3A%20hyper-tenant%20translation%20of%20I/O%20addresses" target="_blank" style="text-decoration: none; color: #3366cc;">HyperTRIO: hyper-tenant translation of I/O addresses</a> - <b>0.903089987</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Commutative%20data%20reordering%3A%20a%20new%20technique%20to%20reduce%20data%20movement%20energy%20on%20sparse%20inference%20workloads" target="_blank" style="text-decoration: none; color: #3366cc;">Commutative data reordering: a new technique to reduce data movement energy on sparse inference workloads</a> - <b>0.903089987</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Zero%20inclusion%20victim%3A%20isolating%20core%20caches%20from%20inclusive%20last-level%20cache%20evictions" target="_blank" style="text-decoration: none; color: #3366cc;">Zero inclusion victim: isolating core caches from inclusive last-level cache evictions</a> - <b>0.903089987</b> citations</li><li><a href="https://scholar.google.com/scholar?q=LaZy%20superscalar" target="_blank" style="text-decoration: none; color: #3366cc;">LaZy superscalar</a> - <b>0.84509804</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Lemonade%20from%20Lemons%3A%20Harnessing%20Device%20Wearout%20to%20Create%20Limited-Use%20Security%20Architectures" target="_blank" style="text-decoration: none; color: #3366cc;">Lemonade from Lemons: Harnessing Device Wearout to Create Limited-Use Security Architectures</a> - <b>0.84509804</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Maya%3A%20using%20formal%20control%20to%20obfuscate%20power%20side%20channels" target="_blank" style="text-decoration: none; color: #3366cc;">Maya: using formal control to obfuscate power side channels</a> - <b>0.84509804</b> citations</li><li><a href="https://scholar.google.com/scholar?q=LightPC%3A%20hardware%20and%20software%20co-design%20for%20energy-efficient%20full%20system%20persistence" target="_blank" style="text-decoration: none; color: #3366cc;">LightPC: hardware and software co-design for energy-efficient full system persistence</a> - <b>0.84509804</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Rethinking%20programmable%20earable%20processors" target="_blank" style="text-decoration: none; color: #3366cc;">Rethinking programmable earable processors</a> - <b>0.77815125</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Production-run%20software%20failure%20diagnosis%20via%20Adaptive%20Communication%20Tracking" target="_blank" style="text-decoration: none; color: #3366cc;">Production-run software failure diagnosis via Adaptive Communication Tracking</a> - <b>0.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Bouncer%3A%20static%20program%20analysis%20in%20hardware" target="_blank" style="text-decoration: none; color: #3366cc;">Bouncer: static program analysis in hardware</a> - <b>0.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Execution%20dependence%20extension%20%28EDE%29%3A%20isa%20support%20for%20eliminating%20fences" target="_blank" style="text-decoration: none; color: #3366cc;">Execution dependence extension (EDE): isa support for eliminating fences</a> - <b>0.698970004</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Decoupling%20loads%20for%20nano-instruction%20set%20computers" target="_blank" style="text-decoration: none; color: #3366cc;">Decoupling loads for nano-instruction set computers</a> - <b>0.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=BOSS%3A%20bandwidth-optimized%20search%20accelerator%20for%20storage-class%20memory" target="_blank" style="text-decoration: none; color: #3366cc;">BOSS: bandwidth-optimized search accelerator for storage-class memory</a> - <b>0.602059991</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Constructing%20a%20weak%20memory%20model" target="_blank" style="text-decoration: none; color: #3366cc;">Constructing a weak memory model</a> - <b>0.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Dynamic%20memory%20dependence%20predication" target="_blank" style="text-decoration: none; color: #3366cc;">Dynamic memory dependence predication</a> - <b>0.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Space-time%20algebra%3A%20a%20model%20for%20neocortical%20computation" target="_blank" style="text-decoration: none; color: #3366cc;">Space-time algebra: a model for neocortical computation</a> - <b>0.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Speculative%20vectorisation%20with%20selective%20replay" target="_blank" style="text-decoration: none; color: #3366cc;">Speculative vectorisation with selective replay</a> - <b>0.477121255</b> citations</li><li><a href="https://scholar.google.com/scholar?q=Ghost%20routing%20to%20enable%20oblivious%20computation%20on%20memory-centric%20networks" target="_blank" style="text-decoration: none; color: #3366cc;">Ghost routing to enable oblivious computation on memory-centric networks</a> - <b>0.301029996</b> citations</li>
    </ul>
</div>


<script>
    // 1. Toggle List Function
    function toggleList() {
        var x = document.getElementById("paper-list-container");
        var btn = document.getElementById("toggle-btn");
        if (x.style.display === "none") {
            x.style.display = "block";
            btn.innerText = "Hide Paper List";
        } else {
            x.style.display = "none";
            btn.innerText = "Show Paper List";
        }
    }

    // 2. Plotly Bar Click Interaction
    // We use an interval to ensure the plot div is fully loaded before attaching the event
    var checkPlot = setInterval(function(){
        var plotElement = document.getElementsByClassName('plotly-graph-div')[0];
        if (plotElement) {
            clearInterval(checkPlot);
            plotElement.on('plotly_click', function(data){
                if(data.points.length > 0){
                    var point = data.points[0];
                    var url = null;
                    if(point.data.type === 'bar') {
                         url = point.customdata[3];
                    }
                    if(url && url.startsWith('http')){
                        window.open(url, '_blank');
                    }
                }
            });
        }
    }, 500);
</script>

</body>
</html>