-
Notifications
You must be signed in to change notification settings - Fork 0
Open
Description
#include <algorithm>
#include <ctime>
#include <iostream>
int main()
{
// Generate data
const unsigned arraySize = 32768;
int data[arraySize];
for (unsigned c = 0; c < arraySize; ++c)
data[c] = std::rand() % 256;
// !!! With this, the next loop runs faster.
// std::sort(data, data + arraySize);
// Test
clock_t start = clock();
long long sum = 0;
for (unsigned i = 0; i < 100000; ++i)
{
// Primary loop
for (unsigned c = 0; c < arraySize; ++c)
{
if (data[c] >= 128)
sum += data[c];
}
}
double elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;
std::cout << elapsedTime << std::endl;
std::cout << "sum = " << sum << std::endl;
}未排序,g++未开启优化
$ gcc -O0 test.cpp -o test
$ perf stat ./test
25.8587
sum = 314931600000
Performance counter stats for './test':
25,877.65 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
155 page-faults:u # 0.006 K/sec
70,377,831,483 cycles:u # 2.720 GHz
32,804,378,804 instructions:u # 0.47 insn per cycle
9,831,737,666 branches:u # 379.932 M/sec
1,562,312,900 branch-misses:u # 15.89% of all branches
25.882057914 seconds time elapsed
25.857499000 seconds user
0.003330000 seconds sys耗时 25.86s, branch misses 15.89%
未排序,g++开启优化-O1
$ g++ -O1 test.cpp -o test
$ perf stat ./test
2.65741
sum = 314931600000
Performance counter stats for './test':
2,660.83 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
152 page-faults:u # 0.057 K/sec
7,278,131,367 cycles:u # 2.735 GHz
26,219,971,449 instructions:u # 3.60 insn per cycle
3,277,996,834 branches:u # 1231.946 M/sec
115,905 branch-misses:u # 0.00% of all branches
2.661388918 seconds time elapsed
2.656132000 seconds user
0.003327000 seconds sys耗时 2.66s, branch misses 0.00%
已排序,g++未开启优化
$ g++ -O0 test.cpp -o test
$ perf stat ./test
8.60613
sum = 314931600000
Performance counter stats for './test':
8,618.99 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
154 page-faults:u # 0.018 K/sec
23,467,973,433 cycles:u # 2.723 GHz
32,825,330,467 instructions:u # 1.40 insn per cycle
9,835,368,902 branches:u # 1141.128 M/sec
371,363 branch-misses:u # 0.00% of all branches
8.619636593 seconds time elapsed
8.613694000 seconds user
0.000000000 seconds sys耗时 8.61s, branch misses 0.00%
已排序,g++开启优化-O1
$ g++ -O1 test.cpp -o test
$ perf stat ./test
2.69993
sum = 314931600000
Performance counter stats for './test':
2,705.05 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
154 page-faults:u # 0.057 K/sec
7,380,438,721 cycles:u # 2.728 GHz
26,223,406,535 instructions:u # 3.55 insn per cycle
3,278,811,478 branches:u # 1212.106 M/sec
252,239 branch-misses:u # 0.01% of all branches
2.705603186 seconds time elapsed
2.703475000 seconds user
0.000000000 seconds sys耗时 2.71s, branch misses 0.01%
未排序,分支替换为位运算,g++未开启优化
$ g++ -O0 test.cpp -o test
$ perf stat ./test
9.23033
sum = 314931600000
Performance counter stats for './test':
9,238.17 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
157 page-faults:u # 0.017 K/sec
25,248,159,956 cycles:u # 2.733 GHz
55,711,572,584 instructions:u # 2.21 insn per cycle
6,554,931,952 branches:u # 709.549 M/sec
116,102 branch-misses:u # 0.00% of all branches
9.238602971 seconds time elapsed
9.232461000 seconds user
0.000000000 seconds sys耗时 9.24s, branch misses 0.00%
已排序,分支替换为位运算,g++未开启优化
$ g++ -O0 test.cpp -o test
$ perf stat ./test
9.29667
sum = 314931600000
Performance counter stats for './test':
9,311.10 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
155 page-faults:u # 0.017 K/sec
25,276,492,913 cycles:u # 2.715 GHz
55,732,531,138 instructions:u # 2.20 insn per cycle
6,558,569,580 branches:u # 704.382 M/sec
243,447 branch-misses:u # 0.00% of all branches
9.312390941 seconds time elapsed
9.302561000 seconds user
0.003331000 seconds sys耗时 9.30s, branch misses 0.00%
未排序,分支替换为位运算,g++开启优化-O1
$ g++ -O1 test.cpp -o test
$ perf stat ./test
3.43346
sum = 314931600000
Performance counter stats for './test':
3,437.69 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
153 page-faults:u # 0.045 K/sec
9,365,887,675 cycles:u # 2.724 GHz
32,773,571,878 instructions:u # 3.50 insn per cycle
3,277,997,281 branches:u # 953.545 M/sec
115,902 branch-misses:u # 0.00% of all branches
3.438166027 seconds time elapsed
3.432307000 seconds user
0.003258000 seconds sys耗时 3.44, branch misses 0.00%
已排序,分支替换为位运算,g++开启优化-O1
$ g++ -O1 test.cpp -o test
$ perf stat ./test
3.42516
sum = 314931600000
Performance counter stats for './test':
3,431.24 msec task-clock:u # 1.000 CPUs utilized
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
152 page-faults:u # 0.044 K/sec
9,316,871,001 cycles:u # 2.715 GHz
32,777,006,878 instructions:u # 3.52 insn per cycle
3,278,811,800 branches:u # 955.576 M/sec
251,609 branch-misses:u # 0.01% of all branches
3.431681839 seconds time elapsed
3.428959000 seconds user
0.000000000 seconds sys耗时 3.43s, branch misses 0.01%
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels