-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsparse.h
More file actions
91 lines (89 loc) · 2.1 KB
/
sparse.h
File metadata and controls
91 lines (89 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include <iostream>
//
// Threaded SpMV
//
// y <- A * x
//
// Parameters
// ----------
// n_row, n_col : int
// dimensions of the n_row x n_col matrix A
// Ap, Aj, Ax : array
// CSR pointer, index, and data vectors for matrix A
// Xx : array
// input vector
// Yy : array
// output vector (modified in-place)
//
// See Also
// --------
// csr_matvec
//
// Notes
// -----
// Requires GCC 4.9 for ivdep
// Requires a compiler with OMP
//
template <class I, class T>
void csr_matvec_omp(const I n_row,
const I n_col,
const I Ap[], const int Ap_size,
const I Aj[], const int Aj_size,
const T Ax[], const int Ax_size,
const T Xx[], const int Xx_size,
T Yx[], const int Yx_size)
{
I i, jj;
T sum;
#pragma omp parallel for default(shared) schedule(static) private(i, sum, jj)
for(i = 0; i < n_row; i++){
sum = Yx[i];
#pragma GCC ivdep
for(jj = Ap[i]; jj < Ap[i+1]; jj++){
sum += Ax[jj] * Xx[Aj[jj]];
}
Yx[i] = sum;
}
}
//
// Reference SpMV from scipy
//
// y <- A * x
//
// Parameters
// ----------
// n_row, n_col : int
// dimensions of the n_row x n_col matrix A
// Ap, Aj, Ax : array
// CSR pointer, index, and data vectors for matrix A
// Xx : array
// input vector
// Yy : array
// output vector (modified in-place)
//
// See Also
// --------
// csr_matvec_omp
//
// Notes
// -----
// Requires GCC 4.9 for ivdep
// Requires a compiler with OMP
// https://github.com/scipy/scipy/blob/master/scipy/sparse/sparsetools/csr.h#L1122
template <class I, class T>
void csr_matvec(const I n_row,
const I n_col,
const I Ap[], const int Ap_size,
const I Aj[], const int Aj_size,
const T Ax[], const int Ax_size,
const T Xx[], const int Xx_size,
T Yx[], const int Yx_size)
{
for(I i = 0; i < n_row; i++){
T sum = Yx[i];
for(I jj = Ap[i]; jj < Ap[i+1]; jj++){
sum += Ax[jj] * Xx[Aj[jj]];
}
Yx[i] = sum;
}
}