-
Notifications
You must be signed in to change notification settings - Fork 0
/
cg_omp.cpp
98 lines (86 loc) · 2.18 KB
/
cg_omp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
//
// Large Scale Computing
//
// solve Ax=b
// Conjugate Gradient Method
// General Sparse Matrix A
// OpenMP Version
//
// Note: A must be symmetric and positive-defined matrix
//
#include "cg_omp.h"
int solve_cg_OMP(int dim, /* dimension */
int nnz, /* # of non-zeros in the matrix */
double *nzval, /* array of nonzero values */
int *colidx, /* array of column indices of the nonzeros */
int *rowptr, /* the first column of nonzero in each row */
double *x, /* solition */
double *b, /* right hand side */
double tol) { /* tolerance */
/* Allocate Working Spaces */
double *r = new double[dim];
double *p = new double[dim];
double *Ap = new double[dim];
int csi = std::max(10,dim / omp_get_max_threads() / 10);
//std::cout << csi << std::endl;
/* compute r0 */
#pragma omp parallel for schedule(guided,csi)
for (int i = 0; i < dim; i++) {
r[i] = b[i];
for (int j = rowptr[i]; j < rowptr[i + 1]; j++) {
r[i] -= nzval[j] * x[colidx[j]];
}
}
double rr = 0.0;
#pragma omp parallel for reduction(+:rr) schedule(guided,csi)
for (int i = 0; i < dim; i++) {
p[i] = r[i]; /* p = r */
rr += r[i] * r[i]; /* rr = r.r */
}
double nRhs = rr;
if (nRhs == 0.0) {
//for (int i = 0; i < dim; i++) x[i] = 0.0;
return 0;
}
/* cg iteration */
int count = 0;
double pAp;
double rr1;
while (rr > tol * tol * nRhs) {
// Ap = A*p
#pragma omp parallel for schedule(guided,csi)
for (int i = 0; i < dim; i++) {
Ap[i] = 0.0;
for (int j = rowptr[i]; j < rowptr[i + 1]; j++) {
Ap[i] += nzval[j] * p[colidx[j]];
}
}
// alpha = r.r / p.Ap
pAp = 0.0;
#pragma omp parallel for reduction(+:pAp) schedule(guided,csi)
for (int i = 0; i < dim; i++) {
pAp += p[i] * Ap[i];
}
double alpha = rr / pAp;
//Beta
rr1 = 0.0;
#pragma omp parallel for reduction(+:rr1) schedule(guided,csi)
for (int i = 0; i < dim; i++) {
x[i] += alpha * p[i];
r[i] -= alpha * Ap[i];
rr1 += r[i] * r[i];
}
double beta = rr1 / rr;
#pragma omp parallel for schedule(guided,csi)
for (int i = 0; i < dim; i++) {
p[i] = r[i] + beta * p[i];
}
rr = rr1;
count++;
}
/* Deallocate Working Spaces */
delete[] r;
delete[] p;
delete[] Ap;
return count;
}