| 1 | = Bcg.c = |
| 2 | {{{#!C |
| 3 | /* |
| 4 | Large Scale Computing |
| 5 | |
| 6 | solve Ax=b |
| 7 | Bi-Conjugate Gradient Method |
| 8 | General Sparse Matrix A |
| 9 | |
| 10 | solve_cg solve Ax=b with BCG method |
| 11 | this returns the number of iterations, or negative if failed. |
| 12 | */ |
| 13 | #include<stdio.h> |
| 14 | |
| 15 | int solve_bcg(int dim, /* dimension */ |
| 16 | int nnz, /* # of non-zeros in the matrix */ |
| 17 | double *nzval, /* array of nonzero values */ |
| 18 | int *colidx, /* array of column indices of the nonzeros */ |
| 19 | int *rowptr, /* the first column of nonzero in each row */ |
| 20 | /* rowptr[j] stores the location of nzval[] and colidx[], */ |
| 21 | /* which starts row j. This has dim+1 entry that is nnz */ |
| 22 | double *x, /* solition */ |
| 23 | double *b, /* right hand side */ |
| 24 | double tol); /* tolerance */ |
| 25 | }}} |
| 26 | {{{#!C |
| 27 | /* |
| 28 | Large Scale Computing |
| 29 | |
| 30 | solve Ax=b |
| 31 | Bi-Conjugate Gradient Method |
| 32 | General Sparse Matrix A |
| 33 | */ |
| 34 | #include<stdio.h> |
| 35 | #include<stdlib.h> |
| 36 | #include<math.h> |
| 37 | |
| 38 | int solve_bcg(int dim, /* dimension */ |
| 39 | int nnz, /* # of non-zeros in the matrix */ |
| 40 | double *nzval, /* array of nonzero values */ |
| 41 | int *colidx, /* array of column indices of the nonzeros */ |
| 42 | int *rowptr, /* the first column of nonzero in each row */ |
| 43 | double *x, /* solition */ |
| 44 | double *b, /* right hand side */ |
| 45 | double tol){ /* tolerance */ |
| 46 | double *r; |
| 47 | double *p; |
| 48 | double *Ap; |
| 49 | double *rt; |
| 50 | double *pt; |
| 51 | double *ATpt; |
| 52 | double *nzvalT; /* AT array of nonzero values */ |
| 53 | int *colidxT; /* AT array of column indices of the nonzeros */ |
| 54 | int *rowptrT; /* AT the first column of nonzero in each row */ |
| 55 | double rr,ptAp,rr1,alpha,beta; |
| 56 | int i,j,count; |
| 57 | int col, relpos; |
| 58 | int *marker; |
| 59 | |
| 60 | /* Allocate Working Spaces */ |
| 61 | r = (double *)calloc(dim, sizeof(double)); |
| 62 | rt = (double *)calloc(dim, sizeof(double)); |
| 63 | p = (double *)calloc(dim, sizeof(double)); |
| 64 | pt = (double *)calloc(dim, sizeof(double)); |
| 65 | Ap = (double *)calloc(dim, sizeof(double)); |
| 66 | ATpt = (double *)calloc(dim, sizeof(double)); |
| 67 | nzvalT = (double *)calloc(nnz, sizeof(double)); |
| 68 | colidxT = (int *)calloc(nnz, sizeof(int)); |
| 69 | rowptrT = (int *)calloc(dim+1, sizeof(int)); |
| 70 | if ((r == NULL) || (rt == NULL) || |
| 71 | (p == NULL) || (pt == NULL) || |
| 72 | (Ap == NULL) || (ATpt == NULL) || |
| 73 | (nzvalT == NULL) || (colidxT == NULL) || |
| 74 | (rowptrT == NULL)){ |
| 75 | printf("memory allocation failed\n"); |
| 76 | return -1; |
| 77 | } |
| 78 | |
| 79 | /* |
| 80 | Transpose A |
| 81 | */ |
| 82 | /* Allocate marker */ |
| 83 | marker = (int *)calloc(dim, sizeof(int)); |
| 84 | for (i = 0 ; i < dim ; i++) marker[i] = 0; |
| 85 | |
| 86 | /* Get counts of each column of A, and set up column pointers */ |
| 87 | for (i = 0 ; i < dim ; i++){ |
| 88 | for (j = rowptr[i] ; j < rowptr[i+1]; j++) ++marker[colidx[j]]; |
| 89 | } |
| 90 | rowptrT[0] = 0; |
| 91 | for (j = 0 ; j < dim ; j++) { |
| 92 | rowptrT[j+1] = rowptrT[j] + marker[j]; |
| 93 | marker[j] = rowptrT[j]; |
| 94 | } |
| 95 | |
| 96 | /* Transfer the matrix into the compressed column storage. */ |
| 97 | for (i = 0 ; i < dim ; i++) { |
| 98 | for (j = rowptr[i] ; j < rowptr[i+1] ; j++) { |
| 99 | col = colidx[j]; |
| 100 | relpos = marker[col]; |
| 101 | colidxT[relpos] = i; |
| 102 | nzvalT[relpos] = nzval[j]; |
| 103 | ++marker[col]; |
| 104 | } |
| 105 | } |
| 106 | free(marker); |
| 107 | |
| 108 | /* |
| 109 | Bi-Conjugate Gradient |
| 110 | */ |
| 111 | /* compute r0 */ |
| 112 | for (i = 0 ; i < dim ; i++){ |
| 113 | r[i] = b[i]; |
| 114 | for (j = rowptr[i] ; j < rowptr[i+1] ; j++){ |
| 115 | r[i] -= nzval[j] * x[colidx[j]]; |
| 116 | } |
| 117 | } |
| 118 | |
| 119 | rr = 0.0; |
| 120 | for (i = 0 ; i < dim ; i++){ |
| 121 | p[i] = r[i]; /* p0 = r0 */ |
| 122 | rt[i] = r[i]; /* rt0 = r0 */ |
| 123 | pt[i] = r[i]; /* pt0 = r0 */ |
| 124 | rr += r[i] * r[i]; /* rr = r.r */ |
| 125 | } |
| 126 | |
| 127 | /* bcg iteration */ |
| 128 | count = 0; |
| 129 | while(rr > tol){ |
| 130 | for (i = 0 ; i < dim ; i++){ |
| 131 | /* Ap = A*p */ |
| 132 | Ap[i] = 0.0; |
| 133 | for (j = rowptr[i] ; j < rowptr[i+1] ; j++){ |
| 134 | Ap[i] += nzval[j] * p[colidx[j]]; |
| 135 | } |
| 136 | |
| 137 | /* ATpt = A^T*pt */ |
| 138 | ATpt[i] = 0.0; |
| 139 | for (j = rowptrT[i] ; j < rowptrT[i+1] ; j++){ |
| 140 | ATpt[i] += nzvalT[j] * pt[colidxT[j]]; |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | // alpha = r.r / pt.Ap |
| 145 | ptAp = 0.0; |
| 146 | for (i = 0 ; i < dim ; i++){ |
| 147 | ptAp += pt[i] * Ap[i]; |
| 148 | } |
| 149 | alpha = rr / ptAp; |
| 150 | |
| 151 | //Beta |
| 152 | rr1 = 0.0; |
| 153 | for (i = 0 ; i < dim ; i++){ |
| 154 | x[i] += alpha * p[i]; |
| 155 | r[i] -= alpha * Ap[i]; |
| 156 | rt[i] -= alpha * ATpt[i]; |
| 157 | rr1 += r[i] * rt[i]; |
| 158 | } |
| 159 | |
| 160 | beta = rr1 / rr; |
| 161 | |
| 162 | for (i = 0 ; i < dim ; i++){ |
| 163 | p[i] = r[i] + beta * p[i]; |
| 164 | pt[i] = rt[i] + beta * pt[i]; |
| 165 | } |
| 166 | |
| 167 | rr = rr1; |
| 168 | count++; |
| 169 | } |
| 170 | |
| 171 | /* Deallocate Working Spaces */ |
| 172 | free(r); |
| 173 | free(p); |
| 174 | free(Ap); |
| 175 | free(rt); |
| 176 | free(pt); |
| 177 | free(ATpt); |
| 178 | free(nzvalT); |
| 179 | free(colidxT); |
| 180 | free(rowptrT); |
| 181 | |
| 182 | return count; |
| 183 | } |
| 184 | }}} |