= Bcg.c = 
{{{#!C
/*
  Large Scale Computing
 
  solve Ax=b  
  Bi-Conjugate Gradient Method
  General Sparse Matrix A
 
  solve_cg solve Ax=b with BCG method
  this returns the number of iterations, or negative if failed.
 */
#include<stdio.h>
 
int solve_bcg(int dim,          /* dimension */
	      int nnz,          /* # of non-zeros in the matrix */
	      double *nzval,    /* array of nonzero values */
	      int *colidx,      /* array of column indices of the nonzeros */
	      int *rowptr,      /* the first column of nonzero in each row */
	      /* rowptr[j] stores the location of nzval[] and colidx[], */
	      /* which starts row j. This has dim+1 entry that is nnz */
	      double *x,        /* solition */
	      double *b,        /* right hand side */
	      double tol);      /* tolerance */
}}}
{{{#!C
/*
  Large Scale Computing
 
  solve Ax=b  
  Bi-Conjugate Gradient Method
  General Sparse Matrix A
 */
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
 
int solve_bcg(int dim,          /* dimension */
	      int nnz,          /* # of non-zeros in the matrix */
	      double *nzval,    /* array of nonzero values */
	      int *colidx,      /* array of column indices of the nonzeros */
	      int *rowptr,      /* the first column of nonzero in each row */
	      double *x,        /* solition */
	      double *b,        /* right hand side */
	      double tol){      /* tolerance */
  double *r;
  double *p;
  double *Ap;
  double *rt;
  double *pt;
  double *ATpt;
  double *nzvalT;    /* AT array of nonzero values */
  int *colidxT;      /* AT array of column indices of the nonzeros */
  int *rowptrT;      /* AT the first column of nonzero in each row */  
  double rr,ptAp,rr1,alpha,beta;
  int i,j,count;
  int col, relpos;
  int *marker;
 
  /* Allocate Working Spaces */
  r = (double *)calloc(dim, sizeof(double));
  rt = (double *)calloc(dim, sizeof(double));
  p = (double *)calloc(dim, sizeof(double));
  pt = (double *)calloc(dim, sizeof(double));
  Ap = (double *)calloc(dim, sizeof(double));
  ATpt = (double *)calloc(dim, sizeof(double));
  nzvalT = (double *)calloc(nnz, sizeof(double));
  colidxT = (int *)calloc(nnz, sizeof(int));
  rowptrT = (int *)calloc(dim+1, sizeof(int));
  if ((r == NULL) || (rt == NULL) || 
      (p == NULL) || (pt == NULL) || 
      (Ap == NULL) || (ATpt == NULL) ||
      (nzvalT == NULL) || (colidxT == NULL) ||
      (rowptrT == NULL)){
    printf("memory allocation failed\n");
    return -1;
  }
 
  /*
    Transpose A
  */
  /* Allocate marker */
  marker = (int *)calloc(dim, sizeof(int));
  for (i = 0 ; i < dim ; i++) marker[i] = 0;
 
  /* Get counts of each column of A, and set up column pointers */
  for (i = 0 ; i < dim ; i++){
    for (j = rowptr[i] ; j < rowptr[i+1]; j++) ++marker[colidx[j]];
  }
  rowptrT[0] = 0;
  for (j = 0 ; j < dim ; j++) {
    rowptrT[j+1] = rowptrT[j] + marker[j];
    marker[j] = rowptrT[j];
  }
 
  /* Transfer the matrix into the compressed column storage. */
  for (i = 0 ; i < dim ; i++) {
    for (j = rowptr[i] ; j < rowptr[i+1] ; j++) {
      col = colidx[j];
      relpos = marker[col];
      colidxT[relpos] = i;
      nzvalT[relpos] = nzval[j];
      ++marker[col];
    }
  }
  free(marker);
 
  /*
    Bi-Conjugate Gradient 
   */
  /* compute r0 */
  for (i = 0 ; i < dim ; i++){
    r[i] = b[i];
    for (j = rowptr[i] ; j < rowptr[i+1] ; j++){
      r[i] -= nzval[j] * x[colidx[j]];
    }
  }
 
  rr = 0.0;
  for (i = 0 ; i < dim ; i++){
    p[i] = r[i];    /* p0 = r0 */
    rt[i] = r[i];   /* rt0 = r0 */
    pt[i] = r[i];   /* pt0 = r0 */
    rr += r[i] * r[i];  /* rr = r.r */
  }
 
  /* bcg iteration */
  count = 0;
  while(rr > tol){    
    for (i = 0 ; i < dim ; i++){
      /* Ap = A*p */
      Ap[i] = 0.0;
      for (j = rowptr[i] ; j < rowptr[i+1] ; j++){
	Ap[i] += nzval[j] * p[colidx[j]];
      }
 
      /* ATpt = A^T*pt */
      ATpt[i] = 0.0;
      for (j = rowptrT[i] ; j < rowptrT[i+1] ; j++){
	ATpt[i] += nzvalT[j] * pt[colidxT[j]];
      }
    }
 
    // alpha = r.r / pt.Ap
    ptAp = 0.0;
    for (i = 0 ; i < dim ; i++){
      ptAp += pt[i] * Ap[i];
    }
    alpha = rr / ptAp;
 
    //Beta
    rr1 = 0.0;
    for (i = 0 ; i < dim ; i++){
      x[i] += alpha * p[i];
      r[i] -= alpha * Ap[i];
      rt[i] -= alpha * ATpt[i];
      rr1 += r[i] * rt[i];
    }  
 
    beta = rr1 / rr;
 
    for (i = 0 ; i < dim ; i++){
      p[i] = r[i] + beta * p[i];
      pt[i] = rt[i] + beta * pt[i];
    }
 
    rr = rr1;
    count++;
  }
 
  /* Deallocate Working Spaces */
  free(r); 
  free(p); 
  free(Ap);
  free(rt); 
  free(pt); 
  free(ATpt);
  free(nzvalT);
  free(colidxT);
  free(rowptrT);
 
  return count;
}
}}}