= Ex32.c =
{{{#!C
/*
  Large Scale Computing
  Stokes Flow in a Cavity
  ex32.c
 
  Need Lapack and Blas Libraries
*/
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#include"stokeslet2d.h"
 
#define EPSILON 0.005 /* blob size*/
#define INTGRID 51    /* # of x-grid lines for internal velocity */
 
/* min and max macros */
#define max(a,b)        (((a) > (b)) ? (a) : (b))
#define min(a,b)        (((a) < (b)) ? (a) : (b))
 
double t_cost(void );
int main(int , char **);
int main(int argc, char **argv){
  double dp;
  int numpdepth;
  int numpwidth;
  int numparticles; 
  double *loc; /* particle location */
  double *vel; /* particle velocity */
  double *foc; /* particle force */
  double *mat; /* 2Nx2N dense matrix */
  double *cloc; /* array for internal points */
  double *cvel; /* array for internal velocity */
  int i,j;
  int nyg;
  FILE *fp;
 
  if (argc < 2){
    printf("Usage:%s [Depth of Cavity]\n",argv[0]);
    exit(-1);
  }
 
  /* get inputed depth */
  dp = atof(argv[1]);
 
  /* # of particles in depth */
  numpdepth = (int)(dp / EPSILON + 0.5);
 
  /* # of particles in width */
  numpwidth = (int)(1.0 / EPSILON + 0.5);
 
  /* total # od particles */
  numparticles = numpdepth * 2 + numpwidth * 2;
  printf("Total # of Particles=%d\n",numparticles);
 
  /* Allocate Space */
  /* DIM=2 defined by 'stokeslet2d.h' */
  loc = (double *)calloc(numparticles * DIM,sizeof(double));
  vel = (double *)calloc(numparticles * DIM,sizeof(double));
  foc = (double *)calloc(numparticles * DIM,sizeof(double));
  mat = (double *)calloc(numparticles * DIM * numparticles * DIM,sizeof(double));
  if ((loc == (double *)NULL) ||
      (vel == (double *)NULL) || 
      (foc == (double *)NULL) || 
      (mat == (double *)NULL) ){
    printf("Can't allocate memory!!!\n");
    exit(-1);
  }
 
  /* set location & velocity of particles (blob)*/
  for (i = 0 ; i < numparticles ; i++){
    foc[i * DIM] = 0.0;
    foc[i * DIM + 1] = 0.0;
    if ((i >= 0) && (i <= numpwidth)){ /* top */
      loc[i * DIM] = -0.5 + EPSILON * (double)i; /* x */
      loc[i * DIM + 1] = 0.0; /* y */
      vel[i * DIM] = 1.0;
      vel[i * DIM + 1] = 0.0;
    }else{
      if (i <= (numpwidth + numpdepth)){ /* right wall */      
	loc[i * DIM] = 0.5; /* x */
	loc[i * DIM + 1] = -EPSILON * (double)(i - numpwidth); /* y */	
	vel[i * DIM] = 0.0;
	vel[i * DIM + 1] = 0.0;
      }else{
	if (i <= (2 * numpwidth + numpdepth)){ /* bottom */
	  loc[i * DIM] = 0.5 - EPSILON * (double)(i - (numpwidth + numpdepth)); /* x */
	  loc[i * DIM + 1] = -EPSILON * numpdepth; /* y */
	  vel[i * DIM] = 0.0;
	  vel[i * DIM + 1] = 0.0;	  
	}else{                 /* left wall */    
	  loc[i * DIM] = -0.5; /* x */
	  loc[i * DIM + 1] = -EPSILON * (double)((2 * numpwidth + 2 * numpdepth) - i); /* y */	
	  vel[i * DIM] = 0.0;
	  vel[i * DIM + 1] = 0.0;	  
	}
      }
    }
  }
 
  /* make 2Nx2N Matrix */
  t_cost();
  slet2d_mkMatrix(numparticles,loc,EPSILON,mat);
  printf("setting matrix %f sec\n",t_cost());
 
  /* Sovle linear ststem */
  slet2d_solve(numparticles,mat,vel,foc);
  //slet2d_solve_CG(numparticles,mat,vel,foc);
  //slet2d_solve_GMRES(numparticles,mat,vel,foc);
 
  printf("Sovle linear ststem %f sec\n",t_cost());
 
  /* check the solution */
  slet2d_mkMatrix(numparticles,loc,EPSILON,mat);
  check_solution(numparticles,mat,vel,foc);
 
  /* deallocate big matrix */
  free(mat);
 
  /* out particle (blob) data */
  fp = fopen("particle.dat","w");
  for (i = 0 ; i < numparticles ; i++){
    fprintf(fp,"%e %e %e %e %e %e\n",
	    loc[i * DIM],loc[i * DIM + 1],
	    vel[i * DIM],vel[i * DIM + 1],
	    foc[i * DIM],foc[i * DIM + 1]);
  }
  fclose(fp);
 
  /* 
     compute internal velocity 
  */
  nyg = (int)(EPSILON * (double)(numpdepth * (INTGRID - 1)));
  cvel = (double *)calloc(INTGRID * nyg * DIM, sizeof(double));  
  cloc = (double *)calloc(INTGRID * nyg * DIM, sizeof(double));
 
  if ((cloc == (double *)NULL) ||
      (cvel == (double *)NULL)){
    printf("Can't allocate memory!!!\n");
    exit(-1);
  }
  /* setting location */
  for (j = 0 ; j < nyg ; j++){
    for (i = 0 ; i < INTGRID ; i++){
      cloc[DIM * (i + INTGRID * j)    ] = -0.5 + (double)i / (double)(INTGRID - 1);
      cloc[DIM * (i + INTGRID * j) + 1] = -(double)j / (double)(INTGRID - 1);
    }
  }
 
  /* compute velocities */
  t_cost();
  slet2d_velocity(numparticles, loc, foc, EPSILON, INTGRID * nyg, cloc, cvel);  
  printf("Compute internal velocity %f sec\n",t_cost());
 
  /* out velocities */
  fp = fopen("res.dat","w");
  for (j = 0 ; j < nyg ; j++){
    for (i = 0 ; i < INTGRID ; i++){
      fprintf(fp,"%e %e %e %e\n",
	      cloc[DIM * (i + INTGRID * j)    ], cloc[DIM * (i + INTGRID * j) + 1],
	      cvel[DIM * (i + INTGRID * j)    ], cvel[DIM * (i + INTGRID * j) + 1]);
    }
  }
  fclose(fp);
 
  /* free */
  free(cloc);
  free(cvel);
 
  free(loc);
  free(vel);
  free(foc);
}
 
/* for timing */
double t_cost(void ){
  static clock_t ptm;
  static int fl = 0;
  clock_t tm;
  double sec;
 
  tm = clock();
  if (fl == 0) ptm = tm;
  fl = 1;
 
  sec = (double)(tm - ptm) / (double)CLOCKS_PER_SEC;
 
  ptm = tm;
 
  return sec;
}
stokeslet2d.c

/*
  Large Scale Computing
  Stokeslet for 2D
 
  Ricardo Cortez,"The Method of Regularized Stokeslets", 
  (2001), SIAM J. Sci. Comput., Vol.23, No.4, pp.1204
 
  assuming mu = 1
*/
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include"stokeslet2d.h"
#include"gmres.h"
 
double term1(double ,double );
double term2(double ,double );
/* Blas Function */
double ddot_(int *,double *,int *,double *,int *);
double dnrm2_(int *, double *, int *);
 
/* make Matrix */
void slet2d_mkMatrix(int np,double *loc,double ep,double *mat){
  int i,j;
  double r;
  double dx,dy;
  double tr1,tr2;
 
  /*zeros mat*/
  for (i = 0 ; i < DIM * DIM * np * np ; i++) mat[i] = 0.0;
 
  /* make mat */
  /* Because of symmetric, we can loop over half */
  for (i = 0 ; i < np ; i++){
    for (j = i ; j < np ; j++){
      dx = loc[i * DIM    ] - loc[j * DIM    ];
      dy = loc[i * DIM + 1] - loc[j * DIM + 1];
      r = sqrt(dx * dx + dy * dy);
 
      tr1 = term1(r,ep) / (4.0 * M_PI);
      tr2 = term2(r,ep) / (4.0 * M_PI); 
 
      /* diagoanl elements and upper half */
      mat[i * DIM +      j * DIM      * np * DIM] += -tr1 + tr2 * dx * dx;
      mat[i * DIM +     (j * DIM + 1) * np * DIM] +=        tr2 * dx * dy;
      mat[i * DIM + 1 +  j * DIM      * np * DIM] +=        tr2 * dx * dy;
      mat[i * DIM + 1 + (j * DIM + 1) * np * DIM] += -tr1 + tr2 * dy * dy;      
      if (i != j){
	/* lower half */
	mat[j * DIM +      i * DIM      * np * DIM] += -tr1 + tr2 * dx * dx;
	mat[j * DIM +     (i * DIM + 1) * np * DIM] +=        tr2 * dx * dy;
	mat[j * DIM + 1 +  i * DIM      * np * DIM] +=        tr2 * dx * dy;
	mat[j * DIM + 1 + (i * DIM + 1) * np * DIM] += -tr1 + tr2 * dy * dy;
      }
    }
  }
}
 
/* Sovle Liear ststem */
/* Use Lapack */
void slet2d_solve(int np,double *mat,double *vel,double *foc){
  int nrhs;
  int n;
  int lda;
  int ldb;
  int *ipiv;
  int i;
  int info;
 
  /*solve Ax=b */
  /* A (amat) is n x n matrix */
  nrhs = 1; /* # of RHS is 1 */
  n = DIM * np;
  lda = DIM * np;
  ldb = DIM * np;
  ipiv = calloc(DIM * np,sizeof(int)); /* The pivot indices that define the permutation matrix P;
					  row i of the matrix was interchanged with row IPIV(i). */
  if (ipiv == NULL) exit(-1);
 
  /* set RHS */
  for (i = 0 ; i < n ; i++) foc[i] = vel[i];
 
  /* solve with LAPACK */
  dgesv_(&n, &nrhs, mat, &lda, ipiv, foc, &ldb, &info);
 
  /* check */
  /* if (info == 0) printf("successfully done\n"); */
  if (info < 0){
    printf("the %d-th argument had an illegal value\n",-info);
    exit(-1);
  }
  if (info > 0){
    printf("U(%d,%d) is exactly zero.\n",info,info);
    exit(-1);
  }
 
  /* free */
  free(ipiv);
}
 
/* CG use blas */
void slet2d_solve_CG(int np,double *mat,double *b,double *x){
  double tol = 1.0e-8;
  double *r;
  double *p;
  double *Ap;
  double rr,pAp,rr1,alpha,beta;
  int i,j,count;
  double al,bt,normb;
  char tr[1] = "N";
  int inc,n;
 
  n = DIM * np;
  inc = 1;
 
  /* Check if the solution is trivial. */
  normb = dnrm2_(&n, b, &inc);
  if (normb <= 0.0){
    for (i = 0 ; i < n ; i++) x[i] = 0.0;
    return;
  }  
 
  /* Allocate Working Spaces */
  r = (double *)calloc(n, sizeof(double));
  p = (double *)calloc(n, sizeof(double));
  Ap = (double *)calloc(n, sizeof(double));
  if ((r == NULL) || (p == NULL) || (Ap == NULL)){
    printf("memory allocation failed\n");
    return ;
  }
 
  /* compute r0 = b - Ax */
  al = -1.0;
  bt = 1.0;
  dcopy_(&n,b,&inc,r,&inc); /* r = b */
  dgemv_(tr,&n,&n,&al,mat,&n,x,&inc,&bt,r,&inc);  
 
  rr = 0.0;
  dcopy_(&n,r,&inc,p,&inc); /* p = r */
  rr = ddot_(&n,r,&inc,r,&inc); /* rr = r.r */ 
  printf("rr=%e\n",rr);
 
  /* cg iteration */
  count = 0;
  while(rr  > tol * tol * normb * normb){    
    // Ap = A*p
    al = 1.0;
    bt = 0.0;
    dgemv_(tr,&n,&n,&al,mat,&n,p,&inc,&bt,Ap,&inc);  
 
    /* alpha = r.r / p.Ap */
    pAp = 0.0;
    pAp = ddot_(&n,p,&inc,Ap,&inc); /* pAp = p.Ap */ 
    alpha = rr / pAp;
 
    /* Beta */
    rr1 = 0.0;
    daxpy_(&n,&alpha,p,&inc,x,&inc); /* x += alpha * p */
    al = -alpha;
    daxpy_(&n,&al,Ap,&inc,r,&inc); /* r -= alpha * Ap */
    rr1 = ddot_(&n,r,&inc,r,&inc); /* rr1 = r.r */     
 
    beta = rr1 / rr;
    /* p = r + beta * p  no blas routine :( */
    for (i = 0 ; i < n ; i++){
      p[i] = r[i] + beta * p[i];
    }
 
    rr = rr1;
    count++;
  }
 
  /* Deallocate Working Spaces */
  free(r); 
  free(p); 
  free(Ap);
}
 
void check_solution(int np,double *mat,double *b,double *x){
  double *r;
  double al,bt,rr;
  char tr[1] = "N";
  int inc,n;
 
  n = DIM * np;
  inc = 1;
 
  /* Allocate Working Spaces */
  r = (double *)calloc(n, sizeof(double));  
 
  /* compute r0 = b - Ax */
  al = -1.0;
  bt = 1.0;
  dcopy_(&n,b,&inc,r,&inc); /* r = b */
  dgemv_(tr,&n,&n,&al,mat,&n,x,&inc,&bt,r,&inc);  
 
  /* rr = |r| */
  rr = dnrm2_(&n, r, &inc);  
 
  printf("|b - Ax|=%e\n",rr);
  free(r);
}
 
/* Use Lapack */
void slet2d_solve_GMRES(int np,double *mat,double *vel,double *foc){
  int n;
  int restart;
  int max_iterations;
  double tol;
 
  /* set GMRES parameters */
  restart = 100;
  max_iterations = 100;
  tol = 1.0e-8;
 
  /*solve Ax=b */
  /* A (amat) is n x n matrix */
  n = DIM * np;
 
  /* solve with GMRES */
  gmres_solve(mat, foc, vel, n, restart, &tol, &max_iterations);
}
 
/* compute velocity */
void slet2d_velocity(int np, double *loc, double *foc, double ep, int nump, double *cloc,double *cvel){
  int i,p;
  double r;
  double dx,dy;
  double tr1,tr2;
 
  /* loop for grid */
  for (p = 0 ; p < nump ; p++){
    /* zeros */
    cvel[p * DIM    ] = 0.0;
    cvel[p * DIM + 1] = 0.0;
 
    /* loop for partilces (blob) */
    for (i = 0 ; i < np ; i++){
      dx = cloc[p * DIM    ] - loc[i * DIM    ];
      dy = cloc[p * DIM + 1] - loc[i * DIM + 1];
      r = sqrt(dx * dx + dy * dy);
 
      tr1 = term1(r,ep) / (4.0 * M_PI);
      tr2 = term2(r,ep) / (4.0 * M_PI);       
 
      tr2 *= foc[i * DIM] * dx + foc[i * DIM + 1] * dy; 
 
      cvel[p * DIM    ] += -foc[i * DIM    ] * tr1 + tr2 * dx;
      cvel[p * DIM + 1] += -foc[i * DIM + 1] * tr1 + tr2 * dy;
    }
  }
}
 
double term1(double r,double ep){
  double sq;
 
  sq = sqrt(r * r + ep * ep);
 
  return log(sq + ep) - ep * (sq + 2.0 * ep) / (sq + ep) / sq;
}
 
 
double term2(double r,double ep){
  double sq;
 
  sq = sqrt(r * r + ep * ep);
 
  return (sq + 2.0 * ep) / (sq + ep) / (sq + ep) / sq;
}}}}