Context Navigation

← Previous Change
Wiki History
Next Change →

ex43.c

Timestamp:: 05/14/15 14:53:47 (9 years ago)
Author:: cmaggio
Comment:: migrated content from ccs wiki

Legend:

: Unmodified
: Added
: Removed
: Modified

cypress/Programming/StokesFlow/ex43.c

               v1
+= Ex43.c =
+{{{#!C
+/*
+  Large Scale Computing
+  Stokes Flow in a Cavity
+  MPI version
+  ex43.c
+*/
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include<time.h>
+#include"mpi.h"
+#include"stokeslet2d_dist.h"
+#define EPSILON 0.005 /* blob size*/
+#define INTGRID 51    /* # of x-grid lines for internal velocity */
+/* min and max macros */
+#define max(a,b)        (((a) > (b)) ? (a) : (b))
+#define min(a,b)        (((a) < (b)) ? (a) : (b))
+double t_cost(void );
+int main(int , char **);
+int main(int argc, char **argv){
+  double dp;
+  int numpdepth;
+  int numpwidth;
+  int numparticles;
+  double *loc; /* particle location */
+  double *vel; /* particle velocity */
+  double *foc; /* particle force */
+  double *mat; /* 2Nx2N dense matrix */
+  double cloc[2]; /* internal points */
+  double cvel[2]; /* internal velocity */
+  int mystart,myend,mysize;
+  int *ostart,*oend;
+  int myid,numproc;
+  int i,j,li;
+  int nyg,cpu;
+  FILE *fp;
+  char fn[256];
+  /* Initialize */
+  MPI_Init(&argc,&argv);
+  /* get myid and # of processors */
+  MPI_Comm_size(MPI_COMM_WORLD,&numproc);
+  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+  if (argc < 2){
+    if (myid == 0) printf("Usage:%s [Depth of Cavity]\n",argv[0]);
+    MPI_Abort(MPI_COMM_WORLD, -1);
+  }
+  /* get inputed depth */
+  dp = atof(argv[1]);
+  /* # of particles in depth */
+  numpdepth = (int)(dp / EPSILON + 0.5);
+  /* # of particles in width */
+  numpwidth = (int)(1.0 / EPSILON + 0.5);
+  /* total # od particles */
+  numparticles = numpdepth * 2 + numpwidth * 2;
+  if (myid == 0) printf("Total # of Particles=%d\n",numparticles);
+  /* divide blobs into # of cpus */
+  mystart = (numparticles / numproc) * myid;
+  if (numparticles % numproc > myid){
+    mystart += myid;
+    myend = mystart + (numparticles / numproc) + 1;
+  }else{
+    mystart += numparticles % numproc;
+    myend = mystart + (numparticles / numproc);
+  }
+  mysize = myend - mystart;
+  printf("CPU%d %d ~ %d : size=%d\n",myid,mystart,myend,mysize);
+  /* collect start end info */
+  ostart = (int *)calloc(numproc, sizeof(int));
+  oend = (int *)calloc(numproc, sizeof(int));
+  MPI_Allgather(&mysize, 1, MPI_INT, oend, 1, MPI_INT, MPI_COMM_WORLD);
+  ostart[0] = 0;
+  for (cpu = 1 ; cpu < numproc ; cpu++){
+    ostart[cpu] = ostart[cpu-1] + oend[cpu-1];
+    oend[cpu-1] += ostart[cpu-1];
+  }
+  oend[numproc-1] += ostart[numproc-1];
+  /* Allocate Space */
+  /* DIM=2 defined by 'stokeslet2d.h' */
+  loc = (double *)calloc(mysize * DIM,sizeof(double));
+  vel = (double *)calloc(mysize * DIM,sizeof(double));
+  foc = (double *)calloc(mysize * DIM,sizeof(double));
+  mat = (double *)calloc(mysize * DIM * numparticles * DIM,sizeof(double));
+  if ((loc == (double *)NULL) ||
+      (vel == (double *)NULL) ||
+      (foc == (double *)NULL) ||
+      (mat == (double *)NULL) ){
+    printf("Can't allocate memory!!!\n");
+    exit(-1);
+  }
+  /* Set location & velocity of particles (blob)*/
+  for (i = mystart ; i < myend ; i++){
+    li = i - mystart;
+    foc[li * DIM] = 0.0;
+    foc[li * DIM + 1] = 0.0;
+    if ((i >= 0) && (i <= numpwidth)){ /* top */
+      loc[li * DIM] = -0.5 + EPSILON * (double)i; /* x */
+      loc[li * DIM + 1] = 0.0; /* y */
+      vel[li * DIM] = 1.0;
+      vel[li * DIM + 1] = 0.0;
+    }else{
+      if (i <= (numpwidth + numpdepth)){ /* right wall */
+        loc[li * DIM] = 0.5; /* x */
+        loc[li * DIM + 1] = -EPSILON * (double)(i - numpwidth); /* y */
+        vel[li * DIM] = 0.0;
+        vel[li * DIM + 1] = 0.0;
+      }else{
+        if (i <= (2 * numpwidth + numpdepth)){ /* bottom */
+          loc[li * DIM] = 0.5 - EPSILON * (double)(i - (numpwidth + numpdepth)); /* x */
+          loc[li * DIM + 1] = -EPSILON * numpdepth; /* y */
+          vel[li * DIM] = 0.0;
+          vel[li * DIM + 1] = 0.0;
+        }else{                 /* left wall */
+          loc[li * DIM] = -0.5; /* x */
+          loc[li * DIM + 1] = -EPSILON * (double)((2 * numpwidth + 2 * numpdepth) - i); /* y */
+          vel[li * DIM] = 0.0;
+          vel[li * DIM + 1] = 0.0;
+        }
+      }
+    }
+  }
+  /* make 2Nx2N Matrix */
+  t_cost();
+  slet2d_mkMatrix_dist(numparticles,ostart,oend,loc,EPSILON,mat);
+  if (myid == 0) printf("setting matrix %f sec\n",t_cost());
+  /* Sovle linear ststem */
+  t_cost();
+  slet2d_solve_CG_dist(numparticles,ostart,oend,mat,vel,foc);
+  if (myid == 0) printf("Sovle linear ststem %f sec\n",t_cost());
+  /* deallocate big matrix */
+  free(mat);
+  /* out particle (blob) data */
+  sprintf(fn,"particle%02d.dat",myid);
+  fp = fopen(fn,"w");
+  for (i = 0 ; i < mysize ; i++){
+    fprintf(fp,"%e %e %e %e %e %e\n",
+            loc[i * DIM],loc[i * DIM + 1],
+            vel[i * DIM],vel[i * DIM + 1],
+            foc[i * DIM],foc[i * DIM + 1]);
+  }
+  fclose(fp);
+  /*
+     compute internal velocity
+  */
+  t_cost();
+  if (myid == 0){
+    fp = fopen("res.dat","w");
+  }
+  nyg = (int)(EPSILON * (double)(numpdepth * (INTGRID - 1)));
+  for (j = 0 ; j < nyg ; j++){
+    for (i = 0 ; i < INTGRID ; i++){
+      cloc[0] = -0.5 + (double)i / (double)(INTGRID - 1);
+      cloc[1] = -(double)j / (double)(INTGRID - 1);
+      slet2d_velocity_dist(numparticles, ostart,oend,loc, foc, EPSILON, cloc, cvel);
+      if (myid == 0){
+        fprintf(fp,"%e %e %e %e\n",cloc[0], cloc[1],cvel[0], cvel[1]);
+      }
+    }
+  }
+  if (myid == 0){
+    fclose(fp);
+  }
+  if (myid == 0)printf("Compute internal velocity %f sec\n",t_cost());
+  /* free */
+  free(loc);
+  free(vel);
+  free(foc);
+  printf("CPU%d is Done\n",myid);
+  MPI_Finalize();
+}
+/* for timing */
+double t_cost(void ){
+  static clock_t ptm;
+  static int fl = 0;
+  clock_t tm;
+  double sec;
+  tm = clock();
+  if (fl == 0) ptm = tm;
+  fl = 1;
+  sec = (double)(tm - ptm) / (double)CLOCKS_PER_SEC;
+  ptm = tm;
+  return sec;
+}
+stokeslet2d_dist.c
+/*
+  Large Scale Computing
+  Stokeslet for 2D
+  MPI Version
+  Ricardo Cortez,"The Method of Regularized Stokeslets",
+  (2001), SIAM J. Sci. Comput., Vol.23, No.4, pp.1204
+  assuming mu = 1
+*/
+#include<stdio.h>
+#include<stdlib.h>
+#include<math.h>
+#include"mpi.h"
+#include"stokeslet2d_dist.h"
+/* min and max macros */
+#define max(a,b)        (((a) > (b)) ? (a) : (b))
+#define min(a,b)        (((a) < (b)) ? (a) : (b))
+double term1(double ,double );
+double term2(double ,double );
+/* Blas Function */
+double ddot_(int *,double *,int *,double *,int *);
+double dnrm2_(int *, double *, int *);
+/* make Matrix */
+void slet2d_mkMatrix_dist(int np,int *ostart,int *oend,double *loc,double ep,double *mat){
+  int i,j;
+  int li,lj;
+  double r;
+  double dx,dy;
+  double tr1,tr2;
+  int mysize,maxsize;
+  int stend[2];
+  int myid,numproc;
+  double *oloc,*ploc;
+  int cpu;
+  /* get myid and # of processors */
+  MPI_Comm_size(MPI_COMM_WORLD,&numproc);
+  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+  /* get max size */
+  maxsize = 0;
+  for (cpu = 0 ; cpu < numproc ; cpu++){
+    maxsize = max(oend[cpu] - ostart[cpu],maxsize);
+  }
+  mysize = oend[myid] - ostart[myid];
+  /* alloc */
+  oloc = (double *)calloc(maxsize * DIM,sizeof(double));
+  /*zeros mat*/
+  for (i = 0 ; i < DIM * DIM * np * mysize ; i++) mat[i] = 0.0;
+  /* make mat */
+  for (cpu = 0 ; cpu < numproc ; cpu++){
+    /* get location vector from others */
+    ploc = oloc;
+    if (myid == cpu) ploc = loc;
+    MPI_Bcast(ploc, DIM * (oend[cpu] - ostart[cpu]), MPI_DOUBLE, cpu, MPI_COMM_WORLD);
+    /* compute local matrix*/
+    for (i = ostart[cpu] ; i < oend[cpu] ; i++){
+      for (j = ostart[myid] ; j < oend[myid] ; j++){
+        li = i - ostart[cpu];
+        lj = j - ostart[myid];
+        dx = ploc[li * DIM    ] - loc[lj * DIM    ];
+        dy = ploc[li * DIM + 1] - loc[lj * DIM + 1];
+        r = sqrt(dx * dx + dy * dy);
+        tr1 = term1(r,ep) / (4.0 * M_PI);
+        tr2 = term2(r,ep) / (4.0 * M_PI);
+        mat[i * DIM +      lj * DIM      * np * DIM] += -tr1 + tr2 * dx * dx;
+        mat[i * DIM +     (lj * DIM + 1) * np * DIM] +=        tr2 * dx * dy;
+        mat[i * DIM + 1 +  lj * DIM      * np * DIM] +=        tr2 * dx * dy;
+        mat[i * DIM + 1 + (lj * DIM + 1) * np * DIM] += -tr1 + tr2 * dy * dy;
+      }
+    }
+  }
+  free(oloc);
+}
+/* Sovle Liear ststem */
+/* CG */
+void slet2d_solve_CG_dist(int np, int *ostart, int *oend, double *mat, double *b, double *x){
+  double tol = 1.0e-8;
+  double *r;
+  double *p;
+  double *Ap;
+  double *work;
+  double rr,pAp,rr1,alpha,beta;
+  int i,j,count;
+  double w,normb;
+  int n;
+  int ms,me;
+  int cpu;
+  int myid,numproc;
+  int li,lj;
+  /* get myid and # of processors */
+  MPI_Comm_size(MPI_COMM_WORLD,&numproc);
+  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+  n = DIM * np;
+  ms = DIM * ostart[myid];
+  me = DIM * oend[myid];
+  /* Allocate Working Spaces */
+  r = (double *)calloc(me - ms, sizeof(double));
+  p = (double *)calloc(me - ms, sizeof(double));
+  Ap = (double *)calloc(me - ms, sizeof(double));
+  work = (double *)calloc(n, sizeof(double));
+  if ((r == NULL) ||
+      (p == NULL) ||
+      (Ap == NULL) ||
+      (work == NULL)){
+    printf("memory allocation failed\n");
+    return ;
+  }
+  /* initialize */
+  for (i = 0 ; i < me - ms ; i++){
+    r[i] = p[i] = Ap[i] = 0.0;
+  }
+  /* |b| */
+  w = 0.0;
+  for (i = 0 ; i < me - ms ; i++){
+    w += b[i] * b[i]; /* w = b.b */
+  }
+  MPI_Allreduce(&w, &normb, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  normb = sqrt(normb);
+  /* compute r0 = b - Ax */
+  for (i = 0 ; i < n ; i++) work[i] = 0.0;
+  for (i = ms ; i < me ; i++){
+    li = i - ms;
+    work[i] = b[li];
+  }
+  for (j = ms ; j < me ; j++){
+    lj = j - ms;
+    for (i = 0 ; i < n ; i++){
+      work[i] -= mat[i + lj * n] * x[lj];
+    }
+  }
+  for (cpu = 0 ; cpu < numproc ; cpu++){
+    MPI_Reduce(&work[DIM * ostart[cpu]], r, DIM * (oend[cpu] - ostart[cpu]),
+               MPI_DOUBLE, MPI_SUM, cpu, MPI_COMM_WORLD);
+  }
+  w = 0.0;
+  for (i = 0 ; i < me - ms ; i++){
+    p[i] = r[i];  /* p = r */
+    w += r[i] * r[i]; /* rr = r.r */
+  }
+  MPI_Allreduce(&w, &rr, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+  /* cg iteration */
+  count = 0;
+  while(rr  > tol * tol * normb * normb){
+    /* Ap = A*p */
+    for (i = 0 ; i < n ; i++) work[i] = 0.0;
+    for (j = ms ; j < me ; j++){
+      lj = j - ms;
+      for (i = 0 ; i < n ; i++){
+        work[i] += mat[i + lj * n] * p[lj];
+      }
+    }
+    for (cpu = 0 ; cpu < numproc ; cpu++){
+      MPI_Reduce(&work[DIM * ostart[cpu]], Ap, DIM * (oend[cpu] - ostart[cpu]),
+                 MPI_DOUBLE, MPI_SUM, cpu, MPI_COMM_WORLD);
+    }
+    /* pAp = p.Ap */
+    w = 0.0;
+    for (i = 0 ; i < me - ms ; i++){
+      w += p[i] * Ap[i];
+    }
+    MPI_Allreduce(&w, &pAp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    /* alpha = r.r / p.Ap */
+    alpha = rr / pAp;
+    /* Beta */
+    for (i = 0 ; i < me - ms ; i++){
+      x[i] += alpha * p[i];  /* x += alpha * p */
+      r[i] -= alpha * Ap[i]; /* r -= alpha * Ap */
+    }
+    /* rr1 = r.r */
+    w = 0.0;
+    for (i = 0 ; i < me - ms ; i++){
+      w += r[i] * r[i];
+    }
+    MPI_Allreduce(&w, &rr1, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+    beta = rr1 / rr;
+    /* p = r + beta * p */
+    for (i = 0 ; i < me - ms ; i++){
+      p[i] = r[i] + beta * p[i];
+    }
+    rr = rr1;
+    count++;
+  }
+  if (myid == 0) printf("count=%d\n",count);
+  /* Deallocate Working Spaces */
+  free(r);
+  free(p);
+  free(Ap);
+  free(work);
+}
+/* compute velocity */
+void slet2d_velocity_dist(int np, int *ostart,int *oend,double *loc, double *foc, double ep,
+                          double *cloc,double *cvel){
+  int i,p;
+  double r;
+  double dx,dy;
+  double tr1,tr2;
+  double ov[2];
+  int myid,numproc;
+  int li;
+  /* get myid and # of processors */
+  MPI_Comm_size(MPI_COMM_WORLD,&numproc);
+  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
+  /* zeros */
+  ov[0] = 0.0;
+  ov[1] = 0.0;
+  /* loop for partilces (blob) */
+  for (i = ostart[myid] ; i < oend[myid] ; i++){
+    li = i - ostart[myid];
+    dx = cloc[0] - loc[li * DIM    ];
+    dy = cloc[1] - loc[li * DIM + 1];
+    r = sqrt(dx * dx + dy * dy);
+    tr1 = term1(r,ep) / (4.0 * M_PI);
+    tr2 = term2(r,ep) / (4.0 * M_PI);
+    tr2 *= foc[li * DIM] * dx + foc[li * DIM + 1] * dy;
+    ov[0] += -foc[li * DIM    ] * tr1 + tr2 * dx;
+    ov[1] += -foc[li * DIM + 1] * tr1 + tr2 * dy;
+  }
+  /* sum up to CPU0 */
+  MPI_Reduce(ov, cvel, 2, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
+}
+double term1(double r,double ep){
+  double sq;
+  sq = sqrt(r * r + ep * ep);
+  return log(sq + ep) - ep * (sq + 2.0 * ep) / (sq + ep) / sq;
+}
+double term2(double r,double ep){
+  double sq;
+  sq = sqrt(r * r + ep * ep);
+  return (sq + 2.0 * ep) / (sq + ep) / (sq + ep) / sq;
+}
+}}}