= Ex32.c = {{{#!C /* Large Scale Computing Stokes Flow in a Cavity ex32.c Need Lapack and Blas Libraries */ #include #include #include #include #include"stokeslet2d.h" #define EPSILON 0.005 /* blob size*/ #define INTGRID 51 /* # of x-grid lines for internal velocity */ /* min and max macros */ #define max(a,b) (((a) > (b)) ? (a) : (b)) #define min(a,b) (((a) < (b)) ? (a) : (b)) double t_cost(void ); int main(int , char **); int main(int argc, char **argv){ double dp; int numpdepth; int numpwidth; int numparticles; double *loc; /* particle location */ double *vel; /* particle velocity */ double *foc; /* particle force */ double *mat; /* 2Nx2N dense matrix */ double *cloc; /* array for internal points */ double *cvel; /* array for internal velocity */ int i,j; int nyg; FILE *fp; if (argc < 2){ printf("Usage:%s [Depth of Cavity]\n",argv[0]); exit(-1); } /* get inputed depth */ dp = atof(argv[1]); /* # of particles in depth */ numpdepth = (int)(dp / EPSILON + 0.5); /* # of particles in width */ numpwidth = (int)(1.0 / EPSILON + 0.5); /* total # od particles */ numparticles = numpdepth * 2 + numpwidth * 2; printf("Total # of Particles=%d\n",numparticles); /* Allocate Space */ /* DIM=2 defined by 'stokeslet2d.h' */ loc = (double *)calloc(numparticles * DIM,sizeof(double)); vel = (double *)calloc(numparticles * DIM,sizeof(double)); foc = (double *)calloc(numparticles * DIM,sizeof(double)); mat = (double *)calloc(numparticles * DIM * numparticles * DIM,sizeof(double)); if ((loc == (double *)NULL) || (vel == (double *)NULL) || (foc == (double *)NULL) || (mat == (double *)NULL) ){ printf("Can't allocate memory!!!\n"); exit(-1); } /* set location & velocity of particles (blob)*/ for (i = 0 ; i < numparticles ; i++){ foc[i * DIM] = 0.0; foc[i * DIM + 1] = 0.0; if ((i >= 0) && (i <= numpwidth)){ /* top */ loc[i * DIM] = -0.5 + EPSILON * (double)i; /* x */ loc[i * DIM + 1] = 0.0; /* y */ vel[i * DIM] = 1.0; vel[i * DIM + 1] = 0.0; }else{ if (i <= (numpwidth + numpdepth)){ /* right wall */ loc[i * DIM] = 0.5; /* x */ loc[i * DIM + 1] = -EPSILON * (double)(i - numpwidth); /* y */ vel[i * DIM] = 0.0; vel[i * DIM + 1] = 0.0; }else{ if (i <= (2 * numpwidth + numpdepth)){ /* bottom */ loc[i * DIM] = 0.5 - EPSILON * (double)(i - (numpwidth + numpdepth)); /* x */ loc[i * DIM + 1] = -EPSILON * numpdepth; /* y */ vel[i * DIM] = 0.0; vel[i * DIM + 1] = 0.0; }else{ /* left wall */ loc[i * DIM] = -0.5; /* x */ loc[i * DIM + 1] = -EPSILON * (double)((2 * numpwidth + 2 * numpdepth) - i); /* y */ vel[i * DIM] = 0.0; vel[i * DIM + 1] = 0.0; } } } } /* make 2Nx2N Matrix */ t_cost(); slet2d_mkMatrix(numparticles,loc,EPSILON,mat); printf("setting matrix %f sec\n",t_cost()); /* Sovle linear ststem */ slet2d_solve(numparticles,mat,vel,foc); //slet2d_solve_CG(numparticles,mat,vel,foc); //slet2d_solve_GMRES(numparticles,mat,vel,foc); printf("Sovle linear ststem %f sec\n",t_cost()); /* check the solution */ slet2d_mkMatrix(numparticles,loc,EPSILON,mat); check_solution(numparticles,mat,vel,foc); /* deallocate big matrix */ free(mat); /* out particle (blob) data */ fp = fopen("particle.dat","w"); for (i = 0 ; i < numparticles ; i++){ fprintf(fp,"%e %e %e %e %e %e\n", loc[i * DIM],loc[i * DIM + 1], vel[i * DIM],vel[i * DIM + 1], foc[i * DIM],foc[i * DIM + 1]); } fclose(fp); /* compute internal velocity */ nyg = (int)(EPSILON * (double)(numpdepth * (INTGRID - 1))); cvel = (double *)calloc(INTGRID * nyg * DIM, sizeof(double)); cloc = (double *)calloc(INTGRID * nyg * DIM, sizeof(double)); if ((cloc == (double *)NULL) || (cvel == (double *)NULL)){ printf("Can't allocate memory!!!\n"); exit(-1); } /* setting location */ for (j = 0 ; j < nyg ; j++){ for (i = 0 ; i < INTGRID ; i++){ cloc[DIM * (i + INTGRID * j) ] = -0.5 + (double)i / (double)(INTGRID - 1); cloc[DIM * (i + INTGRID * j) + 1] = -(double)j / (double)(INTGRID - 1); } } /* compute velocities */ t_cost(); slet2d_velocity(numparticles, loc, foc, EPSILON, INTGRID * nyg, cloc, cvel); printf("Compute internal velocity %f sec\n",t_cost()); /* out velocities */ fp = fopen("res.dat","w"); for (j = 0 ; j < nyg ; j++){ for (i = 0 ; i < INTGRID ; i++){ fprintf(fp,"%e %e %e %e\n", cloc[DIM * (i + INTGRID * j) ], cloc[DIM * (i + INTGRID * j) + 1], cvel[DIM * (i + INTGRID * j) ], cvel[DIM * (i + INTGRID * j) + 1]); } } fclose(fp); /* free */ free(cloc); free(cvel); free(loc); free(vel); free(foc); } /* for timing */ double t_cost(void ){ static clock_t ptm; static int fl = 0; clock_t tm; double sec; tm = clock(); if (fl == 0) ptm = tm; fl = 1; sec = (double)(tm - ptm) / (double)CLOCKS_PER_SEC; ptm = tm; return sec; } stokeslet2d.c /* Large Scale Computing Stokeslet for 2D Ricardo Cortez,"The Method of Regularized Stokeslets", (2001), SIAM J. Sci. Comput., Vol.23, No.4, pp.1204 assuming mu = 1 */ #include #include #include #include"stokeslet2d.h" #include"gmres.h" double term1(double ,double ); double term2(double ,double ); /* Blas Function */ double ddot_(int *,double *,int *,double *,int *); double dnrm2_(int *, double *, int *); /* make Matrix */ void slet2d_mkMatrix(int np,double *loc,double ep,double *mat){ int i,j; double r; double dx,dy; double tr1,tr2; /*zeros mat*/ for (i = 0 ; i < DIM * DIM * np * np ; i++) mat[i] = 0.0; /* make mat */ /* Because of symmetric, we can loop over half */ for (i = 0 ; i < np ; i++){ for (j = i ; j < np ; j++){ dx = loc[i * DIM ] - loc[j * DIM ]; dy = loc[i * DIM + 1] - loc[j * DIM + 1]; r = sqrt(dx * dx + dy * dy); tr1 = term1(r,ep) / (4.0 * M_PI); tr2 = term2(r,ep) / (4.0 * M_PI); /* diagoanl elements and upper half */ mat[i * DIM + j * DIM * np * DIM] += -tr1 + tr2 * dx * dx; mat[i * DIM + (j * DIM + 1) * np * DIM] += tr2 * dx * dy; mat[i * DIM + 1 + j * DIM * np * DIM] += tr2 * dx * dy; mat[i * DIM + 1 + (j * DIM + 1) * np * DIM] += -tr1 + tr2 * dy * dy; if (i != j){ /* lower half */ mat[j * DIM + i * DIM * np * DIM] += -tr1 + tr2 * dx * dx; mat[j * DIM + (i * DIM + 1) * np * DIM] += tr2 * dx * dy; mat[j * DIM + 1 + i * DIM * np * DIM] += tr2 * dx * dy; mat[j * DIM + 1 + (i * DIM + 1) * np * DIM] += -tr1 + tr2 * dy * dy; } } } } /* Sovle Liear ststem */ /* Use Lapack */ void slet2d_solve(int np,double *mat,double *vel,double *foc){ int nrhs; int n; int lda; int ldb; int *ipiv; int i; int info; /*solve Ax=b */ /* A (amat) is n x n matrix */ nrhs = 1; /* # of RHS is 1 */ n = DIM * np; lda = DIM * np; ldb = DIM * np; ipiv = calloc(DIM * np,sizeof(int)); /* The pivot indices that define the permutation matrix P; row i of the matrix was interchanged with row IPIV(i). */ if (ipiv == NULL) exit(-1); /* set RHS */ for (i = 0 ; i < n ; i++) foc[i] = vel[i]; /* solve with LAPACK */ dgesv_(&n, &nrhs, mat, &lda, ipiv, foc, &ldb, &info); /* check */ /* if (info == 0) printf("successfully done\n"); */ if (info < 0){ printf("the %d-th argument had an illegal value\n",-info); exit(-1); } if (info > 0){ printf("U(%d,%d) is exactly zero.\n",info,info); exit(-1); } /* free */ free(ipiv); } /* CG use blas */ void slet2d_solve_CG(int np,double *mat,double *b,double *x){ double tol = 1.0e-8; double *r; double *p; double *Ap; double rr,pAp,rr1,alpha,beta; int i,j,count; double al,bt,normb; char tr[1] = "N"; int inc,n; n = DIM * np; inc = 1; /* Check if the solution is trivial. */ normb = dnrm2_(&n, b, &inc); if (normb <= 0.0){ for (i = 0 ; i < n ; i++) x[i] = 0.0; return; } /* Allocate Working Spaces */ r = (double *)calloc(n, sizeof(double)); p = (double *)calloc(n, sizeof(double)); Ap = (double *)calloc(n, sizeof(double)); if ((r == NULL) || (p == NULL) || (Ap == NULL)){ printf("memory allocation failed\n"); return ; } /* compute r0 = b - Ax */ al = -1.0; bt = 1.0; dcopy_(&n,b,&inc,r,&inc); /* r = b */ dgemv_(tr,&n,&n,&al,mat,&n,x,&inc,&bt,r,&inc); rr = 0.0; dcopy_(&n,r,&inc,p,&inc); /* p = r */ rr = ddot_(&n,r,&inc,r,&inc); /* rr = r.r */ printf("rr=%e\n",rr); /* cg iteration */ count = 0; while(rr > tol * tol * normb * normb){ // Ap = A*p al = 1.0; bt = 0.0; dgemv_(tr,&n,&n,&al,mat,&n,p,&inc,&bt,Ap,&inc); /* alpha = r.r / p.Ap */ pAp = 0.0; pAp = ddot_(&n,p,&inc,Ap,&inc); /* pAp = p.Ap */ alpha = rr / pAp; /* Beta */ rr1 = 0.0; daxpy_(&n,&alpha,p,&inc,x,&inc); /* x += alpha * p */ al = -alpha; daxpy_(&n,&al,Ap,&inc,r,&inc); /* r -= alpha * Ap */ rr1 = ddot_(&n,r,&inc,r,&inc); /* rr1 = r.r */ beta = rr1 / rr; /* p = r + beta * p no blas routine :( */ for (i = 0 ; i < n ; i++){ p[i] = r[i] + beta * p[i]; } rr = rr1; count++; } /* Deallocate Working Spaces */ free(r); free(p); free(Ap); } void check_solution(int np,double *mat,double *b,double *x){ double *r; double al,bt,rr; char tr[1] = "N"; int inc,n; n = DIM * np; inc = 1; /* Allocate Working Spaces */ r = (double *)calloc(n, sizeof(double)); /* compute r0 = b - Ax */ al = -1.0; bt = 1.0; dcopy_(&n,b,&inc,r,&inc); /* r = b */ dgemv_(tr,&n,&n,&al,mat,&n,x,&inc,&bt,r,&inc); /* rr = |r| */ rr = dnrm2_(&n, r, &inc); printf("|b - Ax|=%e\n",rr); free(r); } /* Use Lapack */ void slet2d_solve_GMRES(int np,double *mat,double *vel,double *foc){ int n; int restart; int max_iterations; double tol; /* set GMRES parameters */ restart = 100; max_iterations = 100; tol = 1.0e-8; /*solve Ax=b */ /* A (amat) is n x n matrix */ n = DIM * np; /* solve with GMRES */ gmres_solve(mat, foc, vel, n, restart, &tol, &max_iterations); } /* compute velocity */ void slet2d_velocity(int np, double *loc, double *foc, double ep, int nump, double *cloc,double *cvel){ int i,p; double r; double dx,dy; double tr1,tr2; /* loop for grid */ for (p = 0 ; p < nump ; p++){ /* zeros */ cvel[p * DIM ] = 0.0; cvel[p * DIM + 1] = 0.0; /* loop for partilces (blob) */ for (i = 0 ; i < np ; i++){ dx = cloc[p * DIM ] - loc[i * DIM ]; dy = cloc[p * DIM + 1] - loc[i * DIM + 1]; r = sqrt(dx * dx + dy * dy); tr1 = term1(r,ep) / (4.0 * M_PI); tr2 = term2(r,ep) / (4.0 * M_PI); tr2 *= foc[i * DIM] * dx + foc[i * DIM + 1] * dy; cvel[p * DIM ] += -foc[i * DIM ] * tr1 + tr2 * dx; cvel[p * DIM + 1] += -foc[i * DIM + 1] * tr1 + tr2 * dy; } } } double term1(double r,double ep){ double sq; sq = sqrt(r * r + ep * ep); return log(sq + ep) - ep * (sq + 2.0 * ep) / (sq + ep) / sq; } double term2(double r,double ep){ double sq; sq = sqrt(r * r + ep * ep); return (sq + 2.0 * ep) / (sq + ep) / (sq + ep) / sq; }}}}