= Ex32.f90 =
{{{#!fortran
!
!  Large Scale Computing
!  Stokes Flow in a Cavity
!  ex32.f90
!
!  Need Lapack and Blas Libraries
!
program StokesFlowInCavity
  use stokeslet2d
  implicit none
  double precision :: t_cost
  double precision,parameter :: EPSILON = 0.005 ! blob size
  integer,parameter :: INTGRID = 51  ! # of x-grid lines for internal velocity 
  double precision :: dp,dm
  integer :: numpdepth
  integer :: numpwidth
  integer :: numparticles
  double precision,dimension(:),allocatable :: loc ! particle location
  double precision,dimension(:),allocatable :: vel ! particle velocity
  double precision,dimension(:),allocatable :: foc ! particle force
  double precision,dimension(:),allocatable :: mat ! 2Nx2N dense matrix
  double precision,dimension(:),allocatable :: cloc ! array for internal points
  double precision,dimension(:),allocatable :: cvel; ! array for internal velocity
  integer :: i,j
  integer nyg
  character(len=32) :: arg
 
  ! Newer version of Fortran can get commadline arguments
  call get_command_argument(1,arg)
  if (len_trim(arg) == 0) then
     call get_command_argument(0,arg)
     print *,arg," [Depth of Cavity]"
     call exit(-1)
  end if
 
  ! get inputed depth
  read(arg,*) dp
 
  ! # of particles in depth 
  numpdepth = int(dp / EPSILON + 0.5)
 
  ! # of particles in width 
  numpwidth = int(1.0 / EPSILON + 0.5)
 
  ! total # od particles
  numparticles = numpdepth * 2 + numpwidth * 2
  print *,"Total # of Particles=",numparticles
 
  ! Allocate Space 
  ! DIM=2 defined by 'stokeslet2d.f90'
  allocate(loc(numparticles * DIM))
  allocate(vel(numparticles * DIM))
  allocate(foc(numparticles * DIM))
  allocate(mat(numparticles * DIM * numparticles * DIM))
 
  ! set location & velocity of particles (blob)
  do i = 0, numparticles-1
    foc(i * DIM + 1) = 0.d0;
    foc(i * DIM + 2) = 0.d0;
    if ((i >= 0) .AND. (i <= numpwidth)) then ! top 
       loc(i * DIM + 1) = -0.5 + EPSILON * dble(i) ! x 
       loc(i * DIM + 2) = 0.d0 !  y 
       vel(i * DIM + 1) = 1.d0
       vel(i * DIM + 2) = 0.d0
    else
       if (i <= (numpwidth + numpdepth)) then ! right wall
          loc(i * DIM + 1) = 0.5 ! x 
          loc(i * DIM + 2) = -EPSILON * dble(i - numpwidth) ! y
          vel(i * DIM + 1) = 0.d0
          vel(i * DIM + 2) = 0.d0
       else
          if (i <= (2 * numpwidth + numpdepth)) then ! bottom
             loc(i * DIM + 1) = 0.5 - EPSILON * dble(i - (numpwidth + numpdepth))! x 
             loc(i * DIM + 2) = -EPSILON * numpdepth ! y 
             vel(i * DIM + 1) = 0.d0
             vel(i * DIM + 2) = 0.d0	  
          else                 ! left wall
             loc(i * DIM + 1) = -0.5 ! x
             loc(i * DIM + 2) = -EPSILON * dble((2 * numpwidth + 2 * numpdepth) - i) ! y
             vel(i * DIM + 1) = 0.d0
             vel(i * DIM + 2) = 0.d0	  
          end if
       end if
    end if
 end do
 
 ! make 2Nx2N Matrix 
 dm=t_cost();
 call slet2d_mkMatrix(numparticles,loc,EPSILON,mat)
 print *,"setting matrix ",t_cost(),"(sec)"
 
 ! Sovle linear ststem 
 call slet2d_solve(numparticles,mat,vel,foc)
 !call slet2d_solve_CG(numparticles,mat,vel,foc)
 
 print *,"Sovle linear ststem",t_cost(),"(sec)"
 
 ! check the solution 
 call slet2d_mkMatrix(numparticles,loc,EPSILON,mat)
 call check_solution(numparticles,mat,vel,foc)
 
 ! deallocate big matrix 
 deallocate(mat)
 
 ! out particle (blob) data 
 open(10,FILE='particle.dat')
 do i = 0, numparticles-1
    write(10,'(6e16.8)') loc(i * DIM + 1),loc(i * DIM + 2), &
         vel(i * DIM + 1),vel(i * DIM + 2),  &
         foc(i * DIM + 1),foc(i * DIM + 2)
 end do
 close(10)
 
 ! 
 !  compute internal velocity 
 !
 nyg = int(EPSILON * dble(numpdepth * (INTGRID - 1)))
 !nyg = numpdepth * (INTGRID - 1) + 1
 allocate(cvel(INTGRID * nyg * DIM))
 allocate(cloc(INTGRID * nyg * DIM))
 !print *,"Internal Grid",INTGRID,"x",nyg
 
 ! setting location 
 do j = 0, nyg - 1
    do i = 0,INTGRID - 1
       cloc(DIM * (i + INTGRID * j) + 1) = -0.5 + dble(i) / dble(INTGRID - 1)
       cloc(DIM * (i + INTGRID * j) + 2) = -dble(j) / dble(INTGRID - 1)
    end do
 end do
 
 ! compute velocities 
 dm=t_cost();
 call slet2d_velocity(numparticles, loc, foc, EPSILON, INTGRID * nyg, cloc, cvel)
 print *,"Compute internal velocity",t_cost(),"(sec)"
 
 ! out velocities 
 open(20,FILE='res.dat')
 do j = 0, nyg - 1
    do i = 0, INTGRID - 1
       write(20,'(4e16.8)') cloc(DIM * (i + INTGRID * j) + 1), cloc(DIM * (i + INTGRID * j) + 2),&
            cvel(DIM * (i + INTGRID * j) + 1), cvel(DIM * (i + INTGRID * j) + 2)
    end do
 end do
 close(20)
 
 ! free 
 deallocate(cloc)
 deallocate(cvel)
 
 deallocate(loc)
 deallocate(vel)
 deallocate(foc)
end program StokesFlowInCavity
 
! for timing 
double precision function t_cost()
  real, save :: ptm
  integer,save :: fl = 0
  real :: tm,sec;
 
  call cpu_time(tm);
  if (fl == 0) then
     ptm = tm
  end if
  fl = 1;
 
  sec = tm - ptm
 
  ptm = tm
 
  t_cost = sec
  return
end function t_cost
stokeslet2d.f90

!
!  Large Scale Computing
!  Stokeslet for 2D
!  
!  Ricardo Cortez,"The Method of Regularized Stokeslets", 
!  (2001), SIAM J. Sci. Comput., Vol.23, No.4, pp.1204
!  
!  assuming mu = 1
!
module stokeslet2d
  implicit none
  parameter DIM=2
  REAL, PARAMETER :: PI = 3.141592653589793238462643383279502884197169399375
contains
 
  ! make Matrix 
  subroutine slet2d_mkMatrix(np,loc,ep,mat)
    integer :: np
    double precision, dimension(:), allocatable :: loc,mat
    double precision :: ep
    integer :: i,j
    double precision :: r
    double precision :: dx,dy
    double precision tr1,tr2
 
    !zeros mat
    do i = 1,  DIM * DIM * np * np
       mat(i) = 0.d0
    end do
 
    ! make mat
    ! Because of symmetric, we can loop over a half  
    do i = 0, np-1
       do j = i, np-1
          dx = loc(i * DIM + 1) - loc(j * DIM + 1)
          dy = loc(i * DIM + 2) - loc(j * DIM + 2)
          r = sqrt(dx * dx + dy * dy);
 
          tr1 = term1(r,ep) / (4.d0 * PI);
          tr2 = term2(r,ep) / (4.d0 * PI); 
 
          ! diagoanl elements and upper half 
          mat(i * DIM + 1  + j * DIM      * np * DIM) = &
               mat(i * DIM + 1  + j * DIM      * np * DIM) - tr1 + tr2 * dx * dx
          mat(i * DIM + 1 + (j * DIM + 1) * np * DIM) = &
               mat(i * DIM + 1 + (j * DIM + 1) * np * DIM) + tr2 * dx * dy
          mat(i * DIM + 2 +  j * DIM      * np * DIM) = &
               mat(i * DIM + 2 +  j * DIM      * np * DIM) + tr2 * dx * dy
          mat(i * DIM + 2 + (j * DIM + 1) * np * DIM) = &
               mat(i * DIM + 2 + (j * DIM + 1) * np * DIM) - tr1 + tr2 * dy * dy
          if (i /= j) then
             ! lower half 
             mat(j * DIM + 1 +  i * DIM      * np * DIM) = &
                  mat(j * DIM + 1 +  i * DIM      * np * DIM) - tr1 + tr2 * dx * dx
             mat(j * DIM + 1 + (i * DIM + 1) * np * DIM) = &
                  mat(j * DIM + 1 + (i * DIM + 1) * np * DIM) + tr2 * dx * dy
             mat(j * DIM + 2 +  i * DIM      * np * DIM) = &
                  mat(j * DIM + 2 +  i * DIM      * np * DIM) + tr2 * dx * dy
             mat(j * DIM + 2 + (i * DIM + 1) * np * DIM) = &
                  mat(j * DIM + 2 + (i * DIM + 1) * np * DIM) - tr1 + tr2 * dy * dy
          end if
       end do
    end do
    return
  end subroutine slet2d_mkMatrix
 
  ! Sovle Liear ststem 
  ! Use Lapack 
  subroutine slet2d_solve(np,mat,vel,foc)
    external dgesv
    integer ::  np
    double precision, dimension(:), allocatable :: mat,vel,foc
    integer :: nrhs
    integer :: n
    integer :: lda
    integer :: ldb
    integer,dimension(:),allocatable :: ipiv;
    integer :: i;
    integer :: info;
 
    ! solve Ax=b 
    ! A (amat) is n x n matrix 
    nrhs = 1 ! # of RHS is 1 
    n = DIM * np
    lda = DIM * np
    ldb = DIM * np
    ! The pivot indices that define the permutation matrix P
    ! row i of the matrix was interchanged with row IPIV(i). 
    allocate(ipiv(DIM * np))
 
    ! set RHS 
    do i = 1, n
       foc(i) = vel(i)
    end do
 
    ! solve with LAPACK 
    call dgesv(n, nrhs, mat, lda, ipiv, foc, ldb, info)
 
    ! check 
    ! if (info == 0) printf("successfully done\n"); 
    if (info < 0) then
       print *,"the",-info,"-th argument had an illegal value"
       call exit(-1)
    end if
    if (info > 0) then
       print *,"U(",info,",",info,") is exactly zero."
       call exit(-1)
    end if
 
    ! free 
    deallocate(ipiv)
    return
  end subroutine slet2d_solve
 
  ! CG use blas 
  subroutine slet2d_solve_CG(np,mat,b,x)
    external dnrm2,dcopy,dgemv,ddot,daxpy
    double precision :: ddot, dnrm2
    integer :: np
    double precision,dimension(:),allocatable :: mat,b,x
    double precision,parameter :: tol = 1.d-8
    double precision,dimension(:),allocatable :: r
    double precision,dimension(:),allocatable :: p
    double precision,dimension(:),allocatable :: Ap
    double precision :: rr,pAp,rr1,alpha,beta
    integer :: i,j,count
    double precision :: al,bt,normb
    character(len=1) ::  tr = "N"
    integer :: inc,n
 
    n = DIM * np
    inc = 1
 
    ! Check if the solution is trivial.
    normb = dnrm2(n, b, inc)
    if (normb <= 0.0) then
       do i = 1, n
          x(i) = 0.d0
       end do
       return
    end if
 
    ! Allocate Working Spaces 
    allocate(r(n))
    allocate(p(n))
    allocate(Ap(n))
 
    ! compute r0 = b - Ax 
    al = -1.d0
    bt = 1.d0
    call dcopy(n,b,inc,r,inc) ! r = b 
    call dgemv(tr,n,n,al,mat,n,x,inc,bt,r,inc) ! r=r-Ax
 
    rr = 0.d0
    call dcopy(n,r,inc,p,inc) ! p = r 
    rr = ddot(n,r,inc,r,inc) ! rr = r.r
    print *,"rr=",rr
 
    ! cg iteration 
    count = 0
    do while(rr  > tol * tol * normb * normb) 
       ! Ap = A*p
       al = 1.d0
       bt = 0.d0
       call dgemv(tr,n,n,al,mat,n,p,inc,bt,Ap,inc)
 
       ! alpha = r.r / p.Ap 
       pAp = 0.d0
       pAp = ddot(n,p,inc,Ap,inc)  ! pAp = p.Ap 
       alpha = rr / pAp
 
       ! Beta 
       rr1 = 0.d0
       call daxpy(n,alpha,p,inc,x,inc) ! x += alpha * p
       al = -alpha
       call daxpy(n,al,Ap,inc,r,inc) ! r -= alpha * Ap
       rr1 = ddot(n,r,inc,r,inc) ! rr1 = r.r
 
       beta = rr1 / rr
       ! p = r + beta * p  no blas routine :( 
       do i = 1,n
          p(i) = r(i) + beta * p(i)
       end do
 
       rr = rr1
       count = count + 1
    end do
 
    ! Deallocate Working Spaces 
    deallocate(r)
    deallocate(p)
    deallocate(Ap)
    return
  end subroutine slet2d_solve_CG
 
  subroutine check_solution(np,mat,b,x)
    external dcopy,dgemv,dnrm2
    double precision :: dnrm2
    integer :: np
    double precision,dimension(:),allocatable :: mat,b,x
    double precision,dimension(:),allocatable :: r
    double precision :: al,bt,rr
    character(len=1) :: tr = "N"
    integer :: inc,n
 
    n = DIM * np
    inc = 1
 
    ! Allocate Working Spaces 
    allocate(r(n))
 
    ! compute r0 = b - Ax 
    al = -1.d0
    bt = 1.d0
    call dcopy(n,b,inc,r,inc) ! r = b 
    call dgemv(tr,n,n,al,mat,n,x,inc,bt,r,inc)
 
    ! rr = |r| 
    rr = dnrm2(n, r, inc)
 
    print *,"|b - Ax|=",rr
    deallocate(r)
    return
  end subroutine check_solution
 
  !
  ! compute velocity 
  subroutine slet2d_velocity(np, loc, foc, ep, nump, cloc,cvel)
    integer :: np
    double precision,dimension(:),allocatable :: loc, foc
    double precision :: ep
    integer :: nump
    double precision,dimension(:),allocatable :: cloc,cvel
    integer :: i,p
    double precision :: r
    double precision :: dx,dy
    double precision :: tr1,tr2
 
    ! loop for grid 
    do p = 0, nump-1
       ! zeros 
       cvel(p * DIM + 1) = 0.d0
       cvel(p * DIM + 2) = 0.d0
 
       ! loop for partilces (blob)
       do i = 0, np-1
          dx = cloc(p * DIM + 1) - loc(i * DIM + 1)
          dy = cloc(p * DIM + 2) - loc(i * DIM + 2)
          r = sqrt(dx * dx + dy * dy)
 
          tr1 = term1(r,ep) / (4.d0 * PI)
          tr2 = term2(r,ep) / (4.d0 * PI)
 
          tr2 = tr2 * (foc(i * DIM + 1) * dx + foc(i * DIM + 2) * dy)
 
          cvel(p * DIM + 1) = cvel(p * DIM + 1) - foc(i * DIM + 1) * tr1 + tr2 * dx
          cvel(p * DIM + 2) = cvel(p * DIM + 2) - foc(i * DIM + 2) * tr1 + tr2 * dy
       end do
    end do
    return
  end subroutine slet2d_velocity
 
  double precision function term1(r,ep)
    double precision :: r,ep
    double precision ::sq
 
    sq = sqrt(r * r + ep * ep)
 
    term1 = log(sq + ep) - ep * (sq + 2.d0 * ep) / (sq + ep) / sq
    return
  end function term1
 
  double precision function term2(r,ep)
    double precision :: r,ep
    double precision :: sq
 
    sq = sqrt(r * r + ep * ep)
 
    term2 = (sq + 2.d0 * ep) / (sq + ep) / (sq + ep) / sq
    return
  end function term2
  !
end module stokeslet2d
}}}