= Blas Level 2 = == C == {{{#!c /* Tulane HPC Workshop Blas Level 2 : dgemv test */ #include #include #include #include #ifdef __APPLE__ #include #else #ifdef MKL_ILP64 #include #include #else #include #endif #endif #ifdef _OPENMP #include #endif #define SIZE 10000 /* my simple dgemv */ void mydgemv(int m, int n, double al, double *a, double *x, double bt, double *b) { int i, j; /* compute b = al*a*x + bt*b */ for (i = 0; i < n; i++) { b[i] *= bt; for (j = 0; j < m; j++) { b[i] += al * a[i + m * j] * x[j]; } } /* for (i = 0; i < n; i++) { b[i] *= bt; } for (j = 0; j < m; j++) { for (i = 0; i < n; i++) { b[i] += al * a[i + m * j] * x[j]; } } */ } int main(void) { int i, j, n, m, inc; double *xvec; double *bvec; double *amat; double alpha, beta; #ifdef _OPENMP double tsomp, teomp; #endif clock_t ts, te; /* alloc vector and matrix */ #ifdef MKL_ILP64 xvec = (double *)mkl_malloc(SIZE * sizeof(double), 64); bvec = (double *)mkl_malloc(SIZE * sizeof(double), 64); amat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); #else xvec = (double *)calloc(SIZE, sizeof(double)); bvec = (double *)calloc(SIZE, sizeof(double)); amat = (double *)calloc(SIZE * SIZE, sizeof(double)); #endif if ((xvec == NULL ) || (bvec == NULL ) || (amat == NULL )) { exit(-1); } /* setup matrix A */ for (i = 0; i < SIZE; i++) { for (j = 0; j < SIZE; j++) { amat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); } } /* setup vector x */ for (i = 0; i < SIZE; i++) xvec[i] = (double) i; /* calculate bvec = alpha*amat*xvec + beta*bvec */ alpha = 1.0; beta = 0.0; /* call my dgemv */ ts = clock(); mydgemv(SIZE, SIZE, alpha, amat, xvec, beta, bvec); te = clock(); printf("|b|=%f\n", cblas_dnrm2(SIZE, bvec, 1)); printf("Time : my dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); /* call blas dgemv */ #ifdef _OPENMP tsomp = omp_get_wtime(); #else ts = clock(); #endif cblas_dgemv(CblasColMajor, CblasNoTrans, SIZE, SIZE, 1.0, amat, SIZE, xvec,1, 0.0, bvec, 1); //cblas_dgemv(CblasRowMajor,CblasTrans,SIZE,SIZE,1.0,amat,SIZE,xvec,1,0.0,bvec,1); printf("|b|=%f\n", cblas_dnrm2(SIZE, bvec, 1)); #ifdef _OPENMP teomp = omp_get_wtime(); printf("Time : cblas dgemv = %f (sec) Threads=%d\n", (teomp - tsomp),omp_get_max_threads()); #else te = clock(); printf("Time : cblas dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); #endif #ifdef MKL_ILP64 mkl_free(xvec); mkl_free(bvec); mkl_free(amat); #else free(xvec); free(bvec); free(amat); #endif } }}} == C++ == {{{#!c++ // // Tulane HPC Workshop // // Blas Level 2 : dgemv test // // #include #include #include #include #include #include #include #ifdef __APPLE__ #include #else #ifdef MKL_ILP64 #include #include #else extern "C" { #include } #endif #endif #ifdef OMP #include #endif /* my simple dgemv */ void mydgemv(int m, int n, double al, double *a, double *x, double bt, double *b) { /* compute b = al*a*x + bt*b */ for (int i = 0; i < n; i++) { b[i] *= bt; for (int j = 0; j < m; j++) { b[i] += al * a[i + m * j] * x[j]; } } /* // should test which routine is faster for (int i = 0 ; i < n ; i++){ b[i] *= bt; } for (int j = 0 ; j < m ; j++){ for (int i = 0 ; i < n ; i++){ b[i] += al * a[i + m * j] * x[j]; } } */ } int main(int argc, char **argv) { const unsigned int size = 10000; /* alloc vector and matrix */ #ifdef MKL_ILP64 double *xvec = (double *)mkl_malloc(size * sizeof(double), 64); double *bvec = (double *)mkl_malloc(size * sizeof(double), 64); double *amat = (double *)mkl_malloc(size * size * sizeof(double), 64); #else double *xvec = new double[size]; double *bvec = new double[size]; double *amat = new double[size * size]; #endif /* setup matrix A */ for (int i = 0; i < size; i++) { for (int j = 0; j < size; j++) { amat[i + size * j] = i + 100.0 / (1.0 + std::abs(i - j)); } } /* setup vector x */ for (int i = 0; i < size; i++) xvec[i] = i; #ifdef _OPENMP double tsomp, teomp; #endif clock_t ts, te; /* call my dgemv */ ts = std::clock(); mydgemv(size, size, 1.0, amat, xvec, 0.0, bvec); te = std::clock(); std::cout << "|b|=" << cblas_dnrm2(size, bvec, 1) << std::endl; std::cout << "Time : my dgemv = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; /* call blas dgemv */ #ifdef _OPENMP tsomp = omp_get_wtime(); #else ts = std::clock(); #endif cblas_dgemv(CblasColMajor, CblasNoTrans, size, size, 1.0, amat, size, xvec, 1, 0.0, bvec, 1); //cblas_dgemv(CblasRowMajor,CblasTrans,size,size,1.0,amat,size,xvec,1,0.0,bvec,1); std::cout << "|b|=" << cblas_dnrm2(size,bvec,1) << std::endl; #ifdef _OPENMP teomp = omp_get_wtime(); std::cout << "Time : cblas dgemv = " << (teomp - tsomp) << "(sec) Threads=" << omp_get_max_threads() << std::endl; #else te = std::clock(); std::cout << "Time : cblas dgemv = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; #endif #ifdef MKL_ILP64 mkl_free(xvec); mkl_free(bvec); mkl_free(amat); #else delete[] xvec; delete[] bvec; delete[] amat; #endif } }}} == Fortran == {{{#!fortran ! ! Tulane HPC Workshop ! ! Blas Level 2 : dgemv test ! ! !------------------------------------------- ! my simple dgemv subroutine mydgemv(m, n, al, a, x, bt, b) implicit none integer :: i, j, n, m double precision :: x(m), b(n), a(n * m) double precision :: al, bt ! compute b = al*a*x + bt*b */ do i = 1,n b(i) = b(i) * bt do j = 1, m b(i) = b(i) + al * a(i + m * (j - 1)) * x(j) end do end do end subroutine mydgemv ! Program blas2 use omp_lib !Provides OpenMP* specific APIs implicit none integer, parameter:: SIZE = 5000 integer :: i, j double precision, allocatable, dimension(:) :: xvec,bvec,amat double precision :: ts, te double precision :: dnrm2 ! alloc vector and matrix allocate(xvec(SIZE),bvec(SIZE),amat(SIZE*SIZE)) ! setup matrix A do i = 1,SIZE do j = 1, SIZE amat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j)) end do end do ! setup vector x do i = 1,SIZE xvec(i) = i - 1 end do ! call my dgemv ts = omp_get_wtime() call mydgemv(SIZE, SIZE, 1.d0, amat, xvec, 0.d0, bvec); te = omp_get_wtime() print*, "|b|=", dnrm2(SIZE, bvec, 1) print*, "Time : my dgemv = ", (te - ts),"(sec)" ! call blas dgemv ts = omp_get_wtime(); call dgemv('N', SIZE, SIZE, 1.d0, amat, size, xvec,1, 0.d0, bvec, 1) te = omp_get_wtime(); print*, "|b|=", dnrm2(SIZE, bvec, 1) print*, "Time : blas dgemv = ", (te - ts),"(sec)" deallocate(xvec,bvec,amat) end Program blas2 }}}