Blas Level 1
C
/*
Tulane HPC Workshop
Blas Level 1 : norm2 test
*/
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
#include <atlas/cblas.h>
#endif
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
#define SIZE 50000000
/* my norm2 */
double mynorm2(int n, double *x) {
int i;
double nrm = 0.0;
for (i = 0; i < n; i++) {
nrm += x[i] * x[i];
}
return sqrt(nrm);
}
int main(int argc, char **argv) {
int i, n, m;
double *xvec;
double nrm;
#ifdef _OPENMP
double tsomp, teomp;
#endif
clock_t ts, te;
/* alloc vector and setting */
#ifdef MKL_ILP64
xvec = (double *)mkl_malloc(SIZE * sizeof(double),64);
#else
xvec = (double *)calloc(SIZE, sizeof(double));
#endif
if (xvec == NULL)
exit(-1);
for (i = 0; i < SIZE; i++)
xvec[i] = (double) (i % 100) / 50.0;
/* call my norm 2*/
ts = clock();
nrm = mynorm2(SIZE, xvec);
te = clock() ;
printf(" my norm |x|=%e time=%f (msec)\n", nrm, 1000.0 * (te - ts) / CLOCKS_PER_SEC);
/* call blas norm2 */
#ifdef _OPENMP
tsomp = omp_get_wtime();
#else
ts = clock();
#endif
nrm = cblas_dnrm2(SIZE, xvec, 1);
#ifdef _OPENMP
teomp = omp_get_wtime();
printf("blas norm |x|=%e time=%f (msec) threads=%d\n",nrm,1000.0 * (teomp - tsomp), omp_get_max_threads());
#else
te = clock();
printf("blas norm |x|=%e time=%f (msec)\n",nrm,1000.0 * (te - ts) / CLOCKS_PER_SEC);
#endif
/* cleanup */
#ifdef MKL_ILP64
mkl_free(xvec);
#else
free(xvec);
#endif
}
C++
//
// Tulane HPC Workshop
//
// Blas Level 1 : norm2 test
//
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <cmath>
#include <ctime>
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
extern "C" {
#include <atlas/cblas.h>
}
#endif
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
/* my norm2 */
double mynorm2(unsigned int n,double *x){
double nrm = 0.0;
for (unsigned int i = 0 ; i < n ; i++){
nrm += x[i] * x[i];
}
return std::sqrt(nrm);
}
int main(int argc, char **argv){
const unsigned int size= 50000000;
double nrm;
std::cout << std::setprecision(16);
/* alloc vector and setting */
#ifdef MKL_ILP64
double *xvec = (double *)mkl_malloc(size * sizeof(double), 64);
#else
double *xvec = new double[size];
#endif
for (int i = 0 ; i < size ; i++) xvec[i] = (i % 100) / 50.0;
#ifdef _OPENMP
double tsomp, teomp;
#endif
clock_t ts, te;
/* call my norm 2*/
ts = std::clock();
nrm = mynorm2(size,xvec);
te = std::clock();
std::cout << "MY NORM=" << nrm << std::endl;
std::cout << "Time cost for my norm = " << 1000.0 * (te - ts) / CLOCKS_PER_SEC << "(msec)\n";
/* call blas norm2 */
#ifdef _OPENMP
tsomp = omp_get_wtime();
#else
ts = std::clock();
#endif
nrm = cblas_dnrm2(size,xvec,1);
std::cout << "BLAS NORM=" << nrm << std::endl;
#ifdef _OPENMP
teomp = omp_get_wtime();
std::cout << "Time cost for cblas norm = " << 1000.0 * (teomp - tsomp) << "(msec) Threads=" << omp_get_max_threads() << std::endl;
#else
te = std::clock();
std::cout << "Time cost for cblas norm = " << 1000.0 * (te - ts) / CLOCKS_PER_SEC << "(msec)\n";
#endif
#ifdef MKL_ILP64
mkl_free(xvec);
#else
delete [] xvec;
#endif
}
Fortran
!
!
! Blas Level 1 : norm2 test
!
!-------------------------------------
! my norm2
double precision function mynorm2(n,x)
implicit none
integer :: i, n
double precision :: x(n)
double precision :: nrm
nrm = 0.d0
do i = 1,n
nrm = nrm + x(i) * x(i)
end do
mynorm2 = sqrt(nrm)
end function mynorm2
!
!
program Blas1
use omp_lib !Provides OpenMP* specific APIs
implicit none
integer, parameter:: SIZE = 50000000
integer :: i, n, m
double precision, allocatable, dimension(:) :: xvec
double precision :: nrm, ts, te
double precision :: dnrm2, mynorm2
! alloc vector and setting
allocate(xvec(SIZE))
do i = 1,SIZE
xvec(i) = dble (mod(i,100)) / 50.0
end do
! call my norm
ts = omp_get_wtime()
nrm = mynorm2(SIZE, xvec)
te = omp_get_wtime();
print*, "my norm |x|=",nrm, " time=", 1000.0 * (te - ts),"(msec)"
! call blas norm2
ts = omp_get_wtime()
nrm = dnrm2(SIZE, xvec, 1)
te = omp_get_wtime()
print*, "blas norm |x|=",nrm, " time=", 1000.0 * (te - ts),"(msec)"
deallocate(xvec)
end program Blas1