wiki:Blas Level 1

Blas Level 1

To use MKL Blas on Cypress, you have to load the module,

$ module load intel-psxe

C

/*
 Tulane HPC Workshop

 Blas Level 1 : norm2 test
 */
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
#include <atlas/cblas.h>
#endif
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

#define SIZE 50000000

/* my norm2 */
double mynorm2(int n, double *x) {
        int i;
        double nrm = 0.0;
        for (i = 0; i < n; i++) {
                nrm += x[i] * x[i];
        }
        return sqrt(nrm);
}

int main(int argc, char **argv) {
        int i, n, m;
        double *xvec;
        double nrm;
#ifdef _OPENMP
        double tsomp, teomp;
#endif
        clock_t ts, te;


        /* alloc vector and setting */
#ifdef MKL_ILP64
        xvec = (double *)mkl_malloc(SIZE * sizeof(double),64);
#else
        xvec = (double *)calloc(SIZE, sizeof(double));
#endif
        if (xvec == NULL)
                exit(-1);
        for (i = 0; i < SIZE; i++)
                xvec[i] = (double) (i % 100) / 50.0;

        /* call my norm 2*/
        ts = clock();
        nrm = mynorm2(SIZE, xvec);
        te = clock() ;
        printf("  my norm |x|=%e time=%f (msec)\n", nrm, 1000.0 * (te - ts) / CLOCKS_PER_SEC);

        /* call blas norm2 */
#ifdef _OPENMP
        tsomp = omp_get_wtime();
#else
        ts = clock();
#endif

        nrm = cblas_dnrm2(SIZE, xvec, 1);

#ifdef _OPENMP
        teomp = omp_get_wtime();
        printf("blas norm |x|=%e time=%f (msec) threads=%d\n",nrm,1000.0 * (teomp - tsomp), omp_get_max_threads());
#else
        te = clock();
        printf("blas norm |x|=%e time=%f (msec)\n",nrm,1000.0 * (te - ts) / CLOCKS_PER_SEC);
#endif


        /* cleanup */
#ifdef MKL_ILP64
        mkl_free(xvec);
#else
        free(xvec);
#endif
}

Compile it as a sequential code with dynamic link libraries,

icc -DMKL_ILP64 -mkl=sequential blas1.c  -lpthread -lm 

Compile it as a parallel (OpenMP) code with dynamic link libraries,

icc -DMKL_ILP64 -openmp -mkl=parallel blas1.c  -lpthread -lm 

On Mac/Linux

gcc  blas1.c  -lblas

C++

//
//  Tulane HPC Workshop
//
//  Blas Level 1 : norm2 test
//

#include <iostream>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <cmath>
#include <ctime>

#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
extern "C" {
#include <atlas/cblas.h>
}
#endif
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

/* my norm2 */
double mynorm2(unsigned int n,double *x){
  double nrm = 0.0;
  for (unsigned int i = 0 ; i < n ; i++){
    nrm +=  x[i] * x[i];
  }
  return std::sqrt(nrm);
}

int main(int argc, char **argv){
        const unsigned int size= 50000000;
        double nrm;

        std::cout << std::setprecision(16);

        /* alloc vector and setting */
#ifdef MKL_ILP64
        double *xvec = (double *)mkl_malloc(size * sizeof(double), 64);
#else
        double *xvec = new double[size];
#endif
        for (int i = 0 ; i < size ; i++) xvec[i] = (i % 100) / 50.0;

#ifdef _OPENMP
        double tsomp, teomp;
#endif
        clock_t ts, te;

        /* call my norm 2*/
        ts = std::clock();
        nrm = mynorm2(size,xvec);
        te = std::clock();
        std::cout << "MY NORM=" << nrm << std::endl;
        std::cout << "Time cost for my norm = " << 1000.0 * (te - ts) / CLOCKS_PER_SEC << "(msec)\n";

        /* call blas norm2 */
#ifdef _OPENMP
        tsomp = omp_get_wtime();
#else
        ts = std::clock();
#endif

        nrm = cblas_dnrm2(size,xvec,1);
        std::cout << "BLAS NORM=" << nrm << std::endl;

#ifdef _OPENMP
        teomp = omp_get_wtime();
        std::cout << "Time cost for cblas norm = " << 1000.0 * (teomp - tsomp) << "(msec) Threads=" << omp_get_max_threads() << std::endl;
#else
        te = std::clock();
        std::cout << "Time cost for cblas norm = " << 1000.0 * (te - ts) / CLOCKS_PER_SEC << "(msec)\n";
#endif



#ifdef MKL_ILP64
        mkl_free(xvec);
#else
        delete [] xvec;
#endif
}

Compile it as a sequential code with dynamic link libraries,

icpc -DMKL_ILP64 -mkl=sequential blas1.c  -lpthread -lm 

Compile it as a parallel (OpenMP) code with dynamic link libraries,

icpc -DMKL_ILP64 -openmp -mkl=parallel blas1.c  -lpthread -lm 

Fortran

!
!
! Blas Level 1 : norm2 test
!
!-------------------------------------
! my norm2
double precision function mynorm2(n,x)
    implicit none
    integer :: i, n
    double precision :: x(n)
    double precision :: nrm

    nrm = 0.d0
    do i = 1,n
        nrm = nrm + x(i) * x(i)
    end do

    mynorm2 = sqrt(nrm)
end function mynorm2
!
!
program Blas1
    use omp_lib           !Provides OpenMP* specific APIs
    implicit none
    integer, parameter:: SIZE = 50000000
    integer :: i, n, m
    double precision, allocatable, dimension(:) :: xvec
    double precision :: nrm, ts, te
    double precision :: dnrm2, mynorm2

        ! alloc vector and setting
        allocate(xvec(SIZE))
        do i = 1,SIZE
                xvec(i) = dble (mod(i,100)) / 50.0
        end do

        ! call my norm
        ts = omp_get_wtime()
        nrm = mynorm2(SIZE, xvec)
        te = omp_get_wtime();
        print*, "my norm |x|=",nrm, " time=", 1000.0 * (te - ts),"(msec)"

        ! call blas norm2
        ts = omp_get_wtime()
        nrm = dnrm2(SIZE, xvec, 1)
        te = omp_get_wtime()
        print*, "blas norm |x|=",nrm, " time=", 1000.0 * (te - ts),"(msec)"

        deallocate(xvec)
end program Blas1

Compile it as a sequential code with dynamic link libraries,

ifort -i8 -mkl=sequential blas1.c  -lpthread -lm 

Compile it as a parallel (OpenMP) code with dynamic link libraries,

ifort -i8 -openmp -mkl=parallel blas1.c  -lpthread -lm 
Last modified 4 years ago Last modified on Aug 21, 2015 3:19:11 PM