wiki:Blas Level 2

Blas Level 2

C

/*
 Tulane HPC Workshop

 Blas Level 2 : dgemv test
 */
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
#include <atlas/cblas.h>
#endif
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

#define SIZE 10000

/* my simple dgemv */
void mydgemv(int m, int n, double al, double *a, double *x, double bt,
                double *b) {
        int i, j;
        /* compute b = al*a*x + bt*b */
        for (i = 0; i < n; i++) {
                b[i] *= bt;
                for (j = 0; j < m; j++) {
                        b[i] += al * a[i + m * j] * x[j];
                }
        }
/*
        for (i = 0; i < n; i++) {
                b[i] *= bt;
        }
        for (j = 0; j < m; j++) {
                for (i = 0; i < n; i++) {
                        b[i] += al * a[i + m * j] * x[j];
                }
        }
*/
}
int main(void) {
        int i, j, n, m, inc;
        double *xvec;
        double *bvec;
        double *amat;
        double alpha, beta;
#ifdef _OPENMP
        double tsomp, teomp;
#endif
        clock_t ts, te;

        /* alloc vector and matrix */
#ifdef MKL_ILP64
        xvec = (double *)mkl_malloc(SIZE * sizeof(double), 64);
        bvec = (double *)mkl_malloc(SIZE * sizeof(double), 64);
        amat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64);
#else
        xvec = (double *)calloc(SIZE, sizeof(double));
        bvec = (double *)calloc(SIZE, sizeof(double));
        amat = (double *)calloc(SIZE * SIZE, sizeof(double));
#endif
        if ((xvec == NULL ) || (bvec == NULL ) || (amat == NULL )) {
                exit(-1);
        }

        /* setup matrix A */
        for (i = 0; i < SIZE; i++) {
                for (j = 0; j < SIZE; j++) {
                        amat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j));
                }
        }

        /* setup vector x */
        for (i = 0; i < SIZE; i++)
                xvec[i] = (double) i;

        /* calculate bvec = alpha*amat*xvec + beta*bvec */
        alpha = 1.0;
        beta = 0.0;

        /* call my dgemv */
        ts = clock();
        mydgemv(SIZE, SIZE, alpha, amat, xvec, beta, bvec);
        te = clock();

        printf("|b|=%f\n", cblas_dnrm2(SIZE, bvec, 1));
        printf("Time : my dgemv = %f (sec)\n",  1.0 * (te - ts) / CLOCKS_PER_SEC);

        /* call blas dgemv */
#ifdef _OPENMP
        tsomp = omp_get_wtime();
#else
        ts = clock();
#endif

        cblas_dgemv(CblasColMajor, CblasNoTrans, SIZE, SIZE, 1.0, amat, SIZE, xvec,1, 0.0, bvec, 1);
        //cblas_dgemv(CblasRowMajor,CblasTrans,SIZE,SIZE,1.0,amat,SIZE,xvec,1,0.0,bvec,1);
        printf("|b|=%f\n", cblas_dnrm2(SIZE, bvec, 1));
#ifdef _OPENMP
        teomp = omp_get_wtime();
        printf("Time : cblas dgemv = %f (sec) Threads=%d\n",  (teomp - tsomp),omp_get_max_threads());
#else
        te = clock();
        printf("Time : cblas dgemv = %f (sec)\n",  1.0 * (te - ts)  / CLOCKS_PER_SEC);
#endif



#ifdef MKL_ILP64
        mkl_free(xvec);
        mkl_free(bvec);
        mkl_free(amat);
#else
        free(xvec);
        free(bvec);
        free(amat);
#endif
}

C++

//
//  Tulane HPC Workshop
//
//  Blas Level 2 : dgemv test
//
//
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <cmath>
#include <ctime>
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
extern "C" {
#include <atlas/cblas.h>
}
#endif
#endif

#ifdef OMP
#include <omp.h>
#endif

/* my simple dgemv */
void mydgemv(int m, int n, double al, double *a, double *x, double bt,
                double *b) {
        /* compute b = al*a*x + bt*b */
        for (int i = 0; i < n; i++) {
                b[i] *= bt;
                for (int j = 0; j < m; j++) {
                        b[i] += al * a[i + m * j] * x[j];
                }
        }
        /*
         // should test which routine is faster
         for (int i = 0 ; i < n ; i++){
         b[i] *= bt;
         }
         for (int j = 0 ; j < m ; j++){
         for (int i = 0 ; i < n ; i++){
         b[i] += al * a[i + m * j] * x[j];
         }
         }
         */
}

int main(int argc, char **argv) {
        const unsigned int size = 10000;

        /* alloc vector and matrix */
#ifdef MKL_ILP64
        double *xvec = (double *)mkl_malloc(size * sizeof(double), 64);
        double *bvec = (double *)mkl_malloc(size * sizeof(double), 64);
        double *amat = (double *)mkl_malloc(size * size * sizeof(double), 64);
#else
        double *xvec = new double[size];
        double *bvec = new double[size];
        double *amat = new double[size * size];
#endif
        /* setup matrix A */
        for (int i = 0; i < size; i++) {
                for (int j = 0; j < size; j++) {
                        amat[i + size * j] = i + 100.0 / (1.0 + std::abs(i - j));
                }
        }

        /* setup vector x */
        for (int i = 0; i < size; i++)
                xvec[i] = i;

#ifdef _OPENMP
        double tsomp, teomp;
#endif
        clock_t ts, te;

        /* call my dgemv */
        ts = std::clock();
        mydgemv(size, size, 1.0, amat, xvec, 0.0, bvec);
        te = std::clock();
        std::cout << "|b|=" << cblas_dnrm2(size, bvec, 1) << std::endl;
        std::cout << "Time : my dgemv = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n";

        /* call blas dgemv */
#ifdef _OPENMP
        tsomp = omp_get_wtime();
#else
        ts = std::clock();
#endif
        cblas_dgemv(CblasColMajor, CblasNoTrans, size, size, 1.0, amat, size, xvec, 1, 0.0, bvec, 1);
        //cblas_dgemv(CblasRowMajor,CblasTrans,size,size,1.0,amat,size,xvec,1,0.0,bvec,1);
        std::cout << "|b|=" << cblas_dnrm2(size,bvec,1) << std::endl;
#ifdef _OPENMP
        teomp = omp_get_wtime();
        std::cout << "Time : cblas dgemv = " << (teomp - tsomp) << "(sec) Threads=" << omp_get_max_threads() << std::endl;
#else
        te = std::clock();
        std::cout << "Time : cblas dgemv = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n";
#endif

#ifdef MKL_ILP64
        mkl_free(xvec);
        mkl_free(bvec);
        mkl_free(amat);
#else
        delete[] xvec;
        delete[] bvec;
        delete[] amat;
#endif
}

Fortran

!
!  Tulane HPC Workshop
!
!  Blas Level 2 : dgemv test
!
!
!-------------------------------------------
! my simple dgemv
subroutine mydgemv(m, n, al,  a,  x,  bt, b)
    implicit none
    integer :: i, j, n, m
    double precision :: x(m), b(n), a(n * m)
    double precision :: al, bt

        ! compute b = al*a*x + bt*b */
        do i = 1,n
                b(i) = b(i) * bt
                do j = 1, m
                        b(i) = b(i) + al * a(i + m * (j - 1)) * x(j)
                end do
        end do
end subroutine mydgemv
!
Program blas2
    use omp_lib           !Provides OpenMP* specific APIs
    implicit none
    integer, parameter:: SIZE = 5000
    integer :: i, j
    double precision, allocatable, dimension(:) :: xvec,bvec,amat
    double precision :: ts, te
    double precision :: dnrm2

    ! alloc vector and matrix
    allocate(xvec(SIZE),bvec(SIZE),amat(SIZE*SIZE))

    ! setup matrix A
    do i = 1,SIZE
        do j = 1, SIZE
            amat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j))
        end do
    end do

    ! setup vector x
    do i = 1,SIZE
        xvec(i) = i - 1
    end do

    ! call my dgemv
    ts = omp_get_wtime()
    call mydgemv(SIZE, SIZE, 1.d0, amat, xvec, 0.d0, bvec);
    te = omp_get_wtime()
    print*, "|b|=", dnrm2(SIZE, bvec, 1)
    print*, "Time : my dgemv = ", (te - ts),"(sec)"

    ! call blas dgemv
    ts = omp_get_wtime();
    call dgemv('N', SIZE, SIZE, 1.d0, amat, size, xvec,1, 0.d0, bvec, 1)
    te = omp_get_wtime();
    print*, "|b|=", dnrm2(SIZE, bvec, 1)
    print*, "Time : blas dgemv = ", (te - ts),"(sec)"

    deallocate(xvec,bvec,amat)
end Program blas2
Last modified 4 years ago Last modified on Aug 20, 2015 12:49:16 PM