wiki:Blas Level 3

Version 1 (modified by fuji, 9 years ago) ( diff )

Blas Level 3

C

/*
 Tulane HPC Workshop

 Blas Level 3 : dgemm test
 */
#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#include<time.h>

#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
#include <atlas/cblas.h>
#endif
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

#define SIZE 2000

/* my simple dgemm */
void mydgemm(int m, int n, double al, double *a, double *b, double bt, double *c) {
        int i, j, jj;
        /* compute C := alpha * AB + beta * C */
        for (i = 0; i < n; i++) {
                for (j = 0; j < m; j++) {
                        c[i + m * j] *= bt;
                        for (jj = 0; jj < m; jj++) {
                                c[i + m * j] += al * a[i + m * jj] * b[jj + m * j];
                        }
                }
        }
}
int main(void) {
        int i, j, n, m, inc;
        double *cmat;
        double *bmat;
        double *amat;
        double alpha, beta;
#ifdef _OPENMP
        double tsomp, teomp;
#endif
        clock_t ts, te;

        /* alloc vector and matrix */
#ifdef MKL_ILP64
        cmat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64);
        bmat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64);
        amat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64);
#else
        cmat = calloc(SIZE * SIZE, sizeof(double));
        bmat = calloc(SIZE * SIZE, sizeof(double));
        amat = calloc(SIZE * SIZE, sizeof(double));
#endif
        if ((cmat == NULL ) || (bmat == NULL ) || (amat == NULL )) {
                exit(-1);
        }

        /* setup matrix A B C*/
        for (i = 0; i < SIZE; i++) {
                for (j = 0; j < SIZE; j++) {
                        amat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j));
                        bmat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j));
                        cmat[i + SIZE * j] = 0.0;
                }
        }

        /* calculate bvec = alpha*amat*bmat + beta*cmat */
        alpha = 1.0;
        beta = 0.0;

        /* call my dgemv */
        ts = clock();
        mydgemm(SIZE, SIZE, alpha, amat, bmat, beta, cmat);
        te = clock();

        printf("|C|=%f\n", cblas_dnrm2(SIZE * SIZE, cmat, 1));
        printf("Time : my dgemv = %f (sec)\n",  1.0 * (te - ts) / CLOCKS_PER_SEC);

        /* call blas dgemm */
#ifdef _OPENMP
        tsomp = omp_get_wtime();
#else
        ts = clock();
#endif
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, SIZE, SIZE, SIZE, 1.0, amat, SIZE, bmat,SIZE, 0.0, cmat, SIZE);
        printf("|C|=%f\n", cblas_dnrm2(SIZE * SIZE, cmat, 1));
#ifdef _OPENMP
        teomp = omp_get_wtime();
        printf("Time : cblas dgemv = %f (sec) Threads=%d\n",  (teomp - tsomp), omp_get_max_threads());
#else
        te = clock();
        printf("Time : cblas dgemv = %f (sec)\n",  1.0 * (te - ts) / CLOCKS_PER_SEC);
#endif

#ifdef MKL_ILP64
        mkl_free(cmat);
        mkl_free(bmat);
        mkl_free(amat);
#else
        free(cmat);
        free(bmat);
        free(amat);
#endif
}

C++

//
// Tulane HPC Workshop
//
// Blas Level 3 : dgemm test
//
#include <iostream>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <algorithm>
#include <cmath>
#include <ctime>
#ifdef __APPLE__
#include <Accelerate/Accelerate.h>
#else
#ifdef MKL_ILP64
#include <mkl.h>
#include <mkl_cblas.h>
#else
extern "C" {
#include <atlas/cblas.h>
}
#endif
#endif

#ifdef _OPENMP
#include <omp.h>
#endif

/* my simple dgemm */
void mydgemm(int m, int n, double al, double *a, double *b, double bt, double *c) {
        /* compute C := alpha * AB + beta * C */
        for (int i = 0; i < n; i++) {
                for (int j = 0; j < m; j++) {
                        c[i + m * j] *= bt;
                        for (int jj = 0; jj < m; jj++) {
                                c[i + m * j] += al * a[i + m * jj] * b[jj + m * j];
                        }
                }
        }
}
int main(void) {
        const unsigned int size = 2000;

        /* alloc vector and matrix */
#ifdef MKL_ILP64
        double *cmat = (double *)mkl_malloc(size * size * sizeof(double), 64);
        double *bmat = (double *)mkl_malloc(size * size * sizeof(double), 64);
        double *amat = (double *)mkl_malloc(size * size * sizeof(double), 64);
#else
        double *cmat = new double[size * size];
        double *bmat = new double[size * size];
        double *amat = new double[size * size];
#endif

        double alpha, beta;

        /* setup matrix A B C*/
        for (int i = 0; i < size; i++) {
                for (int j = 0; j < size; j++) {
                        amat[i + size * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j));
                        bmat[i + size * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j));
                        cmat[i + size * j] = 0.0;
                }
        }

#ifdef _OPENMP
        double tsomp, teomp;
#endif
        clock_t ts, te;

        /* calculate bvec = alpha*amat*bmat + beta*cmat */
        alpha = 1.0;
        beta = 0.0;

        /* call my dgemm */
        ts = clock();
        mydgemm(size, size, alpha, amat, bmat, beta, cmat);
        te = clock();
        std::cout << "|C|=" << cblas_dnrm2(size * size, cmat, 1) << std::endl;
        std::cout << "Time : my dgemm = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n";

        /* call blas dgemm */
#ifdef _OPENMP
        tsomp = omp_get_wtime();
#else
        ts = clock();
#endif

        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, size, size, size, 1.0, amat, size, bmat,size, 0.0, cmat, size);
        std::cout << "|C|=" << cblas_dnrm2(size * size, cmat, 1) << std::endl;

#ifdef _OPENMP
        teomp = omp_get_wtime();
        std::cout << "Time : cblas dgemm = " << (teomp - tsomp)  << "(sec) Threads=" << omp_get_max_threads() << std::endl;
#else
        te = clock();
        std::cout << "Time : cblas dgemm = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n";
#endif



#ifdef MKL_ILP64
        mkl_free(cmat);
        mkl_free(bmat);
        mkl_free(amat);
#else
        delete [] cmat;
        delete [] bmat;
        delete [] amat;
#endif
}

Fortran

!
! Tulane HPC Workshop
!
! Blas Level 3 : dgemm test
!

! my simple dgemm
subroutine mydgemm(m, n, al, a, b, bt, c)
    implicit none
    integer :: i, j, jj, n, m
    double precision :: a(n * m),b(n * m),c(n * m)
    double precision :: al, bt

        ! compute C := alpha * AB + beta * C
        do i = 1, n
                do j = 1, m
                        c(i + m * (j - 1)) = bt * c(i + m * (j - 1))
                        do jj = 1,m
                                c(i + m * (j - 1)) = c(i + m * (j - 1)) + al * a(i + m * (jj - 1)) * b(jj + m * (j - 1))
                        end do
                end do
        end do
end subroutine mydgemm
!
!
Program blas3
    use omp_lib           !Provides OpenMP* specific APIs
    implicit none
    integer, parameter:: SIZE = 2000
    integer :: i, j
    double precision, allocatable, dimension(:) :: amat,bmat,cmat
    double precision :: ts, te
    double precision :: dnrm2

        ! alloc vector and matrix
        allocate(amat(SIZE*SIZE),bmat(SIZE*SIZE),cmat(SIZE*SIZE))

        ! setup matrix A B C
        do i = 1,SIZE
        do j = 1, SIZE
            amat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j))
            bmat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j))
            cmat(i + SIZE * (j - 1)) = 0.d0
        end do
    end do

        ! call my dgemv
        ts = omp_get_wtime()
        call mydgemm(SIZE, SIZE, 1.d0, amat, bmat, 0.d0, cmat)
        te = omp_get_wtime()
        print*, "|C|=", dnrm2(SIZE*SIZE, cmat, 1)
    print*, "Time : my dgemv = ", (te - ts),"(sec)"


        ! call blas dgemm
        ts = omp_get_wtime();
    call dgemm('N', 'N', SIZE, SIZE, SIZE, 1.d0, amat, SIZE, bmat,SIZE, 0.d0, cmat, SIZE)
    te = omp_get_wtime();
    print*, "|C|=", dnrm2(SIZE*SIZE, cmat, 1)
    print*, "Time : blas dgemm = ", (te - ts),"(sec)"

    deallocate(amat,bmat,cmat)
end Program blas3
Note: See TracWiki for help on using the wiki.