| 1 | = Blas Level 3 = |
| 2 | == C == |
| 3 | {{{#!c |
| 4 | /* |
| 5 | Tulane HPC Workshop |
| 6 | |
| 7 | Blas Level 3 : dgemm test |
| 8 | */ |
| 9 | #include<stdio.h> |
| 10 | #include<stdlib.h> |
| 11 | #include<math.h> |
| 12 | #include<time.h> |
| 13 | |
| 14 | #ifdef __APPLE__ |
| 15 | #include <Accelerate/Accelerate.h> |
| 16 | #else |
| 17 | #ifdef MKL_ILP64 |
| 18 | #include <mkl.h> |
| 19 | #include <mkl_cblas.h> |
| 20 | #else |
| 21 | #include <atlas/cblas.h> |
| 22 | #endif |
| 23 | #endif |
| 24 | |
| 25 | #ifdef _OPENMP |
| 26 | #include <omp.h> |
| 27 | #endif |
| 28 | |
| 29 | #define SIZE 2000 |
| 30 | |
| 31 | /* my simple dgemm */ |
| 32 | void mydgemm(int m, int n, double al, double *a, double *b, double bt, double *c) { |
| 33 | int i, j, jj; |
| 34 | /* compute C := alpha * AB + beta * C */ |
| 35 | for (i = 0; i < n; i++) { |
| 36 | for (j = 0; j < m; j++) { |
| 37 | c[i + m * j] *= bt; |
| 38 | for (jj = 0; jj < m; jj++) { |
| 39 | c[i + m * j] += al * a[i + m * jj] * b[jj + m * j]; |
| 40 | } |
| 41 | } |
| 42 | } |
| 43 | } |
| 44 | int main(void) { |
| 45 | int i, j, n, m, inc; |
| 46 | double *cmat; |
| 47 | double *bmat; |
| 48 | double *amat; |
| 49 | double alpha, beta; |
| 50 | #ifdef _OPENMP |
| 51 | double tsomp, teomp; |
| 52 | #endif |
| 53 | clock_t ts, te; |
| 54 | |
| 55 | /* alloc vector and matrix */ |
| 56 | #ifdef MKL_ILP64 |
| 57 | cmat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| 58 | bmat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| 59 | amat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| 60 | #else |
| 61 | cmat = calloc(SIZE * SIZE, sizeof(double)); |
| 62 | bmat = calloc(SIZE * SIZE, sizeof(double)); |
| 63 | amat = calloc(SIZE * SIZE, sizeof(double)); |
| 64 | #endif |
| 65 | if ((cmat == NULL ) || (bmat == NULL ) || (amat == NULL )) { |
| 66 | exit(-1); |
| 67 | } |
| 68 | |
| 69 | /* setup matrix A B C*/ |
| 70 | for (i = 0; i < SIZE; i++) { |
| 71 | for (j = 0; j < SIZE; j++) { |
| 72 | amat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| 73 | bmat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| 74 | cmat[i + SIZE * j] = 0.0; |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | /* calculate bvec = alpha*amat*bmat + beta*cmat */ |
| 79 | alpha = 1.0; |
| 80 | beta = 0.0; |
| 81 | |
| 82 | /* call my dgemv */ |
| 83 | ts = clock(); |
| 84 | mydgemm(SIZE, SIZE, alpha, amat, bmat, beta, cmat); |
| 85 | te = clock(); |
| 86 | |
| 87 | printf("|C|=%f\n", cblas_dnrm2(SIZE * SIZE, cmat, 1)); |
| 88 | printf("Time : my dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); |
| 89 | |
| 90 | /* call blas dgemm */ |
| 91 | #ifdef _OPENMP |
| 92 | tsomp = omp_get_wtime(); |
| 93 | #else |
| 94 | ts = clock(); |
| 95 | #endif |
| 96 | cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, SIZE, SIZE, SIZE, 1.0, amat, SIZE, bmat,SIZE, 0.0, cmat, SIZE); |
| 97 | printf("|C|=%f\n", cblas_dnrm2(SIZE * SIZE, cmat, 1)); |
| 98 | #ifdef _OPENMP |
| 99 | teomp = omp_get_wtime(); |
| 100 | printf("Time : cblas dgemv = %f (sec) Threads=%d\n", (teomp - tsomp), omp_get_max_threads()); |
| 101 | #else |
| 102 | te = clock(); |
| 103 | printf("Time : cblas dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); |
| 104 | #endif |
| 105 | |
| 106 | #ifdef MKL_ILP64 |
| 107 | mkl_free(cmat); |
| 108 | mkl_free(bmat); |
| 109 | mkl_free(amat); |
| 110 | #else |
| 111 | free(cmat); |
| 112 | free(bmat); |
| 113 | free(amat); |
| 114 | #endif |
| 115 | } |
| 116 | }}} |
| 117 | |
| 118 | == C++ == |
| 119 | {{{#!c++ |
| 120 | // |
| 121 | // Tulane HPC Workshop |
| 122 | // |
| 123 | // Blas Level 3 : dgemm test |
| 124 | // |
| 125 | #include <iostream> |
| 126 | #include <cstdlib> |
| 127 | #include <fstream> |
| 128 | #include <iomanip> |
| 129 | #include <algorithm> |
| 130 | #include <cmath> |
| 131 | #include <ctime> |
| 132 | #ifdef __APPLE__ |
| 133 | #include <Accelerate/Accelerate.h> |
| 134 | #else |
| 135 | #ifdef MKL_ILP64 |
| 136 | #include <mkl.h> |
| 137 | #include <mkl_cblas.h> |
| 138 | #else |
| 139 | extern "C" { |
| 140 | #include <atlas/cblas.h> |
| 141 | } |
| 142 | #endif |
| 143 | #endif |
| 144 | |
| 145 | #ifdef _OPENMP |
| 146 | #include <omp.h> |
| 147 | #endif |
| 148 | |
| 149 | /* my simple dgemm */ |
| 150 | void mydgemm(int m, int n, double al, double *a, double *b, double bt, double *c) { |
| 151 | /* compute C := alpha * AB + beta * C */ |
| 152 | for (int i = 0; i < n; i++) { |
| 153 | for (int j = 0; j < m; j++) { |
| 154 | c[i + m * j] *= bt; |
| 155 | for (int jj = 0; jj < m; jj++) { |
| 156 | c[i + m * j] += al * a[i + m * jj] * b[jj + m * j]; |
| 157 | } |
| 158 | } |
| 159 | } |
| 160 | } |
| 161 | int main(void) { |
| 162 | const unsigned int size = 2000; |
| 163 | |
| 164 | /* alloc vector and matrix */ |
| 165 | #ifdef MKL_ILP64 |
| 166 | double *cmat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| 167 | double *bmat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| 168 | double *amat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| 169 | #else |
| 170 | double *cmat = new double[size * size]; |
| 171 | double *bmat = new double[size * size]; |
| 172 | double *amat = new double[size * size]; |
| 173 | #endif |
| 174 | |
| 175 | double alpha, beta; |
| 176 | |
| 177 | /* setup matrix A B C*/ |
| 178 | for (int i = 0; i < size; i++) { |
| 179 | for (int j = 0; j < size; j++) { |
| 180 | amat[i + size * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| 181 | bmat[i + size * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| 182 | cmat[i + size * j] = 0.0; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | #ifdef _OPENMP |
| 187 | double tsomp, teomp; |
| 188 | #endif |
| 189 | clock_t ts, te; |
| 190 | |
| 191 | /* calculate bvec = alpha*amat*bmat + beta*cmat */ |
| 192 | alpha = 1.0; |
| 193 | beta = 0.0; |
| 194 | |
| 195 | /* call my dgemm */ |
| 196 | ts = clock(); |
| 197 | mydgemm(size, size, alpha, amat, bmat, beta, cmat); |
| 198 | te = clock(); |
| 199 | std::cout << "|C|=" << cblas_dnrm2(size * size, cmat, 1) << std::endl; |
| 200 | std::cout << "Time : my dgemm = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; |
| 201 | |
| 202 | /* call blas dgemm */ |
| 203 | #ifdef _OPENMP |
| 204 | tsomp = omp_get_wtime(); |
| 205 | #else |
| 206 | ts = clock(); |
| 207 | #endif |
| 208 | |
| 209 | cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, size, size, size, 1.0, amat, size, bmat,size, 0.0, cmat, size); |
| 210 | std::cout << "|C|=" << cblas_dnrm2(size * size, cmat, 1) << std::endl; |
| 211 | |
| 212 | #ifdef _OPENMP |
| 213 | teomp = omp_get_wtime(); |
| 214 | std::cout << "Time : cblas dgemm = " << (teomp - tsomp) << "(sec) Threads=" << omp_get_max_threads() << std::endl; |
| 215 | #else |
| 216 | te = clock(); |
| 217 | std::cout << "Time : cblas dgemm = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; |
| 218 | #endif |
| 219 | |
| 220 | |
| 221 | |
| 222 | #ifdef MKL_ILP64 |
| 223 | mkl_free(cmat); |
| 224 | mkl_free(bmat); |
| 225 | mkl_free(amat); |
| 226 | #else |
| 227 | delete [] cmat; |
| 228 | delete [] bmat; |
| 229 | delete [] amat; |
| 230 | #endif |
| 231 | } |
| 232 | }}} |
| 233 | |
| 234 | == Fortran == |
| 235 | {{{#!fortran |
| 236 | ! |
| 237 | ! Tulane HPC Workshop |
| 238 | ! |
| 239 | ! Blas Level 3 : dgemm test |
| 240 | ! |
| 241 | |
| 242 | ! my simple dgemm |
| 243 | subroutine mydgemm(m, n, al, a, b, bt, c) |
| 244 | implicit none |
| 245 | integer :: i, j, jj, n, m |
| 246 | double precision :: a(n * m),b(n * m),c(n * m) |
| 247 | double precision :: al, bt |
| 248 | |
| 249 | ! compute C := alpha * AB + beta * C |
| 250 | do i = 1, n |
| 251 | do j = 1, m |
| 252 | c(i + m * (j - 1)) = bt * c(i + m * (j - 1)) |
| 253 | do jj = 1,m |
| 254 | c(i + m * (j - 1)) = c(i + m * (j - 1)) + al * a(i + m * (jj - 1)) * b(jj + m * (j - 1)) |
| 255 | end do |
| 256 | end do |
| 257 | end do |
| 258 | end subroutine mydgemm |
| 259 | ! |
| 260 | ! |
| 261 | Program blas3 |
| 262 | use omp_lib !Provides OpenMP* specific APIs |
| 263 | implicit none |
| 264 | integer, parameter:: SIZE = 2000 |
| 265 | integer :: i, j |
| 266 | double precision, allocatable, dimension(:) :: amat,bmat,cmat |
| 267 | double precision :: ts, te |
| 268 | double precision :: dnrm2 |
| 269 | |
| 270 | ! alloc vector and matrix |
| 271 | allocate(amat(SIZE*SIZE),bmat(SIZE*SIZE),cmat(SIZE*SIZE)) |
| 272 | |
| 273 | ! setup matrix A B C |
| 274 | do i = 1,SIZE |
| 275 | do j = 1, SIZE |
| 276 | amat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j)) |
| 277 | bmat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j)) |
| 278 | cmat(i + SIZE * (j - 1)) = 0.d0 |
| 279 | end do |
| 280 | end do |
| 281 | |
| 282 | ! call my dgemv |
| 283 | ts = omp_get_wtime() |
| 284 | call mydgemm(SIZE, SIZE, 1.d0, amat, bmat, 0.d0, cmat) |
| 285 | te = omp_get_wtime() |
| 286 | print*, "|C|=", dnrm2(SIZE*SIZE, cmat, 1) |
| 287 | print*, "Time : my dgemv = ", (te - ts),"(sec)" |
| 288 | |
| 289 | |
| 290 | ! call blas dgemm |
| 291 | ts = omp_get_wtime(); |
| 292 | call dgemm('N', 'N', SIZE, SIZE, SIZE, 1.d0, amat, SIZE, bmat,SIZE, 0.d0, cmat, SIZE) |
| 293 | te = omp_get_wtime(); |
| 294 | print*, "|C|=", dnrm2(SIZE*SIZE, cmat, 1) |
| 295 | print*, "Time : blas dgemm = ", (te - ts),"(sec)" |
| 296 | |
| 297 | deallocate(amat,bmat,cmat) |
| 298 | end Program blas3 |
| 299 | }}} |