| | 1 | = Blas Level 3 = |
| | 2 | == C == |
| | 3 | {{{#!c |
| | 4 | /* |
| | 5 | Tulane HPC Workshop |
| | 6 | |
| | 7 | Blas Level 3 : dgemm test |
| | 8 | */ |
| | 9 | #include<stdio.h> |
| | 10 | #include<stdlib.h> |
| | 11 | #include<math.h> |
| | 12 | #include<time.h> |
| | 13 | |
| | 14 | #ifdef __APPLE__ |
| | 15 | #include <Accelerate/Accelerate.h> |
| | 16 | #else |
| | 17 | #ifdef MKL_ILP64 |
| | 18 | #include <mkl.h> |
| | 19 | #include <mkl_cblas.h> |
| | 20 | #else |
| | 21 | #include <atlas/cblas.h> |
| | 22 | #endif |
| | 23 | #endif |
| | 24 | |
| | 25 | #ifdef _OPENMP |
| | 26 | #include <omp.h> |
| | 27 | #endif |
| | 28 | |
| | 29 | #define SIZE 2000 |
| | 30 | |
| | 31 | /* my simple dgemm */ |
| | 32 | void mydgemm(int m, int n, double al, double *a, double *b, double bt, double *c) { |
| | 33 | int i, j, jj; |
| | 34 | /* compute C := alpha * AB + beta * C */ |
| | 35 | for (i = 0; i < n; i++) { |
| | 36 | for (j = 0; j < m; j++) { |
| | 37 | c[i + m * j] *= bt; |
| | 38 | for (jj = 0; jj < m; jj++) { |
| | 39 | c[i + m * j] += al * a[i + m * jj] * b[jj + m * j]; |
| | 40 | } |
| | 41 | } |
| | 42 | } |
| | 43 | } |
| | 44 | int main(void) { |
| | 45 | int i, j, n, m, inc; |
| | 46 | double *cmat; |
| | 47 | double *bmat; |
| | 48 | double *amat; |
| | 49 | double alpha, beta; |
| | 50 | #ifdef _OPENMP |
| | 51 | double tsomp, teomp; |
| | 52 | #endif |
| | 53 | clock_t ts, te; |
| | 54 | |
| | 55 | /* alloc vector and matrix */ |
| | 56 | #ifdef MKL_ILP64 |
| | 57 | cmat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| | 58 | bmat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| | 59 | amat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| | 60 | #else |
| | 61 | cmat = calloc(SIZE * SIZE, sizeof(double)); |
| | 62 | bmat = calloc(SIZE * SIZE, sizeof(double)); |
| | 63 | amat = calloc(SIZE * SIZE, sizeof(double)); |
| | 64 | #endif |
| | 65 | if ((cmat == NULL ) || (bmat == NULL ) || (amat == NULL )) { |
| | 66 | exit(-1); |
| | 67 | } |
| | 68 | |
| | 69 | /* setup matrix A B C*/ |
| | 70 | for (i = 0; i < SIZE; i++) { |
| | 71 | for (j = 0; j < SIZE; j++) { |
| | 72 | amat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| | 73 | bmat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| | 74 | cmat[i + SIZE * j] = 0.0; |
| | 75 | } |
| | 76 | } |
| | 77 | |
| | 78 | /* calculate bvec = alpha*amat*bmat + beta*cmat */ |
| | 79 | alpha = 1.0; |
| | 80 | beta = 0.0; |
| | 81 | |
| | 82 | /* call my dgemv */ |
| | 83 | ts = clock(); |
| | 84 | mydgemm(SIZE, SIZE, alpha, amat, bmat, beta, cmat); |
| | 85 | te = clock(); |
| | 86 | |
| | 87 | printf("|C|=%f\n", cblas_dnrm2(SIZE * SIZE, cmat, 1)); |
| | 88 | printf("Time : my dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); |
| | 89 | |
| | 90 | /* call blas dgemm */ |
| | 91 | #ifdef _OPENMP |
| | 92 | tsomp = omp_get_wtime(); |
| | 93 | #else |
| | 94 | ts = clock(); |
| | 95 | #endif |
| | 96 | cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, SIZE, SIZE, SIZE, 1.0, amat, SIZE, bmat,SIZE, 0.0, cmat, SIZE); |
| | 97 | printf("|C|=%f\n", cblas_dnrm2(SIZE * SIZE, cmat, 1)); |
| | 98 | #ifdef _OPENMP |
| | 99 | teomp = omp_get_wtime(); |
| | 100 | printf("Time : cblas dgemv = %f (sec) Threads=%d\n", (teomp - tsomp), omp_get_max_threads()); |
| | 101 | #else |
| | 102 | te = clock(); |
| | 103 | printf("Time : cblas dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); |
| | 104 | #endif |
| | 105 | |
| | 106 | #ifdef MKL_ILP64 |
| | 107 | mkl_free(cmat); |
| | 108 | mkl_free(bmat); |
| | 109 | mkl_free(amat); |
| | 110 | #else |
| | 111 | free(cmat); |
| | 112 | free(bmat); |
| | 113 | free(amat); |
| | 114 | #endif |
| | 115 | } |
| | 116 | }}} |
| | 117 | |
| | 118 | == C++ == |
| | 119 | {{{#!c++ |
| | 120 | // |
| | 121 | // Tulane HPC Workshop |
| | 122 | // |
| | 123 | // Blas Level 3 : dgemm test |
| | 124 | // |
| | 125 | #include <iostream> |
| | 126 | #include <cstdlib> |
| | 127 | #include <fstream> |
| | 128 | #include <iomanip> |
| | 129 | #include <algorithm> |
| | 130 | #include <cmath> |
| | 131 | #include <ctime> |
| | 132 | #ifdef __APPLE__ |
| | 133 | #include <Accelerate/Accelerate.h> |
| | 134 | #else |
| | 135 | #ifdef MKL_ILP64 |
| | 136 | #include <mkl.h> |
| | 137 | #include <mkl_cblas.h> |
| | 138 | #else |
| | 139 | extern "C" { |
| | 140 | #include <atlas/cblas.h> |
| | 141 | } |
| | 142 | #endif |
| | 143 | #endif |
| | 144 | |
| | 145 | #ifdef _OPENMP |
| | 146 | #include <omp.h> |
| | 147 | #endif |
| | 148 | |
| | 149 | /* my simple dgemm */ |
| | 150 | void mydgemm(int m, int n, double al, double *a, double *b, double bt, double *c) { |
| | 151 | /* compute C := alpha * AB + beta * C */ |
| | 152 | for (int i = 0; i < n; i++) { |
| | 153 | for (int j = 0; j < m; j++) { |
| | 154 | c[i + m * j] *= bt; |
| | 155 | for (int jj = 0; jj < m; jj++) { |
| | 156 | c[i + m * j] += al * a[i + m * jj] * b[jj + m * j]; |
| | 157 | } |
| | 158 | } |
| | 159 | } |
| | 160 | } |
| | 161 | int main(void) { |
| | 162 | const unsigned int size = 2000; |
| | 163 | |
| | 164 | /* alloc vector and matrix */ |
| | 165 | #ifdef MKL_ILP64 |
| | 166 | double *cmat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| | 167 | double *bmat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| | 168 | double *amat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| | 169 | #else |
| | 170 | double *cmat = new double[size * size]; |
| | 171 | double *bmat = new double[size * size]; |
| | 172 | double *amat = new double[size * size]; |
| | 173 | #endif |
| | 174 | |
| | 175 | double alpha, beta; |
| | 176 | |
| | 177 | /* setup matrix A B C*/ |
| | 178 | for (int i = 0; i < size; i++) { |
| | 179 | for (int j = 0; j < size; j++) { |
| | 180 | amat[i + size * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| | 181 | bmat[i + size * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| | 182 | cmat[i + size * j] = 0.0; |
| | 183 | } |
| | 184 | } |
| | 185 | |
| | 186 | #ifdef _OPENMP |
| | 187 | double tsomp, teomp; |
| | 188 | #endif |
| | 189 | clock_t ts, te; |
| | 190 | |
| | 191 | /* calculate bvec = alpha*amat*bmat + beta*cmat */ |
| | 192 | alpha = 1.0; |
| | 193 | beta = 0.0; |
| | 194 | |
| | 195 | /* call my dgemm */ |
| | 196 | ts = clock(); |
| | 197 | mydgemm(size, size, alpha, amat, bmat, beta, cmat); |
| | 198 | te = clock(); |
| | 199 | std::cout << "|C|=" << cblas_dnrm2(size * size, cmat, 1) << std::endl; |
| | 200 | std::cout << "Time : my dgemm = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; |
| | 201 | |
| | 202 | /* call blas dgemm */ |
| | 203 | #ifdef _OPENMP |
| | 204 | tsomp = omp_get_wtime(); |
| | 205 | #else |
| | 206 | ts = clock(); |
| | 207 | #endif |
| | 208 | |
| | 209 | cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, size, size, size, 1.0, amat, size, bmat,size, 0.0, cmat, size); |
| | 210 | std::cout << "|C|=" << cblas_dnrm2(size * size, cmat, 1) << std::endl; |
| | 211 | |
| | 212 | #ifdef _OPENMP |
| | 213 | teomp = omp_get_wtime(); |
| | 214 | std::cout << "Time : cblas dgemm = " << (teomp - tsomp) << "(sec) Threads=" << omp_get_max_threads() << std::endl; |
| | 215 | #else |
| | 216 | te = clock(); |
| | 217 | std::cout << "Time : cblas dgemm = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; |
| | 218 | #endif |
| | 219 | |
| | 220 | |
| | 221 | |
| | 222 | #ifdef MKL_ILP64 |
| | 223 | mkl_free(cmat); |
| | 224 | mkl_free(bmat); |
| | 225 | mkl_free(amat); |
| | 226 | #else |
| | 227 | delete [] cmat; |
| | 228 | delete [] bmat; |
| | 229 | delete [] amat; |
| | 230 | #endif |
| | 231 | } |
| | 232 | }}} |
| | 233 | |
| | 234 | == Fortran == |
| | 235 | {{{#!fortran |
| | 236 | ! |
| | 237 | ! Tulane HPC Workshop |
| | 238 | ! |
| | 239 | ! Blas Level 3 : dgemm test |
| | 240 | ! |
| | 241 | |
| | 242 | ! my simple dgemm |
| | 243 | subroutine mydgemm(m, n, al, a, b, bt, c) |
| | 244 | implicit none |
| | 245 | integer :: i, j, jj, n, m |
| | 246 | double precision :: a(n * m),b(n * m),c(n * m) |
| | 247 | double precision :: al, bt |
| | 248 | |
| | 249 | ! compute C := alpha * AB + beta * C |
| | 250 | do i = 1, n |
| | 251 | do j = 1, m |
| | 252 | c(i + m * (j - 1)) = bt * c(i + m * (j - 1)) |
| | 253 | do jj = 1,m |
| | 254 | c(i + m * (j - 1)) = c(i + m * (j - 1)) + al * a(i + m * (jj - 1)) * b(jj + m * (j - 1)) |
| | 255 | end do |
| | 256 | end do |
| | 257 | end do |
| | 258 | end subroutine mydgemm |
| | 259 | ! |
| | 260 | ! |
| | 261 | Program blas3 |
| | 262 | use omp_lib !Provides OpenMP* specific APIs |
| | 263 | implicit none |
| | 264 | integer, parameter:: SIZE = 2000 |
| | 265 | integer :: i, j |
| | 266 | double precision, allocatable, dimension(:) :: amat,bmat,cmat |
| | 267 | double precision :: ts, te |
| | 268 | double precision :: dnrm2 |
| | 269 | |
| | 270 | ! alloc vector and matrix |
| | 271 | allocate(amat(SIZE*SIZE),bmat(SIZE*SIZE),cmat(SIZE*SIZE)) |
| | 272 | |
| | 273 | ! setup matrix A B C |
| | 274 | do i = 1,SIZE |
| | 275 | do j = 1, SIZE |
| | 276 | amat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j)) |
| | 277 | bmat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j)) |
| | 278 | cmat(i + SIZE * (j - 1)) = 0.d0 |
| | 279 | end do |
| | 280 | end do |
| | 281 | |
| | 282 | ! call my dgemv |
| | 283 | ts = omp_get_wtime() |
| | 284 | call mydgemm(SIZE, SIZE, 1.d0, amat, bmat, 0.d0, cmat) |
| | 285 | te = omp_get_wtime() |
| | 286 | print*, "|C|=", dnrm2(SIZE*SIZE, cmat, 1) |
| | 287 | print*, "Time : my dgemv = ", (te - ts),"(sec)" |
| | 288 | |
| | 289 | |
| | 290 | ! call blas dgemm |
| | 291 | ts = omp_get_wtime(); |
| | 292 | call dgemm('N', 'N', SIZE, SIZE, SIZE, 1.d0, amat, SIZE, bmat,SIZE, 0.d0, cmat, SIZE) |
| | 293 | te = omp_get_wtime(); |
| | 294 | print*, "|C|=", dnrm2(SIZE*SIZE, cmat, 1) |
| | 295 | print*, "Time : blas dgemm = ", (te - ts),"(sec)" |
| | 296 | |
| | 297 | deallocate(amat,bmat,cmat) |
| | 298 | end Program blas3 |
| | 299 | }}} |