| | 1 | = Blas Level 2 = |
| | 2 | |
| | 3 | == C == |
| | 4 | {{{#!c |
| | 5 | /* |
| | 6 | Tulane HPC Workshop |
| | 7 | |
| | 8 | Blas Level 2 : dgemv test |
| | 9 | */ |
| | 10 | #include<stdio.h> |
| | 11 | #include<stdlib.h> |
| | 12 | #include<math.h> |
| | 13 | #include<time.h> |
| | 14 | |
| | 15 | #ifdef __APPLE__ |
| | 16 | #include <Accelerate/Accelerate.h> |
| | 17 | #else |
| | 18 | #ifdef MKL_ILP64 |
| | 19 | #include <mkl.h> |
| | 20 | #include <mkl_cblas.h> |
| | 21 | #else |
| | 22 | #include <atlas/cblas.h> |
| | 23 | #endif |
| | 24 | #endif |
| | 25 | |
| | 26 | #ifdef _OPENMP |
| | 27 | #include <omp.h> |
| | 28 | #endif |
| | 29 | |
| | 30 | #define SIZE 10000 |
| | 31 | |
| | 32 | /* my simple dgemv */ |
| | 33 | void mydgemv(int m, int n, double al, double *a, double *x, double bt, |
| | 34 | double *b) { |
| | 35 | int i, j; |
| | 36 | /* compute b = al*a*x + bt*b */ |
| | 37 | for (i = 0; i < n; i++) { |
| | 38 | b[i] *= bt; |
| | 39 | for (j = 0; j < m; j++) { |
| | 40 | b[i] += al * a[i + m * j] * x[j]; |
| | 41 | } |
| | 42 | } |
| | 43 | /* |
| | 44 | for (i = 0; i < n; i++) { |
| | 45 | b[i] *= bt; |
| | 46 | } |
| | 47 | for (j = 0; j < m; j++) { |
| | 48 | for (i = 0; i < n; i++) { |
| | 49 | b[i] += al * a[i + m * j] * x[j]; |
| | 50 | } |
| | 51 | } |
| | 52 | */ |
| | 53 | } |
| | 54 | int main(void) { |
| | 55 | int i, j, n, m, inc; |
| | 56 | double *xvec; |
| | 57 | double *bvec; |
| | 58 | double *amat; |
| | 59 | double alpha, beta; |
| | 60 | #ifdef _OPENMP |
| | 61 | double tsomp, teomp; |
| | 62 | #endif |
| | 63 | clock_t ts, te; |
| | 64 | |
| | 65 | /* alloc vector and matrix */ |
| | 66 | #ifdef MKL_ILP64 |
| | 67 | xvec = (double *)mkl_malloc(SIZE * sizeof(double), 64); |
| | 68 | bvec = (double *)mkl_malloc(SIZE * sizeof(double), 64); |
| | 69 | amat = (double *)mkl_malloc(SIZE * SIZE * sizeof(double), 64); |
| | 70 | #else |
| | 71 | xvec = (double *)calloc(SIZE, sizeof(double)); |
| | 72 | bvec = (double *)calloc(SIZE, sizeof(double)); |
| | 73 | amat = (double *)calloc(SIZE * SIZE, sizeof(double)); |
| | 74 | #endif |
| | 75 | if ((xvec == NULL ) || (bvec == NULL ) || (amat == NULL )) { |
| | 76 | exit(-1); |
| | 77 | } |
| | 78 | |
| | 79 | /* setup matrix A */ |
| | 80 | for (i = 0; i < SIZE; i++) { |
| | 81 | for (j = 0; j < SIZE; j++) { |
| | 82 | amat[i + SIZE * j] = i + 100.0 / (1.0 + fabs((double) i - (double) j)); |
| | 83 | } |
| | 84 | } |
| | 85 | |
| | 86 | /* setup vector x */ |
| | 87 | for (i = 0; i < SIZE; i++) |
| | 88 | xvec[i] = (double) i; |
| | 89 | |
| | 90 | /* calculate bvec = alpha*amat*xvec + beta*bvec */ |
| | 91 | alpha = 1.0; |
| | 92 | beta = 0.0; |
| | 93 | |
| | 94 | /* call my dgemv */ |
| | 95 | ts = clock(); |
| | 96 | mydgemv(SIZE, SIZE, alpha, amat, xvec, beta, bvec); |
| | 97 | te = clock(); |
| | 98 | |
| | 99 | printf("|b|=%f\n", cblas_dnrm2(SIZE, bvec, 1)); |
| | 100 | printf("Time : my dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); |
| | 101 | |
| | 102 | /* call blas dgemv */ |
| | 103 | #ifdef _OPENMP |
| | 104 | tsomp = omp_get_wtime(); |
| | 105 | #else |
| | 106 | ts = clock(); |
| | 107 | #endif |
| | 108 | |
| | 109 | cblas_dgemv(CblasColMajor, CblasNoTrans, SIZE, SIZE, 1.0, amat, SIZE, xvec,1, 0.0, bvec, 1); |
| | 110 | //cblas_dgemv(CblasRowMajor,CblasTrans,SIZE,SIZE,1.0,amat,SIZE,xvec,1,0.0,bvec,1); |
| | 111 | printf("|b|=%f\n", cblas_dnrm2(SIZE, bvec, 1)); |
| | 112 | #ifdef _OPENMP |
| | 113 | teomp = omp_get_wtime(); |
| | 114 | printf("Time : cblas dgemv = %f (sec) Threads=%d\n", (teomp - tsomp),omp_get_max_threads()); |
| | 115 | #else |
| | 116 | te = clock(); |
| | 117 | printf("Time : cblas dgemv = %f (sec)\n", 1.0 * (te - ts) / CLOCKS_PER_SEC); |
| | 118 | #endif |
| | 119 | |
| | 120 | |
| | 121 | |
| | 122 | #ifdef MKL_ILP64 |
| | 123 | mkl_free(xvec); |
| | 124 | mkl_free(bvec); |
| | 125 | mkl_free(amat); |
| | 126 | #else |
| | 127 | free(xvec); |
| | 128 | free(bvec); |
| | 129 | free(amat); |
| | 130 | #endif |
| | 131 | } |
| | 132 | }}} |
| | 133 | |
| | 134 | == C++ == |
| | 135 | {{{#!c++ |
| | 136 | // |
| | 137 | // Tulane HPC Workshop |
| | 138 | // |
| | 139 | // Blas Level 2 : dgemv test |
| | 140 | // |
| | 141 | // |
| | 142 | #include <iostream> |
| | 143 | #include <cstdlib> |
| | 144 | #include <fstream> |
| | 145 | #include <iomanip> |
| | 146 | #include <algorithm> |
| | 147 | #include <cmath> |
| | 148 | #include <ctime> |
| | 149 | #ifdef __APPLE__ |
| | 150 | #include <Accelerate/Accelerate.h> |
| | 151 | #else |
| | 152 | #ifdef MKL_ILP64 |
| | 153 | #include <mkl.h> |
| | 154 | #include <mkl_cblas.h> |
| | 155 | #else |
| | 156 | extern "C" { |
| | 157 | #include <atlas/cblas.h> |
| | 158 | } |
| | 159 | #endif |
| | 160 | #endif |
| | 161 | |
| | 162 | #ifdef OMP |
| | 163 | #include <omp.h> |
| | 164 | #endif |
| | 165 | |
| | 166 | /* my simple dgemv */ |
| | 167 | void mydgemv(int m, int n, double al, double *a, double *x, double bt, |
| | 168 | double *b) { |
| | 169 | /* compute b = al*a*x + bt*b */ |
| | 170 | for (int i = 0; i < n; i++) { |
| | 171 | b[i] *= bt; |
| | 172 | for (int j = 0; j < m; j++) { |
| | 173 | b[i] += al * a[i + m * j] * x[j]; |
| | 174 | } |
| | 175 | } |
| | 176 | /* |
| | 177 | // should test which routine is faster |
| | 178 | for (int i = 0 ; i < n ; i++){ |
| | 179 | b[i] *= bt; |
| | 180 | } |
| | 181 | for (int j = 0 ; j < m ; j++){ |
| | 182 | for (int i = 0 ; i < n ; i++){ |
| | 183 | b[i] += al * a[i + m * j] * x[j]; |
| | 184 | } |
| | 185 | } |
| | 186 | */ |
| | 187 | } |
| | 188 | |
| | 189 | int main(int argc, char **argv) { |
| | 190 | const unsigned int size = 10000; |
| | 191 | |
| | 192 | /* alloc vector and matrix */ |
| | 193 | #ifdef MKL_ILP64 |
| | 194 | double *xvec = (double *)mkl_malloc(size * sizeof(double), 64); |
| | 195 | double *bvec = (double *)mkl_malloc(size * sizeof(double), 64); |
| | 196 | double *amat = (double *)mkl_malloc(size * size * sizeof(double), 64); |
| | 197 | #else |
| | 198 | double *xvec = new double[size]; |
| | 199 | double *bvec = new double[size]; |
| | 200 | double *amat = new double[size * size]; |
| | 201 | #endif |
| | 202 | /* setup matrix A */ |
| | 203 | for (int i = 0; i < size; i++) { |
| | 204 | for (int j = 0; j < size; j++) { |
| | 205 | amat[i + size * j] = i + 100.0 / (1.0 + std::abs(i - j)); |
| | 206 | } |
| | 207 | } |
| | 208 | |
| | 209 | /* setup vector x */ |
| | 210 | for (int i = 0; i < size; i++) |
| | 211 | xvec[i] = i; |
| | 212 | |
| | 213 | #ifdef _OPENMP |
| | 214 | double tsomp, teomp; |
| | 215 | #endif |
| | 216 | clock_t ts, te; |
| | 217 | |
| | 218 | /* call my dgemv */ |
| | 219 | ts = std::clock(); |
| | 220 | mydgemv(size, size, 1.0, amat, xvec, 0.0, bvec); |
| | 221 | te = std::clock(); |
| | 222 | std::cout << "|b|=" << cblas_dnrm2(size, bvec, 1) << std::endl; |
| | 223 | std::cout << "Time : my dgemv = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; |
| | 224 | |
| | 225 | /* call blas dgemv */ |
| | 226 | #ifdef _OPENMP |
| | 227 | tsomp = omp_get_wtime(); |
| | 228 | #else |
| | 229 | ts = std::clock(); |
| | 230 | #endif |
| | 231 | cblas_dgemv(CblasColMajor, CblasNoTrans, size, size, 1.0, amat, size, xvec, 1, 0.0, bvec, 1); |
| | 232 | //cblas_dgemv(CblasRowMajor,CblasTrans,size,size,1.0,amat,size,xvec,1,0.0,bvec,1); |
| | 233 | std::cout << "|b|=" << cblas_dnrm2(size,bvec,1) << std::endl; |
| | 234 | #ifdef _OPENMP |
| | 235 | teomp = omp_get_wtime(); |
| | 236 | std::cout << "Time : cblas dgemv = " << (teomp - tsomp) << "(sec) Threads=" << omp_get_max_threads() << std::endl; |
| | 237 | #else |
| | 238 | te = std::clock(); |
| | 239 | std::cout << "Time : cblas dgemv = " << 1.0 * (te - ts) / CLOCKS_PER_SEC << "(sec)\n"; |
| | 240 | #endif |
| | 241 | |
| | 242 | #ifdef MKL_ILP64 |
| | 243 | mkl_free(xvec); |
| | 244 | mkl_free(bvec); |
| | 245 | mkl_free(amat); |
| | 246 | #else |
| | 247 | delete[] xvec; |
| | 248 | delete[] bvec; |
| | 249 | delete[] amat; |
| | 250 | #endif |
| | 251 | } |
| | 252 | }}} |
| | 253 | |
| | 254 | == Fortran == |
| | 255 | |
| | 256 | {{{#!fortran |
| | 257 | ! |
| | 258 | ! Tulane HPC Workshop |
| | 259 | ! |
| | 260 | ! Blas Level 2 : dgemv test |
| | 261 | ! |
| | 262 | ! |
| | 263 | !------------------------------------------- |
| | 264 | ! my simple dgemv |
| | 265 | subroutine mydgemv(m, n, al, a, x, bt, b) |
| | 266 | implicit none |
| | 267 | integer :: i, j, n, m |
| | 268 | double precision :: x(m), b(n), a(n * m) |
| | 269 | double precision :: al, bt |
| | 270 | |
| | 271 | ! compute b = al*a*x + bt*b */ |
| | 272 | do i = 1,n |
| | 273 | b(i) = b(i) * bt |
| | 274 | do j = 1, m |
| | 275 | b(i) = b(i) + al * a(i + m * (j - 1)) * x(j) |
| | 276 | end do |
| | 277 | end do |
| | 278 | end subroutine mydgemv |
| | 279 | ! |
| | 280 | Program blas2 |
| | 281 | use omp_lib !Provides OpenMP* specific APIs |
| | 282 | implicit none |
| | 283 | integer, parameter:: SIZE = 5000 |
| | 284 | integer :: i, j |
| | 285 | double precision, allocatable, dimension(:) :: xvec,bvec,amat |
| | 286 | double precision :: ts, te |
| | 287 | double precision :: dnrm2 |
| | 288 | |
| | 289 | ! alloc vector and matrix |
| | 290 | allocate(xvec(SIZE),bvec(SIZE),amat(SIZE*SIZE)) |
| | 291 | |
| | 292 | ! setup matrix A |
| | 293 | do i = 1,SIZE |
| | 294 | do j = 1, SIZE |
| | 295 | amat(i + SIZE * (j - 1)) = i - 1 + 100.0 / (1.0 + abs(i - j)) |
| | 296 | end do |
| | 297 | end do |
| | 298 | |
| | 299 | ! setup vector x |
| | 300 | do i = 1,SIZE |
| | 301 | xvec(i) = i - 1 |
| | 302 | end do |
| | 303 | |
| | 304 | ! call my dgemv |
| | 305 | ts = omp_get_wtime() |
| | 306 | call mydgemv(SIZE, SIZE, 1.d0, amat, xvec, 0.d0, bvec); |
| | 307 | te = omp_get_wtime() |
| | 308 | print*, "|b|=", dnrm2(SIZE, bvec, 1) |
| | 309 | print*, "Time : my dgemv = ", (te - ts),"(sec)" |
| | 310 | |
| | 311 | ! call blas dgemv |
| | 312 | ts = omp_get_wtime(); |
| | 313 | call dgemv('N', SIZE, SIZE, 1.d0, amat, size, xvec,1, 0.d0, bvec, 1) |
| | 314 | te = omp_get_wtime(); |
| | 315 | print*, "|b|=", dnrm2(SIZE, bvec, 1) |
| | 316 | print*, "Time : blas dgemv = ", (te - ts),"(sec)" |
| | 317 | |
| | 318 | deallocate(xvec,bvec,amat) |
| | 319 | end Program blas2 |
| | 320 | }}} |