| | 1 | = Blas Level 1 = |
| | 2 | |
| | 3 | == C == |
| | 4 | {{{#!c |
| | 5 | /* |
| | 6 | Tulane HPC Workshop |
| | 7 | |
| | 8 | Blas Level 1 : norm2 test |
| | 9 | */ |
| | 10 | #include<stdio.h> |
| | 11 | #include<stdlib.h> |
| | 12 | #include<math.h> |
| | 13 | #include<time.h> |
| | 14 | |
| | 15 | #ifdef __APPLE__ |
| | 16 | #include <Accelerate/Accelerate.h> |
| | 17 | #else |
| | 18 | #ifdef MKL_ILP64 |
| | 19 | #include <mkl.h> |
| | 20 | #include <mkl_cblas.h> |
| | 21 | #else |
| | 22 | #include <atlas/cblas.h> |
| | 23 | #endif |
| | 24 | #endif |
| | 25 | |
| | 26 | #ifdef _OPENMP |
| | 27 | #include <omp.h> |
| | 28 | #endif |
| | 29 | |
| | 30 | #define SIZE 50000000 |
| | 31 | |
| | 32 | /* my norm2 */ |
| | 33 | double mynorm2(int n, double *x) { |
| | 34 | int i; |
| | 35 | double nrm = 0.0; |
| | 36 | for (i = 0; i < n; i++) { |
| | 37 | nrm += x[i] * x[i]; |
| | 38 | } |
| | 39 | return sqrt(nrm); |
| | 40 | } |
| | 41 | |
| | 42 | int main(int argc, char **argv) { |
| | 43 | int i, n, m; |
| | 44 | double *xvec; |
| | 45 | double nrm; |
| | 46 | #ifdef _OPENMP |
| | 47 | double tsomp, teomp; |
| | 48 | #endif |
| | 49 | clock_t ts, te; |
| | 50 | |
| | 51 | |
| | 52 | /* alloc vector and setting */ |
| | 53 | #ifdef MKL_ILP64 |
| | 54 | xvec = (double *)mkl_malloc(SIZE * sizeof(double),64); |
| | 55 | #else |
| | 56 | xvec = (double *)calloc(SIZE, sizeof(double)); |
| | 57 | #endif |
| | 58 | if (xvec == NULL) |
| | 59 | exit(-1); |
| | 60 | for (i = 0; i < SIZE; i++) |
| | 61 | xvec[i] = (double) (i % 100) / 50.0; |
| | 62 | |
| | 63 | /* call my norm 2*/ |
| | 64 | ts = clock(); |
| | 65 | nrm = mynorm2(SIZE, xvec); |
| | 66 | te = clock() ; |
| | 67 | printf(" my norm |x|=%e time=%f (msec)\n", nrm, 1000.0 * (te - ts) / CLOCKS_PER_SEC); |
| | 68 | |
| | 69 | /* call blas norm2 */ |
| | 70 | #ifdef _OPENMP |
| | 71 | tsomp = omp_get_wtime(); |
| | 72 | #else |
| | 73 | ts = clock(); |
| | 74 | #endif |
| | 75 | |
| | 76 | nrm = cblas_dnrm2(SIZE, xvec, 1); |
| | 77 | |
| | 78 | #ifdef _OPENMP |
| | 79 | teomp = omp_get_wtime(); |
| | 80 | printf("blas norm |x|=%e time=%f (msec) threads=%d\n",nrm,1000.0 * (teomp - tsomp), omp_get_max_threads()); |
| | 81 | #else |
| | 82 | te = clock(); |
| | 83 | printf("blas norm |x|=%e time=%f (msec)\n",nrm,1000.0 * (te - ts) / CLOCKS_PER_SEC); |
| | 84 | #endif |
| | 85 | |
| | 86 | |
| | 87 | /* cleanup */ |
| | 88 | #ifdef MKL_ILP64 |
| | 89 | mkl_free(xvec); |
| | 90 | #else |
| | 91 | free(xvec); |
| | 92 | #endif |
| | 93 | } |
| | 94 | }}} |
| | 95 | |
| | 96 | == C++ == |
| | 97 | {{{#!c++ |
| | 98 | // |
| | 99 | // Tulane HPC Workshop |
| | 100 | // |
| | 101 | // Blas Level 1 : norm2 test |
| | 102 | // |
| | 103 | |
| | 104 | #include <iostream> |
| | 105 | #include <cstdlib> |
| | 106 | #include <fstream> |
| | 107 | #include <iomanip> |
| | 108 | #include <algorithm> |
| | 109 | #include <cmath> |
| | 110 | #include <ctime> |
| | 111 | |
| | 112 | #ifdef __APPLE__ |
| | 113 | #include <Accelerate/Accelerate.h> |
| | 114 | #else |
| | 115 | #ifdef MKL_ILP64 |
| | 116 | #include <mkl.h> |
| | 117 | #include <mkl_cblas.h> |
| | 118 | #else |
| | 119 | extern "C" { |
| | 120 | #include <atlas/cblas.h> |
| | 121 | } |
| | 122 | #endif |
| | 123 | #endif |
| | 124 | |
| | 125 | #ifdef _OPENMP |
| | 126 | #include <omp.h> |
| | 127 | #endif |
| | 128 | |
| | 129 | /* my norm2 */ |
| | 130 | double mynorm2(unsigned int n,double *x){ |
| | 131 | double nrm = 0.0; |
| | 132 | for (unsigned int i = 0 ; i < n ; i++){ |
| | 133 | nrm += x[i] * x[i]; |
| | 134 | } |
| | 135 | return std::sqrt(nrm); |
| | 136 | } |
| | 137 | |
| | 138 | int main(int argc, char **argv){ |
| | 139 | const unsigned int size= 50000000; |
| | 140 | double nrm; |
| | 141 | |
| | 142 | std::cout << std::setprecision(16); |
| | 143 | |
| | 144 | /* alloc vector and setting */ |
| | 145 | #ifdef MKL_ILP64 |
| | 146 | double *xvec = (double *)mkl_malloc(size * sizeof(double), 64); |
| | 147 | #else |
| | 148 | double *xvec = new double[size]; |
| | 149 | #endif |
| | 150 | for (int i = 0 ; i < size ; i++) xvec[i] = (i % 100) / 50.0; |
| | 151 | |
| | 152 | #ifdef _OPENMP |
| | 153 | double tsomp, teomp; |
| | 154 | #endif |
| | 155 | clock_t ts, te; |
| | 156 | |
| | 157 | /* call my norm 2*/ |
| | 158 | ts = std::clock(); |
| | 159 | nrm = mynorm2(size,xvec); |
| | 160 | te = std::clock(); |
| | 161 | std::cout << "MY NORM=" << nrm << std::endl; |
| | 162 | std::cout << "Time cost for my norm = " << 1000.0 * (te - ts) / CLOCKS_PER_SEC << "(msec)\n"; |
| | 163 | |
| | 164 | /* call blas norm2 */ |
| | 165 | #ifdef _OPENMP |
| | 166 | tsomp = omp_get_wtime(); |
| | 167 | #else |
| | 168 | ts = std::clock(); |
| | 169 | #endif |
| | 170 | |
| | 171 | nrm = cblas_dnrm2(size,xvec,1); |
| | 172 | std::cout << "BLAS NORM=" << nrm << std::endl; |
| | 173 | |
| | 174 | #ifdef _OPENMP |
| | 175 | teomp = omp_get_wtime(); |
| | 176 | std::cout << "Time cost for cblas norm = " << 1000.0 * (teomp - tsomp) << "(msec) Threads=" << omp_get_max_threads() << std::endl; |
| | 177 | #else |
| | 178 | te = std::clock(); |
| | 179 | std::cout << "Time cost for cblas norm = " << 1000.0 * (te - ts) / CLOCKS_PER_SEC << "(msec)\n"; |
| | 180 | #endif |
| | 181 | |
| | 182 | |
| | 183 | |
| | 184 | #ifdef MKL_ILP64 |
| | 185 | mkl_free(xvec); |
| | 186 | #else |
| | 187 | delete [] xvec; |
| | 188 | #endif |
| | 189 | } |
| | 190 | }}} |
| | 191 | |
| | 192 | == Fortran == |
| | 193 | {{{#!fortran |
| | 194 | ! |
| | 195 | ! |
| | 196 | ! Blas Level 1 : norm2 test |
| | 197 | ! |
| | 198 | !------------------------------------- |
| | 199 | ! my norm2 |
| | 200 | double precision function mynorm2(n,x) |
| | 201 | implicit none |
| | 202 | integer :: i, n |
| | 203 | double precision :: x(n) |
| | 204 | double precision :: nrm |
| | 205 | |
| | 206 | nrm = 0.d0 |
| | 207 | do i = 1,n |
| | 208 | nrm = nrm + x(i) * x(i) |
| | 209 | end do |
| | 210 | |
| | 211 | mynorm2 = sqrt(nrm) |
| | 212 | end function mynorm2 |
| | 213 | ! |
| | 214 | ! |
| | 215 | program Blas1 |
| | 216 | use omp_lib !Provides OpenMP* specific APIs |
| | 217 | implicit none |
| | 218 | integer, parameter:: SIZE = 50000000 |
| | 219 | integer :: i, n, m |
| | 220 | double precision, allocatable, dimension(:) :: xvec |
| | 221 | double precision :: nrm, ts, te |
| | 222 | double precision :: dnrm2, mynorm2 |
| | 223 | |
| | 224 | ! alloc vector and setting |
| | 225 | allocate(xvec(SIZE)) |
| | 226 | do i = 1,SIZE |
| | 227 | xvec(i) = dble (mod(i,100)) / 50.0 |
| | 228 | end do |
| | 229 | |
| | 230 | ! call my norm |
| | 231 | ts = omp_get_wtime() |
| | 232 | nrm = mynorm2(SIZE, xvec) |
| | 233 | te = omp_get_wtime(); |
| | 234 | print*, "my norm |x|=",nrm, " time=", 1000.0 * (te - ts),"(msec)" |
| | 235 | |
| | 236 | ! call blas norm2 |
| | 237 | ts = omp_get_wtime() |
| | 238 | nrm = dnrm2(SIZE, xvec, 1) |
| | 239 | te = omp_get_wtime() |
| | 240 | print*, "blas norm |x|=",nrm, " time=", 1000.0 * (te - ts),"(msec)" |
| | 241 | |
| | 242 | deallocate(xvec) |
| | 243 | end program Blas1 |
| | 244 | }}} |