| | 1 | [[PageOutline]] |
| | 2 | = Code Debugging = |
| | 3 | Ideally, code should be debugged on your desktop computer before being moved to a cluster environment. There are a number of debugging techniques, which you can learn from the internet. |
| | 4 | |
| | 5 | == print == |
| | 6 | Insert 'print' statements into the source code to report the program's state as it runs. |
| | 7 | |
| | 8 | in C/C++ |
| | 9 | {{{#!c |
| | 10 | /* check */ |
| | 11 | #ifdef DEBUG |
| | 12 | if (info == 0) printf("successfully done\n"); |
| | 13 | #endif |
| | 14 | }}} |
| | 15 | |
| | 16 | in Fortran |
| | 17 | {{{#!fortran |
| | 18 | #ifdef DEBUG |
| | 19 | if (info == 0) then |
| | 20 | print *,"successfully done" |
| | 21 | endif |
| | 22 | #endif |
| | 23 | }}} |
| | 24 | |
| | 25 | Compile with ''-DDEBUG'' option |
| | 26 | {{{ |
| | 27 | icc -g -pg -DDEBUG -c stokeslet2d.c |
| | 28 | }}} |
| | 29 | |
| | 30 | Makefile |
| | 31 | {{{#!make |
| | 32 | # |
| | 33 | # CCS WORKSHOP |
| | 34 | # Stokes Flow in a Cavity |
| | 35 | # |
| | 36 | # Makefile |
| | 37 | # |
| | 38 | # |
| | 39 | TARGET = ex32s ex32m |
| | 40 | # |
| | 41 | ALL: $(TARGET) |
| | 42 | # |
| | 43 | CC = icc |
| | 44 | |
| | 45 | #CFLAGS = -O3 |
| | 46 | CFLAGS = -g -pg -DDEBUG |
| | 47 | # |
| | 48 | # |
| | 49 | # |
| | 50 | SRC_EX32c = ex32.c stokeslet2d.c gmres.c |
| | 51 | # |
| | 52 | # |
| | 53 | MKL_SQ_LIBS = -L$(MKLROOT)/lib/intel64/ \ |
| | 54 | -I$(MKLROOT)/mkl/include \ |
| | 55 | -Wl,--start-group \ |
| | 56 | $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a \ |
| | 57 | $(MKLROOT)/lib/intel64/libmkl_sequential.a \ |
| | 58 | $(MKLROOT)/lib/intel64/libmkl_core.a \ |
| | 59 | -Wl,--end-group \ |
| | 60 | -lpthread |
| | 61 | # |
| | 62 | MKL_MT_LIBS = -L$(MKLROOT)/lib/intel64/ \ |
| | 63 | -I$(MKLROOT)/mkl/include \ |
| | 64 | -Wl,--start-group \ |
| | 65 | $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a \ |
| | 66 | $(MKLROOT)/lib/intel64/libmkl_intel_thread.a \ |
| | 67 | $(MKLROOT)/lib/intel64/libmkl_core.a \ |
| | 68 | -Wl,--end-group \ |
| | 69 | -liomp5 \ |
| | 70 | -lpthread |
| | 71 | # |
| | 72 | # |
| | 73 | # |
| | 74 | OBJ_EX32c = $(SRC_EX32c:.c=.o) |
| | 75 | # |
| | 76 | # |
| | 77 | ex32s : $(OBJ_EX32c) |
| | 78 | $(CC) $(CFLAGS) -o $@ $(OBJ_EX32c) $(MKL_SQ_LIBS) |
| | 79 | |
| | 80 | ex32m : $(OBJ_EX32c) |
| | 81 | $(CC) $(CFLAGS) -o $@ $(OBJ_EX32c) $(MKL_MT_LIBS) |
| | 82 | |
| | 83 | # |
| | 84 | # |
| | 85 | %.o : %.c |
| | 86 | $(CC) $(CFLAGS) -c $< |
| | 87 | |
| | 88 | # |
| | 89 | clean: |
| | 90 | rm -f *.o $(TARGET) |
| | 91 | }}} |
| | 92 | |
| | 93 | == GDB == |
| | 94 | GDB is the standard debugger. |
| | 95 | [http://www.gnu.org/software/gdb/documentation/] |
| | 96 | |
| | 97 | To debug with '''GDB''', submit an interactive job. [[https://wiki.hpc.tulane.edu/trac/wiki/cypress/using#SubmittingInteractiveJobs|See here]] |
| | 98 | |
| | 99 | Compile with the '''-g''' option so that the executable contains debugging information: |
| | 100 | {{{ |
| | 101 | icc -g -pg -DDEBUG -c stokeslet2d.c |
| | 102 | }}} |
| | 103 | run '''gdb''' |
| | 104 | {{{ |
| | 105 | user@host>gdb ./ex32s |
| | 106 | GNU gdb (GDB) Red Hat Enterprise Linux (7.2-56.el6) |
| | 107 | Copyright (C) 2010 Free Software Foundation, Inc. |
| | 108 | License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html> |
| | 109 | This is free software: you are free to change and redistribute it. |
| | 110 | There is NO WARRANTY, to the extent permitted by law. Type "show copying" |
| | 111 | and "show warranty" for details. |
| | 112 | This GDB was configured as "x86_64-redhat-linux-gnu". |
| | 113 | For bug reporting instructions, please see: |
| | 114 | <http://www.gnu.org/software/gdb/bugs/>... |
| | 115 | Reading symbols from /ccs-autofs/u01/fuji/LabWork/FlowInCavity/ex32s...done. |
| | 116 | (gdb) |
| | 117 | }}} |
| | 118 | |
| | 119 | Show the source code around a line with the command '''list line#''' |
| | 120 | {{{ |
| | 121 | (gdb) list 44 |
| | 122 | 39 printf("Usage:%s [Depth of Cavity]\n",argv[0]); |
| | 123 | 40 exit(-1); |
| | 124 | 41 } |
| | 125 | 42 |
| | 126 | 43 /* get inputed depth */ |
| | 127 | 44 dp = atof(argv[1]); |
| | 128 | 45 |
| | 129 | 46 /* # of particles in depth */ |
| | 130 | 47 numpdepth = (int)(dp / EPSILON + 0.5); |
| | 131 | 48 |
| | 132 | (gdb) |
| | 133 | }}} |
| | 134 | Set a breakpoint at a line with the command '''b line#''' |
| | 135 | {{{ |
| | 136 | (gdb) b 47 |
| | 137 | Breakpoint 1 at 0x4044c2: file ex32.c, line 47. |
| | 138 | (gdb) |
| | 139 | }}} |
| | 140 | '''run [command line option]''' |
| | 141 | {{{ |
| | 142 | (gdb) run 5 |
| | 143 | Starting program: /ccs-autofs/u01/fuji/LabWork/FlowInCavity/ex32s 5 |
| | 144 | [Thread debugging using libthread_db enabled] |
| | 145 | |
| | 146 | Breakpoint 1, main (argc=2, argv=0x7fffffffd5c8) at ex32.c:47 |
| | 147 | 47 numpdepth = (int)(dp / EPSILON + 0.5); |
| | 148 | Missing separate debuginfos, use: debuginfo-install glibc-2.12-1.80.el6_3.3.x86_64 |
| | 149 | (gdb) |
| | 150 | }}} |
| | 151 | |
| | 152 | print values |
| | 153 | {{{ |
| | 154 | (gdb) p dp |
| | 155 | $1 = 5 |
| | 156 | (gdb) p numpdepth |
| | 157 | $2 = 0 |
| | 158 | (gdb) |
| | 159 | }}} |
| | 160 | |
| | 161 | Execute the next source line with the command '''next''' |
| | 162 | {{{ |
| | 163 | (gdb) next |
| | 164 | 50 numpwidth = (int)(1.0 / EPSILON + 0.5); |
| | 165 | (gdb) p numpdepth |
| | 166 | $3 = 1000 |
| | 167 | (gdb) |
| | 168 | }}} |
| | 169 | |
| | 170 | exit |
| | 171 | {{{ |
| | 172 | (gdb) quit |
| | 173 | A debugging session is active. |
| | 174 | |
| | 175 | Inferior 1 [process 6222] will be killed. |
| | 176 | |
| | 177 | Quit anyway? (y or n) y |
| | 178 | }}} |
| | 179 | |
| | 180 | == Valgrind == |
| | 181 | [http://valgrind.org/] |
| | 182 | |
| | 183 | ''Valgrind'' tools can detect many memory management and threading bugs, and profile your programs in detail. |
| | 184 | |
| | 185 | |
| | 186 | === Detect Invalid Access === |
| | 187 | |
| | 188 | Example code: (this code has a bug) |
| | 189 | {{{#!c |
| | 190 | #include <stdio.h> |
| | 191 | #include <stdlib.h> |
| | 192 | #include <string.h> |
| | 193 | |
| | 194 | char * foo() { |
| | 195 | char a[200]; |
| | 196 | strcpy(a, "hello world cup\n"); |
| | 197 | return a; |
| | 198 | } |
| | 199 | |
| | 200 | int main() { |
| | 201 | char * a = foo(); |
| | 202 | char c = a[0]; |
| | 203 | printf("a[0] = %c\n", c); |
| | 204 | printf("a = %s\n", a); |
| | 205 | return 0; |
| | 206 | } |
| | 207 | }}} |
| | 208 | |
| | 209 | Start an interactive session, |
| | 210 | {{{#!bash |
| | 211 | [fuji@cypress2 ~]$ idev -c 1 --gres=mic:0 |
| | 212 | Requesting 1 node(s) task(s) to workshop queue of workshop partition |
| | 213 | 1 task(s)/node, 1 cpu(s)/task, mic:0 MIC device(s)/node |
| | 214 | Time: 0 (hr) 60 (min). |
| | 215 | Submitted batch job 52605 |
| | 216 | JOBID=52605 begin on cypress01-089 |
| | 217 | --> Creating interactive terminal session (login) on node cypress01-089. |
| | 218 | --> You have 0 (hr) 60 (min). |
| | 219 | Last login: Wed Aug 19 21:05:45 2015 from cypress2.cm.cluster |
| | 220 | [fuji@cypress01-089 ~]$ |
| | 221 | }}} |
| | 222 | compile and run, |
| | 223 | {{{#!bash |
| | 224 | [fuji@cypress01-089 ~]$ module load intel-psxe/2015-update1 |
| | 225 | [fuji@cypress01-089 ~]$ icc off_stack.c |
| | 226 | off_stack.c(8): warning #1251: returning pointer to local variable |
| | 227 | return a; |
| | 228 | ^ |
| | 229 | |
| | 230 | [fuji@cypress01-089 ~]$ ./a.out |
| | 231 | a[0] = h |
| | 232 | a = hello world cup |
| | 233 | }}} |
| | 234 | |
| | 235 | {{{#!bash |
| | 236 | [fuji@cypress01-089 ~]$ icc -O0 -g off_stack.c |
| | 237 | off_stack.c(8): warning #1251: returning pointer to local variable |
| | 238 | return a; |
| | 239 | ^ |
| | 240 | |
| | 241 | [fuji@cypress01-089 ~]$ valgrind ./a.out |
| | 242 | ==33367== Memcheck, a memory error detector |
| | 243 | ==33367== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al. |
| | 244 | ==33367== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info |
| | 245 | ==33367== Command: ./a.out |
| | 246 | ==33367== |
| | 247 | ==33367== Invalid read of size 1 |
| | 248 | ==33367== at 0x4005C5: main (off_stack.c:13) |
| | 249 | ==33367== Address 0x7feffdd50 is just below the stack ptr. To suppress, use: --workaround-gcc296-bugs=yes |
| | 250 | ==33367== |
| | 251 | a[0] = h |
| | 252 | a = |
| | 253 | ==33367== |
| | 254 | ==33367== HEAP SUMMARY: |
| | 255 | ==33367== in use at exit: 0 bytes in 0 blocks |
| | 256 | ==33367== total heap usage: 0 allocs, 0 frees, 0 bytes allocated |
| | 257 | ==33367== |
| | 258 | ==33367== All heap blocks were freed -- no leaks are possible |
| | 259 | ==33367== |
| | 260 | ==33367== For counts of detected and suppressed errors, rerun with: -v |
| | 261 | ==33367== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 6 from 6) |
| | 262 | [fuji@cypress01-089 ~]$ |
| | 263 | }}} |
| | 264 | |
| | 265 | === Detect Uninitialized Data Access === |
| | 266 | |
| | 267 | Example code: (this code has a bug) |
| | 268 | {{{#!c |
| | 269 | #include <stdio.h> |
| | 270 | #include <stdlib.h> |
| | 271 | |
| | 272 | int main() { |
| | 273 | double * p = malloc(sizeof(double) * 10); |
| | 274 | if (p[0] < 1) { |
| | 275 | printf("p[0] < 1\n"); |
| | 276 | } else { |
| | 277 | printf("p[0] >= 1\n"); |
| | 278 | } |
| | 279 | return 0; |
| | 280 | } |
| | 281 | }}} |
| | 282 | |
| | 283 | {{{#!bash |
| | 284 | [fuji@cypress01-089 Valgrind]$ icc uninit.c |
| | 285 | [fuji@cypress01-089 Valgrind]$ ./a.out |
| | 286 | p[0] < 1 |
| | 287 | [fuji@cypress01-089 Valgrind]$ icc -O0 -g uninit.c |
| | 288 | [fuji@cypress01-089 Valgrind]$ valgrind ./a.out |
| | 289 | ==34643== Memcheck, a memory error detector |
| | 290 | ==34643== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al. |
| | 291 | ==34643== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info |
| | 292 | ==34643== Command: ./a.out |
| | 293 | ==34643== |
| | 294 | ==34643== Conditional jump or move depends on uninitialised value(s) |
| | 295 | ==34643== at 0x4005A9: main (uninit.c:6) |
| | 296 | ==34643== |
| | 297 | ==34643== Conditional jump or move depends on uninitialised value(s) |
| | 298 | ==34643== at 0x4005AB: main (uninit.c:6) |
| | 299 | ==34643== |
| | 300 | p[0] < 1 |
| | 301 | ==34643== |
| | 302 | ==34643== HEAP SUMMARY: |
| | 303 | ==34643== in use at exit: 80 bytes in 1 blocks |
| | 304 | ==34643== total heap usage: 1 allocs, 0 frees, 80 bytes allocated |
| | 305 | ==34643== |
| | 306 | ==34643== LEAK SUMMARY: |
| | 307 | ==34643== definitely lost: 80 bytes in 1 blocks |
| | 308 | ==34643== indirectly lost: 0 bytes in 0 blocks |
| | 309 | ==34643== possibly lost: 0 bytes in 0 blocks |
| | 310 | ==34643== still reachable: 0 bytes in 0 blocks |
| | 311 | ==34643== suppressed: 0 bytes in 0 blocks |
| | 312 | ==34643== Rerun with --leak-check=full to see details of leaked memory |
| | 313 | ==34643== |
| | 314 | ==34643== For counts of detected and suppressed errors, rerun with: -v |
| | 315 | ==34643== Use --track-origins=yes to see where uninitialised values come from |
| | 316 | ==34643== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 6 from 6) |
| | 317 | }}} |
| | 318 | |
| | 319 | |
| | 320 | === Detect Memory Leaks === |
| | 321 | |
| | 322 | Example code: (this code has a bug) |
| | 323 | |
| | 324 | {{{#!c++ |
| | 325 | #include <iostream> |
| | 326 | #include <cstring> |
| | 327 | |
| | 328 | char * foo() { |
| | 329 | char *a = new char[200]; |
| | 330 | std::strcpy(a, "hello workshop"); |
| | 331 | return a; |
| | 332 | } |
| | 333 | |
| | 334 | int main() { |
| | 335 | char * a = foo(); |
| | 336 | char * b = foo(); |
| | 337 | std::cout << "a = " << a << std::endl; |
| | 338 | std::cout << "b = " << b << std::endl; |
| | 339 | return 0; |
| | 340 | } |
| | 341 | }}} |
| | 342 | |
| | 343 | {{{#!bash |
| | 344 | [fuji@cypress1 TestCodes]$ icpc -g mleak.cpp |
| | 345 | [fuji@cypress1 TestCodes]$ valgrind --leak-check=full ./a.out |
| | 346 | ==10272== Memcheck, a memory error detector |
| | 347 | ==10272== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al. |
| | 348 | ==10272== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info |
| | 349 | ==10272== Command: ./a.out |
| | 350 | ==10272== |
| | 351 | a = hello workshop |
| | 352 | b = hello workshop |
| | 353 | ==10272== |
| | 354 | ==10272== HEAP SUMMARY: |
| | 355 | ==10272== in use at exit: 400 bytes in 2 blocks |
| | 356 | ==10272== total heap usage: 2 allocs, 0 frees, 400 bytes allocated |
| | 357 | ==10272== |
| | 358 | ==10272== 200 bytes in 1 blocks are definitely lost in loss record 1 of 2 |
| | 359 | ==10272== at 0x4C28192: operator new[](unsigned long) (vg_replace_malloc.c:363) |
| | 360 | ==10272== by 0x4009D8: foo() (mleak.cpp:5) |
| | 361 | ==10272== by 0x400A0F: main (mleak.cpp:11) |
| | 362 | ==10272== |
| | 363 | ==10272== 200 bytes in 1 blocks are definitely lost in loss record 2 of 2 |
| | 364 | ==10272== at 0x4C28192: operator new[](unsigned long) (vg_replace_malloc.c:363) |
| | 365 | ==10272== by 0x4009D8: foo() (mleak.cpp:5) |
| | 366 | ==10272== by 0x400A20: main (mleak.cpp:12) |
| | 367 | ==10272== |
| | 368 | ==10272== LEAK SUMMARY: |
| | 369 | ==10272== definitely lost: 400 bytes in 2 blocks |
| | 370 | ==10272== indirectly lost: 0 bytes in 0 blocks |
| | 371 | ==10272== possibly lost: 0 bytes in 0 blocks |
| | 372 | ==10272== still reachable: 0 bytes in 0 blocks |
| | 373 | ==10272== suppressed: 0 bytes in 0 blocks |
| | 374 | ==10272== |
| | 375 | ==10272== For counts of detected and suppressed errors, rerun with: -v |
| | 376 | ==10272== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 6 from 6) |
| | 377 | }}} |
| | 378 | |
| | 379 | == Intel® Inspector XE == |
| | 380 | Memory and Thread Debugger: |
| | 381 | * Debug memory errors like leaks and allocation errors and threading errors like data races and deadlocks. |
| | 382 | |
| | 383 | ==== Setting Environment and Compiling your code ==== |
| | 384 | Load module to setup Intel compilers and tools. |
| | 385 | {{{#!bash |
| | 386 | [fuji@cypress1 ~]$ module load intel-psxe/2015-update1 |
| | 387 | }}} |
| | 388 | Compile the code with the '-g' option, which tells the compiler to generate full debugging information in the object file. |
| | 389 | {{{#!bash |
| | 390 | [fuji@cypress1 ~]$ icc -g -o mytest mytest.c |
| | 391 | }}} |
| | 392 | |
| | 393 | ==== Run and Collect Information ==== |
| | 394 | Start an interactive job, |
| | 395 | {{{#!bash |
| | 396 | [fuji@cypress1 ~]$ idev |
| | 397 | }}} |
| | 398 | To collect information, run the code, for example, |
| | 399 | {{{#!bash |
| | 400 | [fuji@cypress1 ~]$ inspxe-cl -collect=mi2 -app-working-dir=$PWD -result-dir=$PWD/results $PWD/mytest |
| | 401 | }}} |
| | 402 | |
| | 403 | '''-collect=''' options |
| | 404 | |
| | 405 | Memory error analysis types |
| | 406 | ||= mi1 =|| Detect memory leaks || |
| | 407 | ||= mi2 =|| Detect memory leaks and memory access problems || |
| | 408 | ||= mi3 =|| Find locations of memory leaks and memory access problems || |
| | 409 | |
| | 410 | Threading error analysis types |
| | 411 | ||= ti1 =|| Detect deadlocks || |
| | 412 | ||= ti2 =|| Detect deadlocks and data races || |
| | 413 | ||= ti3 =|| Find locations of deadlocks and data races || |
| | 414 | |
| | 415 | To show results, for example, |
| | 416 | {{{#!bash |
| | 417 | [fuji@cypress1 ~]$ inspxe-cl -R problems -r $PWD/results |
| | 418 | }}} |
| | 419 | See [https://software.intel.com/en-us/node/528226 here] for details. |
| | 420 | |
| | 421 | [[Inspector Brief Tutorial]] |
| | 422 | |
| | 423 | == Intel® Advisor XE == |
| | 424 | Threading design and prototyping tool for software architects: |
| | 425 | * Analyze, design, tune and check your threading design before implementation |
| | 426 | * Explore and test threading options without disrupting normal development |
| | 427 | * Predict threading errors & performance scaling on systems with more cores |
| | 428 | |
| | 429 | === Survey === |
| | 430 | Survey the application to determine hotspots. Typically an optimized |
| | 431 | (non-debug) version of the application is used when surveying an application. |
| | 432 | |
| | 433 | Run and Collect info. |
| | 434 | {{{#!bash |
| | 435 | $ icc -g -O3 mycode.c |
| | 436 | $ advixe-cl --collect survey --project-dir ./advi ./a.out |
| | 437 | }}} |
| | 438 | |
| | 439 | Show report |
| | 440 | {{{#!bash |
| | 441 | $ advixe-cl --report survey --project-dir ./advi ./a.out |
| | 442 | }}} |
| | 443 | |
| | 444 | === Add Annotations === |
| | 445 | Add annotations to the application source code, and rebuild the application. |
| | 446 | Please see the Getting Started Tutorial for more information. |
| | 447 | |
| | 448 | For C/C++ |
| | 449 | {{{#!c |
| | 450 | #include "advisor-annotate.h" |
| | 451 | ..... |
| | 452 | ANNOTATE_SITE_BEGIN(sitename1); |
| | 453 | for ( .... |
| | 454 | { |
| | 455 | ANNOTATE_TASK_BEGIN(taskname1); |
| | 456 | ... |
| | 457 | ANNOTATE_TASK_END(); |
| | 458 | } |
| | 459 | ANNOTATE_SITE_END(); |
| | 460 | }}} |
| | 461 | |
| | 462 | Fortran |
| | 463 | {{{#!fortran |
| | 464 | use advisor_annotate |
| | 465 | ..... |
| | 466 | call annotate_site_begin(sitename1) |
| | 467 | do ..... |
| | 468 | call annotate_task_begin(taskname1) |
| | 469 | .... |
| | 470 | call annotate_task_end() |
| | 471 | enddo |
| | 472 | call annotate_site_end() |
| | 473 | }}} |
| | 474 | |
| | 475 | === Suitability === |
| | 476 | Collect suitability data. Note that annotations must be present in the source |
| | 477 | code for this collection to be successful. Typically an optimized (non-debug) version |
| | 478 | of the application is used when collecting suitability data. |
| | 479 | |
| | 480 | {{{#!bash |
| | 481 | $ icc -g -O3 mycode.c -I $ADVISOR_XE_2015_DIR/include |
| | 482 | $ advixe-cl --collect suitability --project-dir ./advi ./a.out |
| | 483 | }}} |
| | 484 | |
| | 485 | {{{#!bash |
| | 486 | $ advixe-cl --report suitability --project-dir ./advi ./a.out |
| | 487 | }}} |
| | 488 | |
| | 489 | |
| | 490 | === Correctness === |
| | 491 | Collect correctness data. Note that annotations must be present in the source |
| | 492 | code for this collection to be successful. Typically an application with debug symbols |
| | 493 | is used when collecting correctness data. |
| | 494 | |
| | 495 | {{{#!bash |
| | 496 | $ icc -g -O0 mycode.c -I $ADVISOR_XE_2015_DIR/include |
| | 497 | $ advixe-cl --collect correctness --project-dir ./advi ./a.out |
| | 498 | }}} |
| | 499 | |
| | 500 | {{{#!bash |
| | 501 | $ advixe-cl --report correctness --project-dir ./advi ./a.out |
| | 502 | }}} |
| | 503 | |
| | 504 | Display a list of annotations present. |
| | 505 | {{{#!bash |
| | 506 | advixe-cl --report annotations --project-dir ./advi ./a.out |
| | 507 | }}} |
| | 508 | Update the application using the chosen parallel coding constructs. Rebuild the application and test. |
| | 509 | |
| | 510 | [[Advisor Brief Tutorial]] |
| | 511 | |
| | 512 | == Intel® VTune™ Amplifier 2015 == |
| | 513 | * Intuitive CPU & GPU performance tuning, multi-core scalability, bandwidth and more |
| | 514 | * Quick performance insight with advanced data visualization |
| | 515 | * Automate regression tests and collect data remotely |
| | 516 | |
| | 517 | Compile the code with the '-g' option, which tells the compiler to generate full debugging information in the object file. |
| | 518 | {{{#!bash |
| | 519 | [fuji@cypress1 ~]$ icc -g -o mytest mytest.c |
| | 520 | }}} |
| | 521 | |
| | 522 | ==== Run and Collect Information ==== |
| | 523 | Start an interactive job, |
| | 524 | {{{#!bash |
| | 525 | [fuji@cypress1 ~]$ idev |
| | 526 | }}} |
| | 527 | To collect information, run the code, for example, |
| | 528 | {{{#!bash |
| | 529 | [fuji@cypress1 ~]$ amplxe-cl -collect hotspots ./mytest |
| | 530 | }}} |
| | 531 | This will create a directory like '''r000hs'''. |
| | 532 | |
| | 533 | '''-collect ''' options |
| | 534 | |
| | 535 | ||= concurrency =|| Concurrency analysis || |
| | 536 | ||= hotspots =|| Hotspots analysis || |
| | 537 | ||= lightweight-hotspots =|| Lightweight Hotspots analysis || |
| | 538 | ||= locksandwaits =|| Locks and Waits analysis || |
| | 539 | |
| | 540 | To show results, for example, |
| | 541 | {{{#!bash |
| | 542 | [fuji@cypress1 ~]$ amplxe-cl -report hotspots -r r000hs |
| | 543 | }}} |
| | 544 | |
| | 545 | '''-report ''' options |
| | 546 | |
| | 547 | ||= summary =|| Display data for the overall performance of the target. || |
| | 548 | ||= hotspots =|| Display functions with the highest CPU time. || |
| | 549 | ||= wait-time =|| Display Wait time. || |
| | 550 | ||= perf =|| Display performance data for each module of the target. || |
| | 551 | ||= perf-detail =|| Display performance data for each function of the target. || |
| | 552 | ||= callstacks =|| Display CPU or Wait time for call stacks. || |
| | 553 | ||= top-down =|| Display a call tree for your target application and provide CPU and Wait time for each function. || |
| | 554 | ||= gprof-cc =|| Display CPU or wait time in the gprof-like format. || |
| | 555 | |
| | 556 | [[VTune Brief Tutorial]] |