GCC Code Coverage Report


Directory: src/
File: src/support/talp_output.c
Date: 2024-11-22 17:07:10
Exec Total Coverage
Lines: 539 594 90.7%
Functions: 39 41 95.1%
Branches: 165 237 69.6%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "support/talp_output.h"
25
26 #include "apis/dlb_talp.h"
27 #include "LB_core/DLB_talp.h"
28 #include "support/debug.h"
29 #include "support/gslist.h"
30 #include "support/mytime.h"
31 #include "support/perf_metrics.h"
32
33 #include <errno.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <limits.h>
38 #include <locale.h>
39 #include <pthread.h>
40
41
42 9 static float sanitized_ipc(float instructions, float cycles) {
43
3/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
9 if (instructions > 0 && cycles > 0) {
44 1 return instructions / cycles;
45 } else {
46 8 return 0.0f;
47 }
48 }
49
50
51 /*********************************************************************************/
52 /* Monitoring Region */
53 /*********************************************************************************/
54
55 7 void talp_output_print_monitoring_region(const dlb_monitor_t *monitor,
56 const char *cpuset_str, bool have_mpi, bool have_openmp, bool have_papi) {
57
58 char elapsed_time_str[16];
59 7 ns_to_human(elapsed_time_str, 16, monitor->elapsed_time);
60
61 7 info("################# Monitoring Region Summary #################");
62 7 info("### Name: %s", monitor->name);
63 7 info("### Elapsed Time: %s", elapsed_time_str);
64 7 info("### Useful time: %"PRId64" ns",
65 7 monitor->useful_time);
66
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 4 times.
7 if (have_mpi) {
67 3 info("### Not useful MPI: %"PRId64" ns",
68 3 monitor->mpi_time);
69 }
70
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 if (have_openmp) {
71 info("### Not useful OMP Load Balance: %"PRId64" ns",
72 monitor->omp_load_imbalance_time);
73 info("### Not useful OMP Scheduling: %"PRId64" ns",
74 monitor->omp_scheduling_time);
75 info("### Not useful OMP Serialization: %"PRId64" ns",
76 monitor->omp_serialization_time);
77 }
78 7 info("### CpuSet: %s", cpuset_str);
79
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 if (have_papi) {
80 float ipc = sanitized_ipc(monitor->instructions, monitor->cycles);
81 info("### IPC: %.2f ", ipc);
82 }
83
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 4 times.
7 if (have_mpi) {
84 3 info("### Number of MPI calls: %"PRId64,
85 3 monitor->num_mpi_calls);
86 }
87
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 if (have_openmp) {
88 info("### Number of OpenMP parallels: %"PRId64,
89 monitor->num_omp_parallels);
90 info("### Number of OpenMP tasks: %"PRId64,
91 monitor->num_omp_tasks);
92 }
93 7 }
94
95
96 /*********************************************************************************/
97 /* POP Metrics */
98 /*********************************************************************************/
99
100 static GSList *pop_metrics_records = NULL;
101
102 20 void talp_output_record_pop_metrics(const dlb_pop_metrics_t *metrics) {
103
104 /* Copy structure */
105 20 dlb_pop_metrics_t *new_record = malloc(sizeof(dlb_pop_metrics_t));
106 20 *new_record = *metrics;
107
108 /* Add record to list */
109 20 pop_metrics_records = g_slist_prepend(pop_metrics_records, new_record);
110 20 }
111
112 14 static void pop_metrics_print(void) {
113
114 14 for (GSList *node = pop_metrics_records;
115
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 node != NULL;
116 14 node = node->next) {
117
118 14 dlb_pop_metrics_t *record = node->data;
119
120
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 5 times.
14 if (record->elapsed_time > 0) {
121
122 9 float avg_ipc = sanitized_ipc(record->instructions, record->cycles);
123 char elapsed_time_str[16];
124 9 ns_to_human(elapsed_time_str, 16, record->elapsed_time);
125 9 info("############### Monitoring Region POP Metrics ###############");
126 9 info("### Name: %s", record->name);
127 9 info("### Elapsed Time: %s", elapsed_time_str);
128
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 8 times.
9 if (avg_ipc > 0.0f) {
129 1 info("### Average IPC: %1.2f", avg_ipc);
130 }
131
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (record->mpi_parallel_efficiency > 0.0f &&
132
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 record->omp_parallel_efficiency > 0.0f) {
133 9 info("### Parallel efficiency: %1.2f",
134 9 record->parallel_efficiency);
135 }
136
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (record->mpi_parallel_efficiency > 0.0f) {
137 9 info("### MPI Parallel efficiency: %1.2f",
138 9 record->mpi_parallel_efficiency);
139 9 info("### - MPI Communication efficiency: %1.2f",
140 9 record->mpi_communication_efficiency);
141 9 info("### - MPI Load Balance: %1.2f",
142 9 record->mpi_load_balance);
143 9 info("### - MPI Load Balance in: %1.2f",
144 9 record->mpi_load_balance_in);
145 9 info("### - MPI Load Balance out: %1.2f",
146 9 record->mpi_load_balance_out);
147 }
148
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (record->omp_parallel_efficiency > 0.0f) {
149 9 info("### OpenMP Parallel efficiency: %1.2f",
150 9 record->omp_parallel_efficiency);
151 9 info("### - OpenMP Load Balance: %1.2f",
152 9 record->omp_load_balance);
153 9 info("### - OpenMP Scheduling efficiency: %1.2f",
154 9 record->omp_scheduling_efficiency);
155 9 info("### - OpenMP Serialization efficiency: %1.2f",
156 9 record->omp_serialization_efficiency);
157 }
158 } else {
159 5 info("############### Monitoring Region POP Metrics ###############");
160 5 info("### Name: %s", record->name);
161 5 info("### No data ###");
162 }
163 }
164 14 }
165
166 1 static void pop_metrics_to_json(FILE *out_file) {
167
168
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_metrics_records != NULL) {
169 1 fprintf(out_file,
170 " \"Application\": {\n");
171
172 1 for (GSList *node = pop_metrics_records;
173
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
174 1 node = node->next) {
175
176 1 dlb_pop_metrics_t *record = node->data;
177
178 1 fprintf(out_file,
179 " \"%s\": {\n"
180 " \"numCpus\": %d,\n"
181 " \"numMpiRanks\": %d,\n"
182 " \"numNodes\": %d,\n"
183 " \"avgCpus\": %.1f,\n"
184 " \"cycles\": %.0f,\n"
185 " \"instructions\": %.0f,\n"
186 " \"numMeasurements\": %"PRId64",\n"
187 " \"numMpiCalls\": %"PRId64",\n"
188 " \"numOmpParallels\": %"PRId64",\n"
189 " \"numOmpTasks\": %"PRId64",\n"
190 " \"elapsedTime\": %"PRId64",\n"
191 " \"usefulTime\": %"PRId64",\n"
192 " \"mpiTime\": %"PRId64",\n"
193 " \"ompLoadImbalanceTime\": %"PRId64",\n"
194 " \"ompSchedulingTime\": %"PRId64",\n"
195 " \"ompSerializationTime\": %"PRId64",\n"
196 " \"usefulNormdApp\": %.0f,\n"
197 " \"mpiNormdApp\": %.0f,\n"
198 " \"maxUsefulNormdProc\": %.0f,\n"
199 " \"maxUsefulNormdNode\": %.0f,\n"
200 " \"mpiNormdOfMaxUseful\": %.0f,\n"
201 " \"parallelEfficiency\": %.2f,\n"
202 " \"mpiParallelEfficiency\": %.2f,\n"
203 " \"mpiCommunicationEfficiency\": %.2f,\n"
204 " \"mpiLoadBalance\": %.2f,\n"
205 " \"mpiLoadBalanceIn\": %.2f,\n"
206 " \"mpiLoadBalanceOut\": %.2f,\n"
207 " \"ompParallelEfficiency\": %.2f,\n"
208 " \"ompLoadBalance\": %.2f,\n"
209 " \"ompSchedulingEfficiency\": %.2f,\n"
210 " \"ompSerializationEfficiency\": %.2f\n"
211 " }%s\n",
212 1 record->name,
213 record->num_cpus,
214 record->num_mpi_ranks,
215 record->num_nodes,
216 1 record->avg_cpus,
217 record->cycles,
218 record->instructions,
219 record->num_measurements,
220 record->num_mpi_calls,
221 record->num_omp_parallels,
222 record->num_omp_tasks,
223 record->elapsed_time,
224 record->useful_time,
225 record->mpi_time,
226 record->omp_load_imbalance_time,
227 record->omp_scheduling_time,
228 record->omp_serialization_time,
229 record->useful_normd_app,
230 record->mpi_normd_app,
231 record->max_useful_normd_proc,
232 record->max_useful_normd_node,
233 record->mpi_normd_of_max_useful,
234 1 record->parallel_efficiency,
235 1 record->mpi_parallel_efficiency,
236 1 record->mpi_communication_efficiency,
237 1 record->mpi_load_balance,
238 1 record->mpi_load_balance_in,
239 1 record->mpi_load_balance_out,
240 1 record->omp_parallel_efficiency,
241 1 record->omp_load_balance,
242 1 record->omp_scheduling_efficiency,
243 1 record->omp_serialization_efficiency,
244
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 node->next != NULL ? "," : "");
245 }
246 1 fprintf(out_file,
247 " }"); /* no eol */
248 }
249 1 }
250
251 1 static void pop_metrics_to_xml(FILE *out_file) {
252
253 1 for (GSList *node = pop_metrics_records;
254
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
255 1 node = node->next) {
256
257 1 dlb_pop_metrics_t *record = node->data;
258
259 1 fprintf(out_file,
260 " <Application>\n"
261 " <name>%s</name>\n"
262 " <numCpus>%d</numCpus>\n"
263 " <numMpiRanks>%d</numMpiRanks>\n"
264 " <numNodes>%d</numNodes>\n"
265 " <avgCpus>%.1f</avgCpus>\n"
266 " <cycles>%.0f</cycles>\n"
267 " <instructions>%.0f</instructions>\n"
268 " <numMeasurements>%"PRId64"</numMeasurements>\n"
269 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
270 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
271 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
272 " <elapsedTime>%"PRId64"</elapsedTime>\n"
273 " <usefulTime>%"PRId64"</usefulTime>\n"
274 " <mpiTime>%"PRId64"</mpiTime>\n"
275 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
276 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
277 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
278 " <usefulNormdApp>%.0f</usefulNormdApp>\n"
279 " <mpiNormdApp>%.0f</mpiNormdApp>\n"
280 " <maxUsefulNormdProc>%.0f</maxUsefulNormdProc>\n"
281 " <maxUsefulNormdNode>%.0f</maxUsefulNormdNode>\n"
282 " <mpiNormdOfMaxUseful>%.0f</mpiNormdOfMaxUseful>\n"
283 " <parallelEfficiency>%.2f</parallelEfficiency>\n"
284 " <mpiParallelEfficiency>%.2f</mpiParallelEfficiency>\n"
285 " <mpiCommunicationEfficiency>%.2f</mpiCommunicationEfficiency>\n"
286 " <mpiLoadBalance>%.2f</mpiLoadBalance>\n"
287 " <mpiLoadBalanceIn>%.2f</mpiLoadBalanceIn>\n"
288 " <mpiLoadBalanceOut>%.2f</mpiLoadBalanceOut>\n"
289 " <ompParallelEfficiency>%.2f</ompParallelEfficiency>\n"
290 " <ompLoadBalance>%.2f</ompLoadBalance>\n"
291 " <ompSchedulingEfficiency>%.2f</ompSchedulingEfficiency>\n"
292 " <ompSerializationEfficiency>%.2f</ompSerializationEfficiency>\n"
293 " </Application>\n",
294 1 record->name,
295 record->num_cpus,
296 record->num_mpi_ranks,
297 record->num_nodes,
298 1 record->avg_cpus,
299 record->cycles,
300 record->instructions,
301 record->num_measurements,
302 record->num_mpi_calls,
303 record->num_omp_parallels,
304 record->num_omp_tasks,
305 record->elapsed_time,
306 record->useful_time,
307 record->mpi_time,
308 record->omp_load_imbalance_time,
309 record->omp_scheduling_time,
310 record->omp_serialization_time,
311 record->useful_normd_app,
312 record->mpi_normd_app,
313 record->max_useful_normd_proc,
314 record->max_useful_normd_node,
315 record->mpi_normd_of_max_useful,
316 1 record->parallel_efficiency,
317 1 record->mpi_parallel_efficiency,
318 1 record->mpi_communication_efficiency,
319 1 record->mpi_load_balance,
320 1 record->mpi_load_balance_in,
321 1 record->mpi_load_balance_out,
322 1 record->omp_parallel_efficiency,
323 1 record->omp_load_balance,
324 1 record->omp_scheduling_efficiency,
325 1 record->omp_serialization_efficiency
326 );
327 }
328 1 }
329
330 1 static void pop_metrics_to_txt(FILE *out_file) {
331
332 1 for (GSList *node = pop_metrics_records;
333
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
334 1 node = node->next) {
335
336 1 dlb_pop_metrics_t *record = node->data;
337
338
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (record->elapsed_time > 0) {
339 1 fprintf(out_file,
340 "############### Monitoring Region POP Metrics ###############\n"
341 "### Name: %s\n"
342 "### Number of CPUs: %d\n"
343 "### Number of MPI processes: %d\n"
344 "### Number of nodes: %d\n"
345 "### Average CPUs: %.1f\n"
346 "### Cycles: %.0f\n"
347 "### Instructions: %.0f\n"
348 "### Number of measurements: %"PRId64"\n"
349 "### Number of MPI calls: %"PRId64"\n"
350 "### Number of OpenMP parallel regions: %"PRId64"\n"
351 "### Number of OpenMP explicit tasks: %"PRId64"\n"
352 "### Elapsed Time (ns): %"PRId64"\n"
353 "### Useful Time (ns): %"PRId64"\n"
354 "### MPI Time (ns): %"PRId64"\n"
355 "### OpenMP Load Imbalance Time (ns): %"PRId64"\n"
356 "### OpenMP Scheduling Time (ns): %"PRId64"\n"
357 "### OpenMP Serialization Time (ns): %"PRId64"\n"
358 "### Useful Time normalized to App: %.0f\n"
359 "### MPI Time normalized to App: %.0f\n"
360 "### Maximum useful time across processes: %.0f\n"
361 "### Maximum useful time across nodes: %.0f\n"
362 "### MPI time normalized at process level of\n"
363 "### the process with the max useful time: %.0f\n"
364 "### Parallel efficiency: %.2f\n"
365 "### MPI Parallel efficiency: %.2f\n"
366 "### - MPI Communication efficiency: %.2f\n"
367 "### - MPI Load Balance: %.2f\n"
368 "### - MPI Load Balance in: %.2f\n"
369 "### - MPI Load Balance out: %.2f\n"
370 "### OpenMP Parallel efficiency: %.2f\n"
371 "### - OpenMP Load Balance: %.2f\n"
372 "### - OpenMP Scheduling efficiency: %.2f\n"
373 "### - OpenMP Serialization efficiency: %.2f\n",
374 1 record->name,
375 record->num_cpus,
376 record->num_mpi_ranks,
377 record->num_nodes,
378 1 record->avg_cpus,
379 record->cycles,
380 record->instructions,
381 record->num_measurements,
382 record->num_mpi_calls,
383 record->num_omp_parallels,
384 record->num_omp_tasks,
385 record->elapsed_time,
386 record->useful_time,
387 record->mpi_time,
388 record->omp_load_imbalance_time,
389 record->omp_scheduling_time,
390 record->omp_serialization_time,
391 record->useful_normd_app,
392 record->mpi_normd_app,
393 record->max_useful_normd_proc,
394 record->max_useful_normd_node,
395 record->mpi_normd_of_max_useful,
396 1 record->parallel_efficiency,
397 1 record->mpi_parallel_efficiency,
398 1 record->mpi_communication_efficiency,
399 1 record->mpi_load_balance,
400 1 record->mpi_load_balance_in,
401 1 record->mpi_load_balance_out,
402 1 record->omp_parallel_efficiency,
403 1 record->omp_load_balance,
404 1 record->omp_scheduling_efficiency,
405 1 record->omp_serialization_efficiency
406 );
407 } else {
408 fprintf(out_file,
409 "############### Monitoring Region POP Metrics ###############\n"
410 "### Name: %s\n"
411 "### No data ###\n",
412 record->name);
413 }
414 }
415 1 }
416
417 3 static void pop_metrics_to_csv(FILE *out_file, bool append) {
418
419
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (pop_metrics_records == NULL) return;
420
421
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (!append) {
422 /* Print header */
423 2 fprintf(out_file,
424 "name,"
425 "numCpus,"
426 "numMpiRanks,"
427 "numNodes,"
428 "avgCpus,"
429 "cycles,"
430 "instructions,"
431 "numMeasurements,"
432 "numMpiCalls,"
433 "numOmpParallels,"
434 "numOmpTasks,"
435 "elapsedTime,"
436 "usefulTime,"
437 "mpiTime,"
438 "ompLoadImbalanceTime,"
439 "ompSchedulingTime,"
440 "ompSerializationTime,"
441 "usefulNormdApp,"
442 "mpiNormdApp,"
443 "maxUsefulNormdProc,"
444 "maxUsefulNormdNode,"
445 "mpiNormdOfMaxUseful,"
446 "parallelEfficiency,"
447 "mpiParallelEfficiency,"
448 "mpiCommunicationEfficiency,"
449 "mpiLoadBalance,"
450 "mpiLoadBalanceIn,"
451 "mpiLoadBalanceOut,"
452 "ompParallelEfficiency,"
453 "ompLoadBalance,"
454 "ompSchedulingEfficiency,"
455 "ompSerializationEfficiency\n"
456 );
457 }
458
459 3 for (GSList *node = pop_metrics_records;
460
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
461 3 node = node->next) {
462
463 3 dlb_pop_metrics_t *record = node->data;
464
465 3 fprintf(out_file,
466 "\"%s\"," /* name */
467 "%d," /* numCpus */
468 "%d," /* numMpiRanks */
469 "%d," /* numNodes */
470 "%.1f," /* avgCpus */
471 "%.0f," /* cycles */
472 "%.0f," /* instructions */
473 "%"PRId64"," /* numMeasurements */
474 "%"PRId64"," /* numMpiRanks */
475 "%"PRId64"," /* numOmpParallels */
476 "%"PRId64"," /* numOmpTasks */
477 "%"PRId64"," /* elapsedTime */
478 "%"PRId64"," /* usefulTime */
479 "%"PRId64"," /* mpiTime */
480 "%"PRId64"," /* ompLoadImbalanceTime */
481 "%"PRId64"," /* ompSchedulingTime */
482 "%"PRId64"," /* ompSerializationTime */
483 "%.0f," /* usefulNormdApp */
484 "%.0f," /* mpiNormdApp */
485 "%.0f," /* maxUsefulNormdProc */
486 "%.0f," /* maxUsefulNormdNode */
487 "%.0f," /* mpiNormdOfMaxUseful */
488 "%.2f," /* parallelEfficiency */
489 "%.2f," /* mpiParallelEfficiency */
490 "%.2f," /* mpiCommunicationEfficiency */
491 "%.2f," /* mpiLoadBalance */
492 "%.2f," /* mpiLoadBalanceIn */
493 "%.2f," /* mpiLoadBalanceOut */
494 "%.2f," /* ompParallelEfficiency */
495 "%.2f," /* ompLoadBalance */
496 "%.2f," /* ompSchedulingEfficiency */
497 "%.2f\n", /* ompSerializationEfficiency */
498 3 record->name,
499 record->num_cpus,
500 record->num_mpi_ranks,
501 record->num_nodes,
502 3 record->avg_cpus,
503 record->cycles,
504 record->instructions,
505 record->num_measurements,
506 record->num_mpi_calls,
507 record->num_omp_parallels,
508 record->num_omp_tasks,
509 record->elapsed_time,
510 record->useful_time,
511 record->mpi_time,
512 record->omp_load_imbalance_time,
513 record->omp_scheduling_time,
514 record->omp_serialization_time,
515 record->useful_normd_app,
516 record->mpi_normd_app,
517 record->max_useful_normd_proc,
518 record->max_useful_normd_node,
519 record->mpi_normd_of_max_useful,
520 3 record->parallel_efficiency,
521 3 record->mpi_parallel_efficiency,
522 3 record->mpi_communication_efficiency,
523 3 record->mpi_load_balance,
524 3 record->mpi_load_balance_in,
525 3 record->mpi_load_balance_out,
526 3 record->omp_parallel_efficiency,
527 3 record->omp_load_balance,
528 3 record->omp_scheduling_efficiency,
529 3 record->omp_serialization_efficiency
530 );
531 }
532 }
533
534 20 static void pop_metrics_finalize(void) {
535
536 /* Free every record data */
537 20 for (GSList *node = pop_metrics_records;
538
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
40 node != NULL;
539 20 node = node->next) {
540
541 20 dlb_pop_metrics_t *record = node->data;
542 20 free(record);
543 }
544
545 /* Free list */
546 20 g_slist_free(pop_metrics_records);
547 20 pop_metrics_records = NULL;
548 20 }
549
550
551 /*********************************************************************************/
552 /* Node */
553 /*********************************************************************************/
554
555 static GSList *node_records = NULL;
556
557 5 void talp_output_record_node(const node_record_t *node_record) {
558
559 5 int nelems = node_record->nelems;
560
561 /* Allocate new record */
562 5 size_t process_records_size = sizeof(process_in_node_record_t) * nelems;
563 5 size_t node_record_size = sizeof(node_record_t) + process_records_size;
564 5 node_record_t *new_record = malloc(node_record_size);
565
566 /* Memcpy the entire struct */
567 5 memcpy(new_record, node_record, node_record_size);
568
569 /* Insert to list */
570 5 node_records = g_slist_prepend(node_records, new_record);
571 5 }
572
573 14 static void node_print(void) {
574
575 14 for (GSList *node = node_records;
576
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 node != NULL;
577 1 node = node->next) {
578
579 1 node_record_t *node_record = node->data;
580
581 1 info(" |----------------------------------------------------------|");
582 1 info(" | Extended Report Node %4d |",
583 node_record->node_id);
584 1 info(" |----------------------------------------------------------|");
585 1 info(" | Process | Useful Time | MPI Time |");
586 1 info(" |------------|----------------------|----------------------|");
587
588
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
589 2 info(" | %-10d | %18e s | %18e s |",
590 node_record->processes[i].pid,
591 nsecs_to_secs(node_record->processes[i].useful_time),
592 nsecs_to_secs(node_record->processes[i].mpi_time));
593 2 info(" |------------|----------------------|----------------------|");
594 }
595
596
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_record->nelems > 0) {
597 1 info(" |------------|----------------------|----------------------|");
598 1 info(" | %-10s | %18e s | %18e s |", "Node Avg",
599 nsecs_to_secs(node_record->avg_useful_time),
600 nsecs_to_secs(node_record->avg_mpi_time));
601 1 info(" |------------|----------------------|----------------------|");
602 1 info(" | %-10s | %18e s | %18e s |", "Node Max",
603 nsecs_to_secs(node_record->max_useful_time),
604 nsecs_to_secs(node_record->max_mpi_time));
605 1 info(" |------------|----------------------|----------------------|");
606 }
607 }
608 14 }
609
610 1 static void node_to_json(FILE *out_file) {
611
612
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (node_records == NULL) return;
613
614 /* If there are pop_metrics, append to the existing dictionary */
615
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_metrics_records != NULL) {
616 1 fprintf(out_file,",\n");
617 }
618
619 1 fprintf(out_file,
620 " \"node\": [\n");
621
622 1 for (GSList *node = node_records;
623
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
624 1 node = node->next) {
625
626 1 node_record_t *node_record = node->data;
627
628 1 fprintf(out_file,
629 " {\n"
630 " \"id\": \"%d\",\n"
631 " \"process\": [\n",
632 node_record->node_id);
633
634
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
635 2 fprintf(out_file,
636 " {\n"
637 " \"id\": %d,\n"
638 " \"usefulTime\": %"PRId64",\n"
639 " \"mpiTime\": %"PRId64"\n"
640 " }%s\n",
641 node_record->processes[i].pid,
642 node_record->processes[i].useful_time,
643 node_record->processes[i].mpi_time,
644
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 i+1 < node_record->nelems ? "," : "");
645 }
646
647 1 fprintf(out_file,
648 " ],\n"
649 " \"nodeAvg\": {\n"
650 " \"usefulTime\": %"PRId64",\n"
651 " \"mpiTime\": %"PRId64"\n"
652 " },\n"
653 " \"nodeMax\": {\n"
654 " \"usefulTime\": %"PRId64",\n"
655 " \"mpiTime\": %"PRId64"\n"
656 " }\n"
657 " }%s\n",
658 node_record->avg_useful_time,
659 node_record->avg_mpi_time,
660 node_record->max_useful_time,
661 node_record->max_mpi_time,
662
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 node->next != NULL ? "," : "");
663 }
664 1 fprintf(out_file,
665 " ]"); /* no eol */
666 }
667
668 1 static void node_to_xml(FILE *out_file) {
669
670 1 for (GSList *node = node_records;
671
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
672 1 node = node->next) {
673
674 1 node_record_t *node_record = node->data;
675
676 1 fprintf(out_file,
677 " <node>\n"
678 " <id>%d</id>\n",
679 node_record->node_id);
680
681
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
682 2 fprintf(out_file,
683 " <process>\n"
684 " <id>%d</id>\n"
685 " <usefulTime>%"PRId64"</usefulTime>\n"
686 " <mpiTime>%"PRId64"</mpiTime>\n"
687 " </process>\n",
688 node_record->processes[i].pid,
689 node_record->processes[i].useful_time,
690 node_record->processes[i].mpi_time);
691 }
692
693 1 fprintf(out_file,
694 " <nodeAvg>\n"
695 " <usefulTime>%"PRId64"</usefulTime>\n"
696 " <mpiTime>%"PRId64"</mpiTime>\n"
697 " </nodeAvg>\n"
698 " <nodeMax>\n"
699 " <usefulTime>%"PRId64"</usefulTime>\n"
700 " <mpiTime>%"PRId64"</mpiTime>\n"
701 " </nodeMax>\n"
702 " </node>\n",
703 node_record->avg_useful_time,
704 node_record->avg_mpi_time,
705 node_record->max_useful_time,
706 node_record->max_mpi_time);
707 }
708 1 }
709
710 3 static void node_to_csv(FILE *out_file, bool append) {
711
712
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (node_records == NULL) return;
713
714
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
715 /* Print header */
716 1 fprintf(out_file,
717 "NodeId,"
718 "ProcessId,"
719 "ProcessUsefulTime,"
720 "ProcessMPITime,"
721 "NodeAvgUsefulTime,"
722 "NodeAvgMPITime,"
723 "NodeMaxUsefulTime,"
724 "NodeMaxMPITime\n");
725 }
726
727 1 for (GSList *node = node_records;
728
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
729 1 node = node->next) {
730
731 1 node_record_t *node_record = node->data;
732
733
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
734 2 fprintf(out_file,
735 "%d," /* NodeId */
736 "%d," /* ProcessId */
737 "%"PRId64"," /* ProcessUsefulTime */
738 "%"PRId64"," /* ProcessMPITime */
739 "%"PRId64"," /* NodeAvgUsefulTime */
740 "%"PRId64"," /* NodeAvgMPITime*/
741 "%"PRId64"," /* NodeMaxUsefulTime */
742 "%"PRId64"\n", /* NodeMaxMPITime*/
743 node_record->node_id,
744 node_record->processes[i].pid,
745 node_record->processes[i].useful_time,
746 node_record->processes[i].mpi_time,
747 node_record->avg_useful_time,
748 node_record->avg_mpi_time,
749 node_record->max_useful_time,
750 node_record->max_mpi_time);
751
752 }
753 }
754 }
755
756 1 static void node_to_txt(FILE *out_file) {
757
758 1 for (GSList *node = node_records;
759
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
760 1 node = node->next) {
761
762 1 node_record_t *node_record = node->data;
763
764 1 fprintf(out_file,
765 " |----------------------------------------------------------|\n"
766 " | Extended Report Node %4d |\n"
767 " |----------------------------------------------------------|\n"
768 " | Process | Useful Time | MPI Time |\n"
769 " |------------|----------------------|----------------------|\n",
770 node_record->node_id);
771
772
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
773 2 fprintf(out_file,
774 " | %-10d | %18e s | %18e s |\n"
775 " |------------|----------------------|----------------------|\n",
776 node_record->processes[i].pid,
777 nsecs_to_secs(node_record->processes[i].useful_time),
778 nsecs_to_secs(node_record->processes[i].mpi_time));
779 }
780
781
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_record->nelems > 0) {
782 1 fprintf(out_file,
783 " |------------|----------------------|----------------------|\n"
784 " | %-10s | %18e s | %18e s |\n"
785 " |------------|----------------------|----------------------|\n"
786 " | %-10s | %18e s | %18e s |\n"
787 " |------------|----------------------|----------------------|\n",
788 "Node Avg",
789 nsecs_to_secs(node_record->avg_useful_time),
790 nsecs_to_secs(node_record->avg_mpi_time),
791 "Node Max",
792 nsecs_to_secs(node_record->max_useful_time),
793 nsecs_to_secs(node_record->max_mpi_time));
794 }
795 }
796 1 }
797
798 20 static void node_finalize(void) {
799
800 /* Free every record data */
801 20 for (GSList *node = node_records;
802
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 20 times.
25 node != NULL;
803 5 node = node->next) {
804
805 5 node_record_t *record = node->data;
806 5 free(record);
807 }
808
809 /* Free list */
810 20 g_slist_free(node_records);
811 20 node_records = NULL;
812 20 }
813
814
815 /*********************************************************************************/
816 /* Process */
817 /*********************************************************************************/
818
819 typedef struct region_record_t {
820 char name[DLB_MONITOR_NAME_MAX];
821 int num_mpi_ranks;
822 process_record_t process_records[];
823 } region_record_t;
824
825 static GSList *region_records = NULL;
826
827 7 void talp_output_record_process(const char *region_name,
828 const process_record_t *process_record, int num_mpi_ranks) {
829
830 7 region_record_t *region_record = NULL;
831
832 /* Find region or allocate new one */
833 7 for (GSList *node = region_records;
834
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 node != NULL;
835 1 node = node->next) {
836
837 1 region_record_t *record = node->data;
838
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (strcmp(record->name, region_name) == 0) {
839 region_record = record;
840 break;
841 }
842 }
843
844 /* Allocate if not found */
845
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 if (region_record == NULL) {
846 /* Allocate and initialize new region */
847 7 size_t region_record_size = sizeof(region_record_t) +
848 7 sizeof(process_record_t) * num_mpi_ranks;
849 7 region_record = malloc(region_record_size);
850 7 *region_record = (const region_record_t) {
851 .num_mpi_ranks = num_mpi_ranks,
852 };
853 7 snprintf(region_record->name, DLB_MONITOR_NAME_MAX, "%s",
854 region_name);
855
856 /* Insert to list */
857 7 region_records = g_slist_prepend(region_records, region_record);
858 }
859
860 /* Copy process_record */
861 7 int rank = process_record->rank;
862
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 ensure(rank < num_mpi_ranks, "Wrong rank number in %s", __func__);
863 7 memcpy(&region_record->process_records[rank], process_record, sizeof(process_record_t));
864 7 }
865
866 14 static void process_print(void) {
867
868 14 for (GSList *node = region_records;
869
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 14 times.
17 node != NULL;
870 3 node = node->next) {
871
872 3 region_record_t *region_record = node->data;
873
874
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
875
876 3 process_record_t *process_record = &region_record->process_records[i];
877
878 3 info("################# Monitoring Region Summary ##################");
879 3 info("### Name: %s",
880 3 region_record->name);
881 3 info("### Process: %d (%s)",
882 3 process_record->pid, process_record->hostname);
883 3 info("### Rank: %d",
884 process_record->rank);
885 3 info("### CpuSet: %s",
886 3 process_record->cpuset);
887 3 info("### Elapsed time: %"PRId64" ns",
888 process_record->monitor.elapsed_time);
889 3 info("### Useful time: %"PRId64" ns",
890 process_record->monitor.useful_time);
891
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (process_record->monitor.mpi_time > 0) {
892 1 info("### Not useful MPI: %"PRId64" ns",
893 process_record->monitor.mpi_time);
894 }
895
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (process_record->monitor.omp_load_imbalance_time > 0
896
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 || process_record->monitor.omp_scheduling_time > 0
897
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 || process_record->monitor.omp_serialization_time > 0) {
898 info("### Not useful OMP Load Imbalance: %"PRId64" ns",
899 process_record->monitor.omp_load_imbalance_time);
900 info("### Not useful OMP Scheduling: %"PRId64" ns",
901 process_record->monitor.omp_scheduling_time);
902 info("### Not useful OMP Serialization: %"PRId64" ns",
903 process_record->monitor.omp_serialization_time);
904 }
905
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.instructions > 0
906 && process_record->monitor.cycles > 0) {
907 info("### IPC : %.2f",
908 (float)process_record->monitor.instructions
909 / process_record->monitor.cycles);
910 }
911 }
912 }
913 14 }
914
915 1 static void process_to_json(FILE *out_file) {
916
917
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (region_records == NULL) return;
918
919 /* If there are pop_metrics or node_metrics, append to the existing dictionary */
920
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (pop_metrics_records != NULL
921 || node_records != NULL) {
922 1 fprintf(out_file,",\n");
923 }
924
925 1 fprintf(out_file,
926 " \"Process\": {\n");
927
928 1 for (GSList *node = region_records;
929
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
930 1 node = node->next) {
931
932 1 region_record_t *region_record = node->data;
933
934 1 fprintf(out_file,
935 " \"%s\": [\n",
936 1 region_record->name);
937
938
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
939
940 1 process_record_t *process_record = &region_record->process_records[i];
941
942 1 fprintf(out_file,
943 " {\n"
944 " \"rank\": %d,\n"
945 " \"pid\": %d,\n"
946 " \"nodeId\": %d,\n"
947 " \"hostname\": \"%s\",\n"
948 " \"cpuset\": %s,\n"
949 " \"numCpus\": %d,\n"
950 " \"avgCpus\": %.1f,\n"
951 " \"cycles\": %"PRId64",\n"
952 " \"instructions\": %"PRId64",\n"
953 " \"numMeasurements\": %d,\n"
954 " \"numResets\": %d,\n"
955 " \"numMpiCalls\": %"PRId64",\n"
956 " \"numOmpParallels\": %"PRId64",\n"
957 " \"numOmpTasks\": %"PRId64",\n"
958 " \"elapsedTime\": %"PRId64",\n"
959 " \"usefulTime\": %"PRId64",\n"
960 " \"mpiTime\": %"PRId64",\n"
961 " \"ompLoadImbalanceTime\": %"PRId64",\n"
962 " \"ompSchedulingTime\": %"PRId64",\n"
963 " \"ompSerializationTime\": %"PRId64"\n"
964 " }%s\n",
965 process_record->rank,
966 process_record->pid,
967 process_record->node_id,
968 1 process_record->hostname,
969 1 process_record->cpuset_quoted,
970 process_record->monitor.num_cpus,
971 1 process_record->monitor.avg_cpus,
972 process_record->monitor.cycles,
973 process_record->monitor.instructions,
974 process_record->monitor.num_measurements,
975 process_record->monitor.num_resets,
976 process_record->monitor.num_mpi_calls,
977 process_record->monitor.num_omp_parallels,
978 process_record->monitor.num_omp_tasks,
979 process_record->monitor.elapsed_time,
980 process_record->monitor.useful_time,
981 process_record->monitor.mpi_time,
982 process_record->monitor.omp_load_imbalance_time,
983 process_record->monitor.omp_scheduling_time,
984 process_record->monitor.omp_serialization_time,
985
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 i + 1 < region_record->num_mpi_ranks ? "," : "");
986 }
987 1 fprintf(out_file,
988 " ]%s\n",
989
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 node->next != NULL ? "," : "");
990 }
991 1 fprintf(out_file,
992 " }"); /* no eol */
993 }
994
995 1 static void process_to_xml(FILE *out_file) {
996
997 1 for (GSList *node = region_records;
998
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
999 1 node = node->next) {
1000
1001 1 region_record_t *region_record = node->data;
1002
1003 1 fprintf(out_file,
1004 " <Process>\n"
1005 " <name>%s</name>\n",
1006 1 region_record->name);
1007
1008
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1009
1010 1 process_record_t *process_record = &region_record->process_records[i];
1011
1012 1 fprintf(out_file,
1013 " <process>\n"
1014 " <rank>%d</rank>\n"
1015 " <pid>%d</pid>\n"
1016 " <nodeId>%d</nodeId>\n"
1017 " <hostname>%s</hostname>\n"
1018 " <cpuset>%s</cpuset>\n"
1019 " <numCpus>%d</numCpus>\n"
1020 " <avgCpus>%.1f</avgCpus>\n"
1021 " <cycles>%"PRId64"</cycles>\n"
1022 " <instructions>%"PRId64"</instructions>\n"
1023 " <numMeasurements>%d</numMeasurements>\n"
1024 " <numResets>%d</numResets>\n"
1025 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
1026 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
1027 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
1028 " <elapsedTime>%"PRId64"</elapsedTime>\n"
1029 " <usefulTime>%"PRId64"</usefulTime>\n"
1030 " <mpiTime>%"PRId64"</mpiTime>\n"
1031 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
1032 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
1033 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
1034 " </process>\n",
1035 process_record->rank,
1036 process_record->pid,
1037 process_record->node_id,
1038 1 process_record->hostname,
1039 1 process_record->cpuset_quoted,
1040 process_record->monitor.num_cpus,
1041 1 process_record->monitor.avg_cpus,
1042 process_record->monitor.cycles,
1043 process_record->monitor.instructions,
1044 process_record->monitor.num_measurements,
1045 process_record->monitor.num_resets,
1046 process_record->monitor.num_mpi_calls,
1047 process_record->monitor.num_omp_parallels,
1048 process_record->monitor.num_omp_tasks,
1049 process_record->monitor.elapsed_time,
1050 process_record->monitor.useful_time,
1051 process_record->monitor.mpi_time,
1052 process_record->monitor.omp_load_imbalance_time,
1053 process_record->monitor.omp_scheduling_time,
1054 process_record->monitor.omp_serialization_time);
1055 }
1056 1 fprintf(out_file,
1057 " </Process>\n");
1058 }
1059 1 }
1060
1061 3 static void process_to_csv(FILE *out_file, bool append) {
1062
1063
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (region_records == NULL) return;
1064
1065
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
1066 /* Print header */
1067 1 fprintf(out_file,
1068 "Region,"
1069 "Rank,"
1070 "PID,"
1071 "NodeId,"
1072 "Hostname,"
1073 "CpuSet,"
1074 "NumCpus,"
1075 "AvgCpus,"
1076 "Cycles,"
1077 "Instructions,"
1078 "NumMeasurements,"
1079 "NumResets,"
1080 "NumMpiCalls,"
1081 "NumOmpParallels,"
1082 "NumOmpTasks,"
1083 "ElapsedTime,"
1084 "UsefulTime,"
1085 "MPITime,"
1086 "OMPLoadImbalance,"
1087 "OMPSchedulingTime,"
1088 "OMPSerializationTime\n");
1089 }
1090
1091 1 for (GSList *node = region_records;
1092
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1093 1 node = node->next) {
1094
1095 1 region_record_t *region_record = node->data;
1096
1097
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1098
1099 1 process_record_t *process_record = &region_record->process_records[i];
1100
1101 1 fprintf(out_file,
1102 "%s," /* Region */
1103 "%d," /* Rank */
1104 "%d," /* PID */
1105 "%d," /* NodeId */
1106 "%s," /* Hostname */
1107 "%s," /* CpuSet */
1108 "%d" /* NumCpus */
1109 "%.1f," /* AvgCpus */
1110 "%"PRId64"," /* Cycles */
1111 "%"PRId64"," /* Instructions */
1112 "%d," /* NumMeasurements */
1113 "%d," /* NumResets */
1114 "%"PRId64"," /* NumMpiCalls */
1115 "%"PRId64"," /* NumOmpParallels */
1116 "%"PRId64"," /* NumOmpTasks */
1117 "%"PRId64"," /* ElapsedTime */
1118 "%"PRId64"," /* UsefulTime */
1119 "%"PRId64"," /* MPITime */
1120 "%"PRId64"," /* OMPLoadImbalance */
1121 "%"PRId64"," /* OMPSchedulingTime */
1122 "%"PRId64",", /* OMPSerializationTime */
1123 1 region_record->name,
1124 process_record->rank,
1125 process_record->pid,
1126 process_record->node_id,
1127 1 process_record->hostname,
1128 1 process_record->cpuset_quoted,
1129 process_record->monitor.num_cpus,
1130 1 process_record->monitor.avg_cpus,
1131 process_record->monitor.cycles,
1132 process_record->monitor.instructions,
1133 process_record->monitor.num_measurements,
1134 process_record->monitor.num_resets,
1135 process_record->monitor.num_mpi_calls,
1136 process_record->monitor.num_omp_parallels,
1137 process_record->monitor.num_omp_tasks,
1138 process_record->monitor.elapsed_time,
1139 process_record->monitor.useful_time,
1140 process_record->monitor.mpi_time,
1141 process_record->monitor.omp_load_imbalance_time,
1142 process_record->monitor.omp_scheduling_time,
1143 process_record->monitor.omp_serialization_time);
1144 }
1145 }
1146 }
1147
1148 1 static void process_to_txt(FILE *out_file) {
1149
1150 1 for (GSList *node = region_records;
1151
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1152 1 node = node->next) {
1153
1154 1 region_record_t *region_record = node->data;
1155
1156
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1157
1158 1 process_record_t *process_record = &region_record->process_records[i];
1159
1160 2 float ipc = process_record->monitor.cycles > 0
1161 ? (float)process_record->monitor.instructions
1162 / process_record->monitor.cycles
1163
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 : 0.0f;
1164
1165 1 fprintf(out_file,
1166 "################# Monitoring Region Summary ##################\n"
1167 "### Name: %s\n"
1168 "### Process: %d (%s)\n"
1169 "### Rank: %d\n"
1170 "### CpuSet: %s\n"
1171 "### Elapsed time: %"PRId64" ns\n"
1172 "### Useful time: %"PRId64" ns\n"
1173 "### Not useful MPI: %"PRId64" ns\n"
1174 "### Not useful OMP Load Imbalance: %"PRId64" ns\n"
1175 "### Not useful OMP Scheduling: %"PRId64" ns\n"
1176 "### Not useful OMP Serialization: %"PRId64" ns\n"
1177 "### IPC: %.2f\n",
1178 1 region_record->name,
1179 1 process_record->pid, process_record->hostname,
1180 process_record->rank,
1181 1 process_record->cpuset,
1182 process_record->monitor.elapsed_time,
1183 process_record->monitor.useful_time,
1184 process_record->monitor.mpi_time,
1185 process_record->monitor.omp_load_imbalance_time,
1186 process_record->monitor.omp_scheduling_time,
1187 process_record->monitor.omp_serialization_time,
1188 ipc);
1189 }
1190 }
1191 1 }
1192
1193 20 static void process_finalize(void) {
1194
1195 /* Free every record data */
1196 20 for (GSList *node = region_records;
1197
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 20 times.
27 node != NULL;
1198 7 node = node->next) {
1199
1200 7 region_record_t *record = node->data;
1201 7 free(record);
1202 }
1203
1204 /* Free list */
1205 20 g_slist_free(region_records);
1206 20 region_records = NULL;
1207 20 }
1208
1209
1210 /*********************************************************************************/
1211 /* TALP Common */
1212 /*********************************************************************************/
1213 typedef struct TALPCommonRecord {
1214 char *time_of_creation; // ISO 8601 string
1215 char *dlb_major_version; // Major.Minor DLB version used
1216 char *dlb_git_description; // GIT description output
1217 } talp_common_record_t;
1218 static talp_common_record_t common_record;
1219
1220 20 static void talp_output_record_common(void) {
1221 /* Initialize structure */
1222 20 time_t now = time(NULL);
1223 20 common_record = (const talp_common_record_t) {
1224 20 .time_of_creation = get_iso_8601_string(localtime(&now)),
1225 .dlb_major_version = PACKAGE_VERSION,
1226 .dlb_git_description = DLB_GIT_DESCRIPTION,
1227 };
1228 20 }
1229
1230 1 static void common_to_json(FILE *out_file) {
1231 1 fprintf(out_file,
1232 " \"dlbVersion\": \"%s\",\n"
1233 " \"dlbGitVersion\": \"%s\",\n"
1234 " \"timestamp\": \"%s\",\n",
1235 common_record.dlb_major_version,
1236 common_record.dlb_git_description,
1237 common_record.time_of_creation);
1238 1 }
1239
1240 1 static void common_to_xml(FILE *out_file) {
1241
1242 1 fprintf(out_file,
1243 " <dlbVersion>%s</dlbVersion>\n"
1244 " <dlbGitVersion>%s</dlbGitVersion>\n"
1245 " <timestamp>%s</timestamp>\n",
1246 common_record.dlb_major_version,
1247 common_record.dlb_git_description,
1248 common_record.time_of_creation);
1249 1 }
1250
1251 1 static void common_to_txt(FILE *out_file) {
1252
1253 1 fprintf(out_file,
1254 "################ TALP Common Data ################\n"
1255 "### DLB Version: %s\n"
1256 "### DLB Git Version: %s\n"
1257 "### Timestamp: %s\n",
1258 common_record.dlb_major_version,
1259 common_record.dlb_git_description,
1260 common_record.time_of_creation);
1261 1 }
1262
1263 20 static void common_finalize(void) {
1264 20 free(common_record.time_of_creation);
1265 20 }
1266
1267
1268
1269
1270 /*********************************************************************************/
1271 /* TALP Resources */
1272 /*********************************************************************************/
1273 typedef struct TALPResourcesRecord {
1274 unsigned int num_cpus;
1275 unsigned int num_nodes;
1276 unsigned int num_mpi_ranks;
1277 } talp_resources_record_t;
1278 static talp_resources_record_t resources_record;
1279
1280 3 void talp_output_record_resources(int num_cpus, int num_nodes, int num_mpi_ranks) {
1281
1282 3 resources_record = (const talp_resources_record_t) {
1283 3 .num_cpus = (unsigned int) num_cpus,
1284 3 .num_nodes = (unsigned int) num_nodes,
1285 3 .num_mpi_ranks = (unsigned int) num_mpi_ranks
1286 };
1287 3 }
1288
1289 1 static void resources_to_json(FILE *out_file) {
1290 1 fprintf(out_file,
1291 " \"resources\": {\n"
1292 " \"numCpus\": %u,\n"
1293 " \"numNodes\": %u,\n"
1294 " \"numMpiRanks\": %u\n"
1295 " },\n",
1296 resources_record.num_cpus,
1297 resources_record.num_nodes,
1298 resources_record.num_mpi_ranks);
1299 1 }
1300
1301 1 static void resources_to_xml(FILE *out_file) {
1302
1303 1 fprintf(out_file,
1304 " <resources>\n"
1305 " <numCpus>%u</numCpus>\n"
1306 " <numNodes>%u</numNodes>\n"
1307 " <numMpiRanks>%u</numMpiRanks>\n"
1308 " </resources>",
1309 resources_record.num_cpus,
1310 resources_record.num_nodes,
1311 resources_record.num_mpi_ranks);
1312 1 }
1313
1314 1 static void resources_to_txt(FILE *out_file) {
1315
1316 1 fprintf(out_file,
1317 "################# TALP Resources #################\n"
1318 "### Number of CPUs: %u\n"
1319 "### Number of Nodes: %u\n"
1320 "### Number of MPI processes: %u\n",
1321 resources_record.num_cpus,
1322 resources_record.num_nodes,
1323 resources_record.num_mpi_ranks);
1324 1 }
1325
1326
1327 /*********************************************************************************/
1328 /* Helper functions */
1329 /*********************************************************************************/
1330
1331 1 static void json_header(FILE *out_file) {
1332 1 fprintf(out_file, "{\n");
1333 1 }
1334
1335 1 static void json_footer(FILE *out_file) {
1336 1 fprintf(out_file, "\n}\n");
1337 1 }
1338
1339 1 static void xml_header(FILE *out_file) {
1340 1 fprintf(out_file, "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
1341 "<root>\n");
1342 1 }
1343
1344 1 static void xml_footer(FILE *out_file) {
1345 1 fprintf(out_file, "</root>\n");
1346 1 }
1347
1348
1349 /*********************************************************************************/
1350 /* Finalize */
1351 /*********************************************************************************/
1352
1353 200 static bool check_coefficient(float coeffiecient) {
1354
2/4
✓ Branch 0 taken 200 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 200 times.
✗ Branch 3 not taken.
200 return 0.0f <= coeffiecient && coeffiecient <= 1.0;
1355 }
1356
1357 static void warn_negative_counters(void) {
1358 static bool warned_once = false;
1359 if (!warned_once) {
1360 warning("Some obtained PAPI counters contain negative values. Check your"
1361 " installation or report the error to %s", PACKAGE_BUGREPORT);
1362 warned_once = true;
1363 }
1364 }
1365
1366 static void warn_wrong_coefficient(void) {
1367 static bool warned_once = false;
1368 if (!warned_once) {
1369 warning("Some computed POP metric coefficient is not within the allowed"
1370 " range [0.0, 1.0]. If you think this is an unexpected value,"
1371 " please report the error to %s", PACKAGE_BUGREPORT);
1372 warned_once = true;
1373 }
1374 }
1375
1376 20 static void sanitize_records(void) {
1377
1378 /* pop_metrics_records:
1379 * - instructions and cycles need to be >= 0
1380 * - computed efficiencyes need to be [0.0, 1.0]
1381 */
1382 20 for (GSList *node = pop_metrics_records;
1383
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
40 node != NULL;
1384 20 node = node->next) {
1385
1386 20 dlb_pop_metrics_t *record = node->data;
1387
1388
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 if (record->cycles < 0) {
1389 record->cycles = 0.0;
1390 warn_negative_counters();
1391 }
1392
1393
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 if (record->instructions < 0) {
1394 record->instructions = 0.0;
1395 warn_negative_counters();
1396 }
1397
1398
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 if (!check_coefficient(record->parallel_efficiency)
1399
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->mpi_parallel_efficiency)
1400
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->mpi_communication_efficiency)
1401
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->mpi_load_balance)
1402
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->mpi_load_balance_in)
1403
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->mpi_load_balance_out)
1404
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->omp_parallel_efficiency)
1405
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->omp_load_balance)
1406
1/2
✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
20 || !check_coefficient(record->omp_scheduling_efficiency)
1407
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 20 times.
20 || !check_coefficient(record->omp_serialization_efficiency)) {
1408 warn_wrong_coefficient();
1409 }
1410 }
1411
1412 /* node_records: nothing to sanitize for now */
1413
1414 /* region_records: */
1415 20 for (GSList *node = region_records;
1416
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 20 times.
27 node != NULL;
1417 7 node = node->next) {
1418
1419 7 region_record_t *region_record = node->data;
1420
1421
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
14 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1422
1423 7 dlb_monitor_t *monitor = &region_record->process_records[i].monitor;
1424
1425
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 if (monitor->cycles < 0) {
1426 monitor->cycles = 0.0;
1427 warn_negative_counters();
1428 }
1429
1430
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 if (monitor->instructions < 0) {
1431 monitor->instructions = 0.0;
1432 warn_negative_counters();
1433 }
1434 }
1435 }
1436 20 }
1437
1438 20 void talp_output_finalize(const char *output_file) {
1439
1440 /* For efficiency, all records are prepended to their respective lists and
1441 * reversed here */
1442 20 pop_metrics_records = g_slist_reverse(pop_metrics_records);
1443 20 node_records = g_slist_reverse(node_records);
1444 20 region_records = g_slist_reverse(region_records);
1445
1446 /* Sanitize erroneous values */
1447 20 sanitize_records();
1448
1449 20 talp_output_record_common();
1450
1451 /* If the process has changed the locale, temporarily push the C locale to
1452 * print floats with the expected notation (a comma as a decimal separator
1453 * will break CSV and JSON files). The object associated with the locale
1454 * can be safely freed after it has been set. */
1455 20 locale_t new_locale = newlocale(LC_ALL, "C", 0);
1456 20 uselocale(new_locale);
1457 20 freelocale(new_locale);
1458
1459
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.
20 if (output_file == NULL) {
1460 /* No output file, just print all records */
1461 14 pop_metrics_print();
1462 14 node_print();
1463 14 process_print();
1464 } else {
1465 /* Do not open file if process has no data */
1466
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (pop_metrics_records == NULL
1467 && node_records == NULL
1468 && region_records == NULL) return;
1469
1470 /* Check file extension */
1471 typedef enum Extension {
1472 EXT_JSON,
1473 EXT_XML,
1474 EXT_CSV,
1475 EXT_TXT,
1476 } extension_t;
1477 6 extension_t extension = EXT_TXT;
1478 6 const char *ext = strrchr(output_file, '.');
1479
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (ext != NULL) {
1480
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 if (strcmp(ext+1, "json") == 0) {
1481 1 extension = EXT_JSON;
1482
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
5 } else if (strcmp(ext+1, "xml") == 0) {
1483 1 extension = EXT_XML;
1484
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 } else if (strcmp(ext+1, "csv") == 0) {
1485 3 extension = EXT_CSV;
1486 }
1487 }
1488
1489 /* Deprecation warning*/
1490
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 if(extension == EXT_XML){
1491 1 warning("Deprecated: The support for XML output is deprecated and"
1492 " will be removed in the next release");
1493 }
1494
1495 /* Specific case where output file needs to be split */
1496
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (extension == EXT_CSV
1497 3 && !!(pop_metrics_records != NULL)
1498 3 + !!(node_records != NULL)
1499
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
4 + !!(region_records != NULL) > 1) {
1500
1501 /* Length without extension */
1502 1 int filename_useful_len = ext - output_file;
1503
1504 /* POP */
1505
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_metrics_records != NULL) {
1506 1 const char *pop_ext = "-pop.csv";
1507 1 size_t pop_file_len = filename_useful_len + strlen(pop_ext) + 1;
1508 1 char *pop_filename = malloc(sizeof(char)*pop_file_len);
1509 1 sprintf(pop_filename, "%.*s%s", filename_useful_len, output_file, pop_ext);
1510 FILE *pop_file;
1511 bool append_to_csv;
1512
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
1 if (access(pop_filename, F_OK) == 0) {
1513 pop_file = fopen(pop_filename, "a");
1514 append_to_csv = true;
1515 } else {
1516 1 pop_file = fopen(pop_filename, "w");
1517 1 append_to_csv = false;
1518 }
1519
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (pop_file == NULL) {
1520 warning("Cannot open file %s: %s", pop_filename, strerror(errno));
1521 } else {
1522 1 pop_metrics_to_csv(pop_file, append_to_csv);
1523 1 fclose(pop_file);
1524 }
1525 }
1526
1527 /* Node */
1528
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_records != NULL) {
1529 1 const char *node_ext = "-node.csv";
1530 1 size_t node_file_len = filename_useful_len + strlen(node_ext) + 1;
1531 1 char *node_filename = malloc(sizeof(char)*node_file_len);
1532 1 sprintf(node_filename, "%.*s%s", filename_useful_len, output_file, node_ext);
1533 FILE *node_file;
1534 bool append_to_csv;
1535
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
1 if (access(node_filename, F_OK) == 0) {
1536 node_file = fopen(node_filename, "a");
1537 append_to_csv = true;
1538 } else {
1539 1 node_file = fopen(node_filename, "w");
1540 1 append_to_csv = false;
1541 }
1542
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (node_file == NULL) {
1543 warning("Cannot open file %s: %s", node_filename, strerror(errno));
1544 } else {
1545 1 node_to_csv(node_file, append_to_csv);
1546 1 fclose(node_file);
1547 }
1548 }
1549
1550 /* Process */
1551
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (region_records != NULL) {
1552 1 const char *process_ext = "-process.csv";
1553 1 size_t process_file_len = filename_useful_len + strlen(process_ext) + 1;
1554 1 char *process_filename = malloc(sizeof(char)*process_file_len);
1555 1 sprintf(process_filename, "%.*s%s", filename_useful_len, output_file, process_ext);
1556 FILE *process_file;
1557 bool append_to_csv;
1558
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
1 if (access(process_filename, F_OK) == 0) {
1559 process_file = fopen(process_filename, "a");
1560 append_to_csv = true;
1561 } else {
1562 1 process_file = fopen(process_filename, "w");
1563 1 append_to_csv = false;
1564 }
1565
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (process_file == NULL) {
1566 warning("Cannot open file %s: %s", process_filename, strerror(errno));
1567 } else {
1568 1 process_to_csv(process_file, append_to_csv);
1569 1 fclose(process_file);
1570 }
1571 }
1572 }
1573
1574 /* Write to file */
1575 else {
1576 /* Open file */
1577 FILE *out_file;
1578 bool append_to_csv;
1579
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
5 if (extension == EXT_CSV
1580
2/2
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
2 && access(output_file, F_OK) == 0) {
1581 /* Specific case where new entries are appended to existing csv */
1582 1 out_file = fopen(output_file, "a");
1583 1 append_to_csv = true;
1584 } else {
1585 4 out_file = fopen(output_file, "w");
1586 4 append_to_csv = false;
1587 }
1588
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (out_file == NULL) {
1589 warning("Cannot open file %s: %s", output_file, strerror(errno));
1590 } else {
1591 /* Write records to file */
1592
4/5
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
5 switch(extension) {
1593 1 case EXT_JSON:
1594 1 json_header(out_file);
1595 1 common_to_json(out_file);
1596 1 resources_to_json(out_file);
1597 1 pop_metrics_to_json(out_file);
1598 1 node_to_json(out_file);
1599 1 process_to_json(out_file);
1600 1 json_footer(out_file);
1601 1 break;
1602 1 case EXT_XML:
1603 1 xml_header(out_file);
1604 1 common_to_xml(out_file);
1605 1 resources_to_xml(out_file);
1606 1 pop_metrics_to_xml(out_file);
1607 1 node_to_xml(out_file);
1608 1 process_to_xml(out_file);
1609 1 xml_footer(out_file);
1610 1 break;
1611 2 case EXT_CSV:
1612 2 pop_metrics_to_csv(out_file, append_to_csv);
1613 2 node_to_csv(out_file, append_to_csv);
1614 2 process_to_csv(out_file, append_to_csv);
1615 2 break;
1616 1 case EXT_TXT:
1617 1 common_to_txt(out_file);
1618 1 resources_to_txt(out_file);
1619 1 pop_metrics_to_txt(out_file);
1620 1 node_to_txt(out_file);
1621 1 process_to_txt(out_file);
1622 1 break;
1623 }
1624 /* Close file */
1625 5 fclose(out_file);
1626 }
1627 }
1628 }
1629
1630 // Restore locale
1631 20 uselocale(LC_GLOBAL_LOCALE);
1632
1633 // De-allocate all records
1634 20 common_finalize();
1635 20 pop_metrics_finalize();
1636 20 node_finalize();
1637 20 process_finalize();
1638 }
1639