GCC Code Coverage Report


Directory: src/
File: src/talp/talp_output.c
Date: 2025-11-21 10:34:40
Exec Total Coverage
Lines: 647 697 92.8%
Functions: 42 44 95.5%
Branches: 217 293 74.1%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "talp/talp_output.h"
25
26 #include "LB_core/spd.h"
27 #include "apis/dlb_talp.h"
28 #include "support/debug.h"
29 #include "support/gslist.h"
30 #include "support/mytime.h"
31 #include "support/options.h"
32 #include "talp/talp.h"
33 #include "talp/perf_metrics.h"
34
35 #include <errno.h>
36 #include <libgen.h>
37 #include <limits.h>
38 #include <locale.h>
39 #include <pthread.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <sys/stat.h>
44 #include <unistd.h>
45
46
47 16 static float sanitized_ipc(float instructions, float cycles) {
48
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
16 if (instructions > 0 && cycles > 0) {
49 2 return instructions / cycles;
50 } else {
51 14 return 0.0f;
52 }
53 }
54
55 39 static const char* make_header(const char *title) {
56 39 int width = 62;
57 static char buf[80];
58 39 int title_len = strlen(title);
59
60
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 39 times.
39 if (width >= (int)sizeof(buf)) {
61 width = sizeof(buf) - 1; // prevent overflow
62 }
63
64
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 39 times.
39 if (title_len + 2 > width) {
65 // Title too long: just return title
66 snprintf(buf, sizeof(buf), "%s", title);
67 return buf;
68 }
69
70 39 int hashes = width - title_len - 2;
71 39 int left = hashes / 2;
72 39 int right = hashes - left;
73
74 39 memset(buf, '#', left);
75 39 buf[left] = ' ';
76 39 memcpy(buf + left + 1, title, title_len);
77 39 buf[left + 1 + title_len] = ' ';
78 39 memset(buf + left + 1 + title_len + 1, '#', right);
79 39 buf[width] = '\0';
80
81 39 return buf;
82 }
83
84
85 /*********************************************************************************/
86 /* Monitoring Region */
87 /*********************************************************************************/
88
89 8 void talp_output_print_monitoring_region(const dlb_monitor_t *monitor,
90 const char *cpuset_str, bool have_mpi, bool have_openmp, bool have_gpu,
91 bool have_papi) {
92
93 char elapsed_time_str[16];
94 8 ns_to_human(elapsed_time_str, 16, monitor->elapsed_time);
95
96 8 info("%s", make_header("Monitoring Region Summary"));
97 8 info("### Name: %s", monitor->name);
98 8 info("### Elapsed Time: %s", elapsed_time_str);
99 8 info("### Useful time: %"PRId64" ns",
100 8 monitor->useful_time);
101
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (have_mpi) {
102 4 info("### Not useful MPI: %"PRId64" ns",
103 4 monitor->mpi_time);
104 }
105
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (have_openmp) {
106 1 info("### Not useful OMP Load Balance: %"PRId64" ns",
107 1 monitor->omp_load_imbalance_time);
108 1 info("### Not useful OMP Scheduling: %"PRId64" ns",
109 1 monitor->omp_scheduling_time);
110 1 info("### Not useful OMP Serialization: %"PRId64" ns",
111 1 monitor->omp_serialization_time);
112 }
113
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (have_gpu) {
114 1 info("### Not useful GPU runtime: %"PRId64" ns",
115 1 monitor->gpu_runtime_time);
116 1 info("### Device useful time: %"PRId64" ns",
117 1 monitor->gpu_useful_time);
118 1 info("### Device communication time: %"PRId64" ns",
119 1 monitor->gpu_communication_time);
120 }
121 8 info("### CpuSet: %s", cpuset_str);
122
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (have_papi) {
123 1 float ipc = sanitized_ipc(monitor->instructions, monitor->cycles);
124 1 info("### IPC: %.2f ", ipc);
125 }
126
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (have_mpi) {
127 4 info("### Number of MPI calls: %"PRId64,
128 4 monitor->num_mpi_calls);
129 }
130
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (have_openmp) {
131 1 info("### Number of OpenMP parallels: %"PRId64,
132 1 monitor->num_omp_parallels);
133 1 info("### Number of OpenMP tasks: %"PRId64,
134 1 monitor->num_omp_tasks);
135 }
136
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (have_gpu) {
137 1 info("### Number of GPU runtime calls: %"PRId64,
138 1 monitor->num_gpu_runtime_calls);
139 }
140 8 }
141
142
143 /*********************************************************************************/
144 /* POP Metrics */
145 /*********************************************************************************/
146
147 static GSList *pop_metrics_records = NULL;
148
149 26 void talp_output_record_pop_metrics(const dlb_pop_metrics_t *metrics) {
150
151 /* Copy structure */
152 26 dlb_pop_metrics_t *new_record = malloc(sizeof(dlb_pop_metrics_t));
153 26 *new_record = *metrics;
154
155 /* Add record to list */
156 26 pop_metrics_records = g_slist_prepend(pop_metrics_records, new_record);
157 26 }
158
159 18 static void pop_metrics_print(void) {
160
161 /* AMD or NVIDIA device? */
162 enum GPU_vendor {
163 NONE,
164 AMD,
165 NVIDIA,
166 };
167 18 enum GPU_vendor vendor = NONE;
168
3/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
18 if (thread_spd && strstr(thread_spd->options.plugins, "cupti") != NULL) {
169 vendor = NVIDIA;
170 }
171
3/4
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 16 times.
18 if (thread_spd && strstr(thread_spd->options.plugins, "rocprofilerv2") != NULL) {
172 vendor = AMD;
173 }
174
175 18 for (GSList *node = pop_metrics_records;
176
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 18 times.
34 node != NULL;
177 16 node = node->next) {
178
179 16 dlb_pop_metrics_t *record = node->data;
180
181
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 1 times.
16 if (record->elapsed_time > 0) {
182
183 15 float avg_ipc = sanitized_ipc(record->instructions, record->cycles);
184 char elapsed_time_str[16];
185 15 ns_to_human(elapsed_time_str, 16, record->elapsed_time);
186
187
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 1 times.
29 bool have_gpu_activity = record->num_gpu_runtime_calls > 0 ||
188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 record->gpu_useful_time > 0;
189
190 15 info("%s", make_header("Monitoring Region POP Metrics"));
191 15 info("### Name: %s", record->name);
192 15 info("### Elapsed Time: %s", elapsed_time_str);
193
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 if (have_gpu_activity) {
194 1 info("### Host");
195 1 info("### ----");
196 }
197
1/2
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
15 if (record->mpi_parallel_efficiency > 0.0f &&
198
1/2
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
15 record->omp_parallel_efficiency > 0.0f) {
199 15 info("### Parallel efficiency: %1.2f",
200 15 record->parallel_efficiency);
201 }
202
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 if (record->num_mpi_calls > 0) {
203 1 info("### - MPI Parallel efficiency: %1.2f",
204 1 record->mpi_parallel_efficiency);
205 1 info("### - Communication efficiency: %1.2f",
206 1 record->mpi_communication_efficiency);
207 1 info("### - Load Balance: %1.2f",
208 1 record->mpi_load_balance);
209 1 info("### - In: %1.2f",
210 1 record->mpi_load_balance_in);
211 1 info("### - Out: %1.2f",
212 1 record->mpi_load_balance_out);
213 }
214
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 13 times.
15 if (record->num_omp_parallels + record->num_omp_tasks > 0) {
215 2 info("### - OpenMP Parallel efficiency: %1.2f",
216 2 record->omp_parallel_efficiency);
217 2 info("### - Load Balance: %1.2f",
218 2 record->omp_load_balance);
219 2 info("### - Scheduling efficiency: %1.2f",
220 2 record->omp_scheduling_efficiency);
221 2 info("### - Serialization efficiency: %1.2f",
222 2 record->omp_serialization_efficiency);
223 }
224
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 if (have_gpu_activity) {
225 1 info("### - Device Offload efficiency: %1.2f",
226 1 record->device_offload_efficiency);
227 }
228
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 if (avg_ipc > 0.0f) {
229 1 float avg_freq = record->cycles / record->useful_time;
230 1 info("### Computational metrics:");
231 1 info("### - Average useful IPC: %1.2f", avg_ipc);
232 1 info("### - Average useful frequency: %1.2f GHz", avg_freq);
233 1 info("### - Number of instructions: %1.2E", record->instructions);
234 }
235
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 if (have_gpu_activity) {
236 1 info("###");
237
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
2 info("### %s Device",
238 vendor == NVIDIA ? "NVIDIA"
239 : vendor == AMD ? "AMD"
240
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 : "GPU");
241
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 info("### %s-------",
242 vendor == NVIDIA ? "------"
243 : vendor == AMD ? "---"
244 : "---");
245 1 info("### Parallel efficiency: %1.2f",
246 1 record->gpu_parallel_efficiency);
247 1 info("### - Load Balance: %1.2f",
248 1 record->gpu_load_balance);
249 1 info("### - Communication efficiency: %1.2f",
250 1 record->gpu_communication_efficiency);
251 1 info("### - Orchestration efficiency: %1.2f",
252 1 record->gpu_orchestration_efficiency);
253 }
254 } else {
255 1 info("%s", make_header("Monitoring Region POP Metrics"));
256 1 info("### Name: %s", record->name);
257 1 info("### No data ###");
258 }
259 }
260 18 }
261
262 3 static void pop_metrics_to_json(FILE *out_file) {
263
264
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (pop_metrics_records != NULL) {
265 3 fprintf(out_file,
266 " \"Application\": {\n");
267
268 3 for (GSList *node = pop_metrics_records;
269
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
270 3 node = node->next) {
271
272 3 dlb_pop_metrics_t *record = node->data;
273
274 3 fprintf(out_file,
275 " \"%s\": {\n"
276 " \"numCpus\": %d,\n"
277 " \"numMpiRanks\": %d,\n"
278 " \"numNodes\": %d,\n"
279 " \"avgCpus\": %.1f,\n"
280 " \"numGpus\": %d,\n"
281 " \"cycles\": %.0f,\n"
282 " \"instructions\": %.0f,\n"
283 " \"numMeasurements\": %"PRId64",\n"
284 " \"numMpiCalls\": %"PRId64",\n"
285 " \"numOmpParallels\": %"PRId64",\n"
286 " \"numOmpTasks\": %"PRId64",\n"
287 " \"numGpuRuntimeCalls\": %"PRId64",\n"
288 " \"elapsedTime\": %"PRId64",\n"
289 " \"usefulTime\": %"PRId64",\n"
290 " \"mpiTime\": %"PRId64",\n"
291 " \"ompLoadImbalanceTime\": %"PRId64",\n"
292 " \"ompSchedulingTime\": %"PRId64",\n"
293 " \"ompSerializationTime\": %"PRId64",\n"
294 " \"gpuRuntimeTime\": %"PRId64",\n"
295 " \"minMpiNormdProc\": %.0f,\n"
296 " \"minMpiNormdNode\": %.0f,\n"
297 " \"gpuUsefulTime\": %"PRId64",\n"
298 " \"gpuCommunicationTime\": %"PRId64",\n"
299 " \"maxGpuUsefulTime\": %"PRId64",\n"
300 " \"maxGpuActiveTime\": %"PRId64",\n"
301 " \"parallelEfficiency\": %.2f,\n"
302 " \"mpiParallelEfficiency\": %.2f,\n"
303 " \"mpiCommunicationEfficiency\": %.2f,\n"
304 " \"mpiLoadBalance\": %.2f,\n"
305 " \"mpiLoadBalanceIn\": %.2f,\n"
306 " \"mpiLoadBalanceOut\": %.2f,\n"
307 " \"ompParallelEfficiency\": %.2f,\n"
308 " \"ompLoadBalance\": %.2f,\n"
309 " \"ompSchedulingEfficiency\": %.2f,\n"
310 " \"ompSerializationEfficiency\": %.2f,\n"
311 " \"deviceOffloadEfficiency\": %.2f,\n"
312 " \"gpuParallelEfficiency\": %.2f,\n"
313 " \"gpuLoadBalance\": %.2f,\n"
314 " \"gpuCommunicationEfficiency\": %.2f,\n"
315 " \"gpuOrchestrationEfficiency\": %.2f\n"
316 " }%s\n",
317 3 record->name,
318 record->num_cpus,
319 record->num_mpi_ranks,
320 record->num_nodes,
321 3 record->avg_cpus,
322 record->num_gpus,
323 record->cycles,
324 record->instructions,
325 record->num_measurements,
326 record->num_mpi_calls,
327 record->num_omp_parallels,
328 record->num_omp_tasks,
329 record->num_gpu_runtime_calls,
330 record->elapsed_time,
331 record->useful_time,
332 record->mpi_time,
333 record->omp_load_imbalance_time,
334 record->omp_scheduling_time,
335 record->omp_serialization_time,
336 record->gpu_runtime_time,
337 record->min_mpi_normd_proc,
338 record->min_mpi_normd_node,
339 record->gpu_useful_time,
340 record->gpu_communication_time,
341 record->max_gpu_useful_time,
342 record->max_gpu_active_time,
343 3 record->parallel_efficiency,
344 3 record->mpi_parallel_efficiency,
345 3 record->mpi_communication_efficiency,
346 3 record->mpi_load_balance,
347 3 record->mpi_load_balance_in,
348 3 record->mpi_load_balance_out,
349 3 record->omp_parallel_efficiency,
350 3 record->omp_load_balance,
351 3 record->omp_scheduling_efficiency,
352 3 record->omp_serialization_efficiency,
353 3 record->device_offload_efficiency,
354 3 record->gpu_parallel_efficiency,
355 3 record->gpu_load_balance,
356 3 record->gpu_communication_efficiency,
357 3 record->gpu_orchestration_efficiency,
358
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 node->next != NULL ? "," : "");
359 }
360 3 fprintf(out_file,
361 " }"); /* no eol */
362 }
363 3 }
364
365 1 static void pop_metrics_to_xml(FILE *out_file) {
366
367 1 for (GSList *node = pop_metrics_records;
368
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
369 1 node = node->next) {
370
371 1 dlb_pop_metrics_t *record = node->data;
372
373 1 fprintf(out_file,
374 " <Application>\n"
375 " <name>%s</name>\n"
376 " <numCpus>%d</numCpus>\n"
377 " <numMpiRanks>%d</numMpiRanks>\n"
378 " <numNodes>%d</numNodes>\n"
379 " <avgCpus>%.1f</avgCpus>\n"
380 " <cycles>%.0f</cycles>\n"
381 " <instructions>%.0f</instructions>\n"
382 " <numMeasurements>%"PRId64"</numMeasurements>\n"
383 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
384 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
385 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
386 " <elapsedTime>%"PRId64"</elapsedTime>\n"
387 " <usefulTime>%"PRId64"</usefulTime>\n"
388 " <mpiTime>%"PRId64"</mpiTime>\n"
389 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
390 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
391 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
392 " <minMpiNormdProc>%.0f</minMpiNormdProc>\n"
393 " <minMpiNormdNode>%.0f</minMpiNormdNode>\n"
394 " <parallelEfficiency>%.2f</parallelEfficiency>\n"
395 " <mpiParallelEfficiency>%.2f</mpiParallelEfficiency>\n"
396 " <mpiCommunicationEfficiency>%.2f</mpiCommunicationEfficiency>\n"
397 " <mpiLoadBalance>%.2f</mpiLoadBalance>\n"
398 " <mpiLoadBalanceIn>%.2f</mpiLoadBalanceIn>\n"
399 " <mpiLoadBalanceOut>%.2f</mpiLoadBalanceOut>\n"
400 " <ompParallelEfficiency>%.2f</ompParallelEfficiency>\n"
401 " <ompLoadBalance>%.2f</ompLoadBalance>\n"
402 " <ompSchedulingEfficiency>%.2f</ompSchedulingEfficiency>\n"
403 " <ompSerializationEfficiency>%.2f</ompSerializationEfficiency>\n"
404 " </Application>\n",
405 1 record->name,
406 record->num_cpus,
407 record->num_mpi_ranks,
408 record->num_nodes,
409 1 record->avg_cpus,
410 record->cycles,
411 record->instructions,
412 record->num_measurements,
413 record->num_mpi_calls,
414 record->num_omp_parallels,
415 record->num_omp_tasks,
416 record->elapsed_time,
417 record->useful_time,
418 record->mpi_time,
419 record->omp_load_imbalance_time,
420 record->omp_scheduling_time,
421 record->omp_serialization_time,
422 record->min_mpi_normd_proc,
423 record->min_mpi_normd_node,
424 1 record->parallel_efficiency,
425 1 record->mpi_parallel_efficiency,
426 1 record->mpi_communication_efficiency,
427 1 record->mpi_load_balance,
428 1 record->mpi_load_balance_in,
429 1 record->mpi_load_balance_out,
430 1 record->omp_parallel_efficiency,
431 1 record->omp_load_balance,
432 1 record->omp_scheduling_efficiency,
433 1 record->omp_serialization_efficiency
434 );
435 }
436 1 }
437
438 3 static void pop_metrics_to_txt(FILE *out_file) {
439
440 3 for (GSList *node = pop_metrics_records;
441
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
442 3 node = node->next) {
443
444 3 dlb_pop_metrics_t *record = node->data;
445
446
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (record->elapsed_time > 0) {
447 3 fprintf(out_file,
448 "%s\n"
449 "### Name: %s\n"
450 "### Number of CPUs: %d\n"
451 "### Number of MPI processes: %d\n"
452 "### Number of nodes: %d\n"
453 "### Average CPUs: %.1f\n"
454 "### Number of GPUs: %d\n"
455 "### Cycles: %.0f\n"
456 "### Instructions: %.0f\n"
457 "### Number of measurements: %"PRId64"\n"
458 "### Number of MPI calls: %"PRId64"\n"
459 "### Number of OpenMP parallel regions: %"PRId64"\n"
460 "### Number of OpenMP explicit tasks: %"PRId64"\n"
461 "### Number of GPU runtime calls: %"PRId64"\n"
462 "### Elapsed Time (ns): %"PRId64"\n"
463 "### Useful Time (ns): %"PRId64"\n"
464 "### MPI Time (ns): %"PRId64"\n"
465 "### OpenMP Load Imbalance Time (ns): %"PRId64"\n"
466 "### OpenMP Scheduling Time (ns): %"PRId64"\n"
467 "### OpenMP Serialization Time (ns): %"PRId64"\n"
468 "### GPU Runtime Time (ns): %"PRId64"\n"
469 "### MPI time normalized at process level of\n"
470 "### the process with the max non-MPI time: %.0f\n"
471 "### MPI time normalized at node level of\n"
472 "### the process with the max non-MPI time: %.0f\n"
473 "### Device useful time: %"PRId64"\n"
474 "### Device communication time: %"PRId64"\n"
475 "### Device max useful time: %"PRId64"\n"
476 "### Device max active time: %"PRId64"\n"
477 "### --- Host metrics ---\n"
478 "### Parallel efficiency: %.2f\n"
479 "### MPI Parallel efficiency: %.2f\n"
480 "### - MPI Communication efficiency: %.2f\n"
481 "### - MPI Load Balance: %.2f\n"
482 "### - MPI Load Balance in: %.2f\n"
483 "### - MPI Load Balance out: %.2f\n"
484 "### OpenMP Parallel efficiency: %.2f\n"
485 "### - OpenMP Load Balance: %.2f\n"
486 "### - OpenMP Scheduling efficiency: %.2f\n"
487 "### - OpenMP Serialization efficiency: %.2f\n"
488 "### Device Offload efficiency: %.2f\n"
489 "### --- Device metrics ---\n"
490 "### Device Parallel efficiency: %.2f\n"
491 "### - Device Load Balance: %.2f\n"
492 "### - Device Communication efficiency: %.2f\n"
493 "### - Device Orchestration efficiency: %.2f\n",
494 make_header("Monitoring Region POP Metrics"),
495 3 record->name,
496 record->num_cpus,
497 record->num_mpi_ranks,
498 record->num_nodes,
499 3 record->avg_cpus,
500 record->num_gpus,
501 record->cycles,
502 record->instructions,
503 record->num_measurements,
504 record->num_mpi_calls,
505 record->num_omp_parallels,
506 record->num_omp_tasks,
507 record->num_gpu_runtime_calls,
508 record->elapsed_time,
509 record->useful_time,
510 record->mpi_time,
511 record->omp_load_imbalance_time,
512 record->omp_scheduling_time,
513 record->omp_serialization_time,
514 record->gpu_runtime_time,
515 record->min_mpi_normd_proc,
516 record->min_mpi_normd_node,
517 record->gpu_useful_time,
518 record->gpu_communication_time,
519 record->max_gpu_useful_time,
520 record->max_gpu_active_time,
521 3 record->parallel_efficiency,
522 3 record->mpi_parallel_efficiency,
523 3 record->mpi_communication_efficiency,
524 3 record->mpi_load_balance,
525 3 record->mpi_load_balance_in,
526 3 record->mpi_load_balance_out,
527 3 record->omp_parallel_efficiency,
528 3 record->omp_load_balance,
529 3 record->omp_scheduling_efficiency,
530 3 record->omp_serialization_efficiency,
531 3 record->device_offload_efficiency,
532 3 record->gpu_parallel_efficiency,
533 3 record->gpu_load_balance,
534 3 record->gpu_communication_efficiency,
535 3 record->gpu_orchestration_efficiency
536 );
537 } else {
538 fprintf(out_file,
539 "%s\n"
540 "### Name: %s\n"
541 "### No data ###\n",
542 make_header("Monitoring Region POP Metrics"),
543 record->name);
544 }
545 }
546 3 }
547
548 3 static void pop_metrics_to_csv(FILE *out_file, bool append) {
549
550
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (pop_metrics_records == NULL) return;
551
552
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (!append) {
553 /* Print header */
554 2 fprintf(out_file,
555 "name,"
556 "numCpus,"
557 "numMpiRanks,"
558 "numNodes,"
559 "avgCpus,"
560 "cycles,"
561 "instructions,"
562 "numMeasurements,"
563 "numMpiCalls,"
564 "numOmpParallels,"
565 "numOmpTasks,"
566 "numGpuRuntimeCalls,"
567 "elapsedTime,"
568 "usefulTime,"
569 "mpiTime,"
570 "ompLoadImbalanceTime,"
571 "ompSchedulingTime,"
572 "ompSerializationTime,"
573 "gpuRuntimeTime,"
574 "minMpiNormdProc,"
575 "minMpiNormdNode,"
576 "parallelEfficiency,"
577 "mpiParallelEfficiency,"
578 "mpiCommunicationEfficiency,"
579 "mpiLoadBalance,"
580 "mpiLoadBalanceIn,"
581 "mpiLoadBalanceOut,"
582 "ompParallelEfficiency,"
583 "ompLoadBalance,"
584 "ompSchedulingEfficiency,"
585 "ompSerializationEfficiency,"
586 "deviceOffloadEfficiency,"
587 "gpuParallelEfficiency,"
588 "gpuLoadBalance,"
589 "gpuCommunicationEfficiency,"
590 "gpuOrchestrationEfficiency\n"
591 );
592 }
593
594 3 for (GSList *node = pop_metrics_records;
595
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
596 3 node = node->next) {
597
598 3 dlb_pop_metrics_t *record = node->data;
599
600 3 fprintf(out_file,
601 "\"%s\"," /* name */
602 "%d," /* numCpus */
603 "%d," /* numMpiRanks */
604 "%d," /* numNodes */
605 "%.1f," /* avgCpus */
606 "%.0f," /* cycles */
607 "%.0f," /* instructions */
608 "%"PRId64"," /* numMeasurements */
609 "%"PRId64"," /* numMpiCalls */
610 "%"PRId64"," /* numOmpParallels */
611 "%"PRId64"," /* numOmpTasks */
612 "%"PRId64"," /* numGpuRuntimeCalls */
613 "%"PRId64"," /* elapsedTime */
614 "%"PRId64"," /* usefulTime */
615 "%"PRId64"," /* mpiTime */
616 "%"PRId64"," /* ompLoadImbalanceTime */
617 "%"PRId64"," /* ompSchedulingTime */
618 "%"PRId64"," /* ompSerializationTime */
619 "%"PRId64"," /* gpuRuntimeTime */
620 "%.0f," /* minMpiNormdProc */
621 "%.0f," /* minMpiNormdNode */
622 "%.2f," /* parallelEfficiency */
623 "%.2f," /* mpiParallelEfficiency */
624 "%.2f," /* mpiCommunicationEfficiency */
625 "%.2f," /* mpiLoadBalance */
626 "%.2f," /* mpiLoadBalanceIn */
627 "%.2f," /* mpiLoadBalanceOut */
628 "%.2f," /* ompParallelEfficiency */
629 "%.2f," /* ompLoadBalance */
630 "%.2f," /* ompSchedulingEfficiency */
631 "%.2f," /* ompSerializationEfficiency */
632 "%.2f," /* deviceOffloadEfficiency */
633 "%.2f," /* gpuParallelEfficiency */
634 "%.2f," /* gpuLoadBalance */
635 "%.2f," /* gpuCommunicationEfficiency */
636 "%.2f\n", /* gpuOrchestrationEfficiency */
637 3 record->name,
638 record->num_cpus,
639 record->num_mpi_ranks,
640 record->num_nodes,
641 3 record->avg_cpus,
642 record->cycles,
643 record->instructions,
644 record->num_measurements,
645 record->num_mpi_calls,
646 record->num_omp_parallels,
647 record->num_omp_tasks,
648 record->num_gpu_runtime_calls,
649 record->elapsed_time,
650 record->useful_time,
651 record->mpi_time,
652 record->omp_load_imbalance_time,
653 record->omp_scheduling_time,
654 record->omp_serialization_time,
655 record->gpu_runtime_time,
656 record->min_mpi_normd_proc,
657 record->min_mpi_normd_node,
658 3 record->parallel_efficiency,
659 3 record->mpi_parallel_efficiency,
660 3 record->mpi_communication_efficiency,
661 3 record->mpi_load_balance,
662 3 record->mpi_load_balance_in,
663 3 record->mpi_load_balance_out,
664 3 record->omp_parallel_efficiency,
665 3 record->omp_load_balance,
666 3 record->omp_scheduling_efficiency,
667 3 record->omp_serialization_efficiency,
668 3 record->device_offload_efficiency,
669 3 record->gpu_parallel_efficiency,
670 3 record->gpu_load_balance,
671 3 record->gpu_communication_efficiency,
672 3 record->gpu_orchestration_efficiency
673 );
674 }
675 }
676
677 28 static void pop_metrics_finalize(void) {
678
679 /* Free every record data */
680 28 for (GSList *node = pop_metrics_records;
681
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 28 times.
54 node != NULL;
682 26 node = node->next) {
683
684 26 dlb_pop_metrics_t *record = node->data;
685 26 free(record);
686 }
687
688 /* Free list */
689 28 g_slist_free(pop_metrics_records);
690 28 pop_metrics_records = NULL;
691 28 }
692
693
694 /*********************************************************************************/
695 /* Node */
696 /*********************************************************************************/
697
698 static GSList *node_records = NULL;
699
700 9 void talp_output_record_node(const node_record_t *node_record) {
701
702 9 int nelems = node_record->nelems;
703
704 /* Allocate new record */
705 9 size_t process_records_size = sizeof(process_in_node_record_t) * nelems;
706 9 size_t node_record_size = sizeof(node_record_t) + process_records_size;
707 9 node_record_t *new_record = malloc(node_record_size);
708
709 /* Memcpy the entire struct */
710 9 memcpy(new_record, node_record, node_record_size);
711
712 /* Insert to list */
713 9 node_records = g_slist_prepend(node_records, new_record);
714 9 }
715
716 18 static void node_print(void) {
717
718 18 for (GSList *node = node_records;
719
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 18 times.
19 node != NULL;
720 1 node = node->next) {
721
722 1 node_record_t *node_record = node->data;
723
724 1 info(" |----------------------------------------------------------|");
725 1 info(" | Extended Report Node %4d |",
726 node_record->node_id);
727 1 info(" |----------------------------------------------------------|");
728 1 info(" | Process | Useful Time | MPI Time |");
729 1 info(" |------------|----------------------|----------------------|");
730
731
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
732 2 info(" | %-10d | %18e s | %18e s |",
733 node_record->processes[i].pid,
734 nsecs_to_secs(node_record->processes[i].useful_time),
735 nsecs_to_secs(node_record->processes[i].mpi_time));
736 2 info(" |------------|----------------------|----------------------|");
737 }
738
739
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_record->nelems > 0) {
740 1 info(" |------------|----------------------|----------------------|");
741 1 info(" | %-10s | %18e s | %18e s |", "Node Avg",
742 nsecs_to_secs(node_record->avg_useful_time),
743 nsecs_to_secs(node_record->avg_mpi_time));
744 1 info(" |------------|----------------------|----------------------|");
745 1 info(" | %-10s | %18e s | %18e s |", "Node Max",
746 nsecs_to_secs(node_record->max_useful_time),
747 nsecs_to_secs(node_record->max_mpi_time));
748 1 info(" |------------|----------------------|----------------------|");
749 }
750 }
751 18 }
752
753 3 static void node_to_json(FILE *out_file) {
754
755
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (node_records == NULL) return;
756
757 /* If there are pop_metrics, append to the existing dictionary */
758
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (pop_metrics_records != NULL) {
759 3 fprintf(out_file,",\n");
760 }
761
762 3 fprintf(out_file,
763 " \"node\": [\n");
764
765 3 for (GSList *node = node_records;
766
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
767 3 node = node->next) {
768
769 3 node_record_t *node_record = node->data;
770
771 3 fprintf(out_file,
772 " {\n"
773 " \"id\": \"%d\",\n"
774 " \"process\": [\n",
775 node_record->node_id);
776
777
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
9 for (int i = 0; i < node_record->nelems; ++i) {
778 6 fprintf(out_file,
779 " {\n"
780 " \"id\": %d,\n"
781 " \"usefulTime\": %"PRId64",\n"
782 " \"mpiTime\": %"PRId64"\n"
783 " }%s\n",
784 node_record->processes[i].pid,
785 node_record->processes[i].useful_time,
786 node_record->processes[i].mpi_time,
787
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 i+1 < node_record->nelems ? "," : "");
788 }
789
790 3 fprintf(out_file,
791 " ],\n"
792 " \"nodeAvg\": {\n"
793 " \"usefulTime\": %"PRId64",\n"
794 " \"mpiTime\": %"PRId64"\n"
795 " },\n"
796 " \"nodeMax\": {\n"
797 " \"usefulTime\": %"PRId64",\n"
798 " \"mpiTime\": %"PRId64"\n"
799 " }\n"
800 " }%s\n",
801 node_record->avg_useful_time,
802 node_record->avg_mpi_time,
803 node_record->max_useful_time,
804 node_record->max_mpi_time,
805
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 node->next != NULL ? "," : "");
806 }
807 3 fprintf(out_file,
808 " ]"); /* no eol */
809 }
810
811 1 static void node_to_xml(FILE *out_file) {
812
813 1 for (GSList *node = node_records;
814
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
815 1 node = node->next) {
816
817 1 node_record_t *node_record = node->data;
818
819 1 fprintf(out_file,
820 " <node>\n"
821 " <id>%d</id>\n",
822 node_record->node_id);
823
824
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
825 2 fprintf(out_file,
826 " <process>\n"
827 " <id>%d</id>\n"
828 " <usefulTime>%"PRId64"</usefulTime>\n"
829 " <mpiTime>%"PRId64"</mpiTime>\n"
830 " </process>\n",
831 node_record->processes[i].pid,
832 node_record->processes[i].useful_time,
833 node_record->processes[i].mpi_time);
834 }
835
836 1 fprintf(out_file,
837 " <nodeAvg>\n"
838 " <usefulTime>%"PRId64"</usefulTime>\n"
839 " <mpiTime>%"PRId64"</mpiTime>\n"
840 " </nodeAvg>\n"
841 " <nodeMax>\n"
842 " <usefulTime>%"PRId64"</usefulTime>\n"
843 " <mpiTime>%"PRId64"</mpiTime>\n"
844 " </nodeMax>\n"
845 " </node>\n",
846 node_record->avg_useful_time,
847 node_record->avg_mpi_time,
848 node_record->max_useful_time,
849 node_record->max_mpi_time);
850 }
851 1 }
852
853 3 static void node_to_csv(FILE *out_file, bool append) {
854
855
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (node_records == NULL) return;
856
857
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
858 /* Print header */
859 1 fprintf(out_file,
860 "NodeId,"
861 "ProcessId,"
862 "ProcessUsefulTime,"
863 "ProcessMPITime,"
864 "NodeAvgUsefulTime,"
865 "NodeAvgMPITime,"
866 "NodeMaxUsefulTime,"
867 "NodeMaxMPITime\n");
868 }
869
870 1 for (GSList *node = node_records;
871
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
872 1 node = node->next) {
873
874 1 node_record_t *node_record = node->data;
875
876
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
877 2 fprintf(out_file,
878 "%d," /* NodeId */
879 "%d," /* ProcessId */
880 "%"PRId64"," /* ProcessUsefulTime */
881 "%"PRId64"," /* ProcessMPITime */
882 "%"PRId64"," /* NodeAvgUsefulTime */
883 "%"PRId64"," /* NodeAvgMPITime*/
884 "%"PRId64"," /* NodeMaxUsefulTime */
885 "%"PRId64"\n", /* NodeMaxMPITime*/
886 node_record->node_id,
887 node_record->processes[i].pid,
888 node_record->processes[i].useful_time,
889 node_record->processes[i].mpi_time,
890 node_record->avg_useful_time,
891 node_record->avg_mpi_time,
892 node_record->max_useful_time,
893 node_record->max_mpi_time);
894
895 }
896 }
897 }
898
899 3 static void node_to_txt(FILE *out_file) {
900
901 3 for (GSList *node = node_records;
902
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
903 3 node = node->next) {
904
905 3 node_record_t *node_record = node->data;
906
907 3 fprintf(out_file,
908 " |----------------------------------------------------------|\n"
909 " | Extended Report Node %4d |\n"
910 " |----------------------------------------------------------|\n"
911 " | Process | Useful Time | MPI Time |\n"
912 " |------------|----------------------|----------------------|\n",
913 node_record->node_id);
914
915
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
9 for (int i = 0; i < node_record->nelems; ++i) {
916 6 fprintf(out_file,
917 " | %-10d | %18e s | %18e s |\n"
918 " |------------|----------------------|----------------------|\n",
919 node_record->processes[i].pid,
920 nsecs_to_secs(node_record->processes[i].useful_time),
921 nsecs_to_secs(node_record->processes[i].mpi_time));
922 }
923
924
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (node_record->nelems > 0) {
925 3 fprintf(out_file,
926 " |------------|----------------------|----------------------|\n"
927 " | %-10s | %18e s | %18e s |\n"
928 " |------------|----------------------|----------------------|\n"
929 " | %-10s | %18e s | %18e s |\n"
930 " |------------|----------------------|----------------------|\n",
931 "Node Avg",
932 nsecs_to_secs(node_record->avg_useful_time),
933 nsecs_to_secs(node_record->avg_mpi_time),
934 "Node Max",
935 nsecs_to_secs(node_record->max_useful_time),
936 nsecs_to_secs(node_record->max_mpi_time));
937 }
938 }
939 3 }
940
941 28 static void node_finalize(void) {
942
943 /* Free every record data */
944 28 for (GSList *node = node_records;
945
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 28 times.
37 node != NULL;
946 9 node = node->next) {
947
948 9 node_record_t *record = node->data;
949 9 free(record);
950 }
951
952 /* Free list */
953 28 g_slist_free(node_records);
954 28 node_records = NULL;
955 28 }
956
957
958 /*********************************************************************************/
959 /* Process */
960 /*********************************************************************************/
961
962 typedef struct region_record_t {
963 char name[DLB_MONITOR_NAME_MAX];
964 int num_mpi_ranks;
965 process_record_t process_records[];
966 } region_record_t;
967
968 static GSList *region_records = NULL;
969
970 11 void talp_output_record_process(const char *region_name,
971 const process_record_t *process_record, int num_mpi_ranks) {
972
973 11 region_record_t *region_record = NULL;
974
975 /* Find region or allocate new one */
976 11 for (GSList *node = region_records;
977
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 11 times.
12 node != NULL;
978 1 node = node->next) {
979
980 1 region_record_t *record = node->data;
981
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (strcmp(record->name, region_name) == 0) {
982 region_record = record;
983 break;
984 }
985 }
986
987 /* Allocate if not found */
988
1/2
✓ Branch 0 taken 11 times.
✗ Branch 1 not taken.
11 if (region_record == NULL) {
989 /* Allocate and initialize new region */
990 11 size_t region_record_size = sizeof(region_record_t) +
991 11 sizeof(process_record_t) * num_mpi_ranks;
992 11 region_record = malloc(region_record_size);
993 11 *region_record = (const region_record_t) {
994 .num_mpi_ranks = num_mpi_ranks,
995 };
996 11 snprintf(region_record->name, DLB_MONITOR_NAME_MAX, "%s",
997 region_name);
998
999 /* Insert to list */
1000 11 region_records = g_slist_prepend(region_records, region_record);
1001 }
1002
1003 /* Copy process_record */
1004 11 int rank = process_record->rank;
1005
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
11 ensure(rank < num_mpi_ranks, "Wrong rank number in %s", __func__);
1006 11 memcpy(&region_record->process_records[rank], process_record, sizeof(process_record_t));
1007 11 }
1008
1009 18 static void process_print(void) {
1010
1011 18 for (GSList *node = region_records;
1012
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 18 times.
21 node != NULL;
1013 3 node = node->next) {
1014
1015 3 region_record_t *region_record = node->data;
1016
1017
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1018
1019 3 process_record_t *process_record = &region_record->process_records[i];
1020
1021 3 info("%s", make_header("Monitoring Region Summary"));
1022 3 info("### Name: %s",
1023 3 region_record->name);
1024 3 info("### Process: %d (%s)",
1025 3 process_record->pid, process_record->hostname);
1026 3 info("### Rank: %d",
1027 process_record->rank);
1028 3 info("### CpuSet: %s",
1029 3 process_record->cpuset);
1030 3 info("### Elapsed time: %"PRId64" ns",
1031 process_record->monitor.elapsed_time);
1032 3 info("### Useful time: %"PRId64" ns",
1033 process_record->monitor.useful_time);
1034
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (process_record->monitor.mpi_time > 0) {
1035 1 info("### Not useful MPI: %"PRId64" ns",
1036 process_record->monitor.mpi_time);
1037 }
1038
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (process_record->monitor.omp_load_imbalance_time > 0
1039
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 || process_record->monitor.omp_scheduling_time > 0
1040
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 || process_record->monitor.omp_serialization_time > 0) {
1041 info("### Not useful OMP Load Imbalance: %"PRId64" ns",
1042 process_record->monitor.omp_load_imbalance_time);
1043 info("### Not useful OMP Scheduling: %"PRId64" ns",
1044 process_record->monitor.omp_scheduling_time);
1045 info("### Not useful OMP Serialization: %"PRId64" ns",
1046 process_record->monitor.omp_serialization_time);
1047 }
1048
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.gpu_runtime_time > 0) {
1049 info("### Not useful GPU runtime: %"PRId64" ns",
1050 process_record->monitor.gpu_runtime_time);
1051 info("### Device useful time: %"PRId64" ns",
1052 process_record->monitor.gpu_useful_time);
1053 info("### Device communication time: %"PRId64" ns",
1054 process_record->monitor.gpu_communication_time);
1055 }
1056
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.instructions > 0
1057 && process_record->monitor.cycles > 0) {
1058 info("### IPC : %.2f",
1059 (float)process_record->monitor.instructions
1060 / process_record->monitor.cycles);
1061 }
1062 }
1063 }
1064 18 }
1065
1066 3 static void process_to_json(FILE *out_file) {
1067
1068
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (region_records == NULL) return;
1069
1070 /* If there are pop_metrics or node_metrics, append to the existing dictionary */
1071
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (pop_metrics_records != NULL
1072 || node_records != NULL) {
1073 3 fprintf(out_file,",\n");
1074 }
1075
1076 3 fprintf(out_file,
1077 " \"Process\": {\n");
1078
1079 3 for (GSList *node = region_records;
1080
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
1081 3 node = node->next) {
1082
1083 3 region_record_t *region_record = node->data;
1084
1085 3 fprintf(out_file,
1086 " \"%s\": [\n",
1087 3 region_record->name);
1088
1089
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1090
1091 3 process_record_t *process_record = &region_record->process_records[i];
1092
1093 3 fprintf(out_file,
1094 " {\n"
1095 " \"rank\": %d,\n"
1096 " \"pid\": %d,\n"
1097 " \"nodeId\": %d,\n"
1098 " \"hostname\": \"%s\",\n"
1099 " \"cpuset\": %s,\n"
1100 " \"numCpus\": %d,\n"
1101 " \"avgCpus\": %.1f,\n"
1102 " \"cycles\": %"PRId64",\n"
1103 " \"instructions\": %"PRId64",\n"
1104 " \"numMeasurements\": %d,\n"
1105 " \"numResets\": %d,\n"
1106 " \"numMpiCalls\": %"PRId64",\n"
1107 " \"numOmpParallels\": %"PRId64",\n"
1108 " \"numOmpTasks\": %"PRId64",\n"
1109 " \"numGpuRuntimeCalls\": %"PRId64",\n"
1110 " \"elapsedTime\": %"PRId64",\n"
1111 " \"usefulTime\": %"PRId64",\n"
1112 " \"mpiTime\": %"PRId64",\n"
1113 " \"ompLoadImbalanceTime\": %"PRId64",\n"
1114 " \"ompSchedulingTime\": %"PRId64",\n"
1115 " \"ompSerializationTime\": %"PRId64",\n"
1116 " \"gpuRuntimeTime\": %"PRId64",\n"
1117 " \"gpuUsefulTime\": %"PRId64",\n"
1118 " \"gpuCommunicationTime\": %"PRId64"\n"
1119 " }%s\n",
1120 process_record->rank,
1121 process_record->pid,
1122 process_record->node_id,
1123 3 process_record->hostname,
1124 3 process_record->cpuset_quoted,
1125 process_record->monitor.num_cpus,
1126 3 process_record->monitor.avg_cpus,
1127 process_record->monitor.cycles,
1128 process_record->monitor.instructions,
1129 process_record->monitor.num_measurements,
1130 process_record->monitor.num_resets,
1131 process_record->monitor.num_mpi_calls,
1132 process_record->monitor.num_omp_parallels,
1133 process_record->monitor.num_omp_tasks,
1134 process_record->monitor.num_gpu_runtime_calls,
1135 process_record->monitor.elapsed_time,
1136 process_record->monitor.useful_time,
1137 process_record->monitor.mpi_time,
1138 process_record->monitor.omp_load_imbalance_time,
1139 process_record->monitor.omp_scheduling_time,
1140 process_record->monitor.omp_serialization_time,
1141 process_record->monitor.gpu_runtime_time,
1142 process_record->monitor.gpu_useful_time,
1143 process_record->monitor.gpu_communication_time,
1144
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 i + 1 < region_record->num_mpi_ranks ? "," : "");
1145 }
1146 3 fprintf(out_file,
1147 " ]%s\n",
1148
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 node->next != NULL ? "," : "");
1149 }
1150 3 fprintf(out_file,
1151 " }"); /* no eol */
1152 }
1153
1154 1 static void process_to_xml(FILE *out_file) {
1155
1156 1 for (GSList *node = region_records;
1157
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1158 1 node = node->next) {
1159
1160 1 region_record_t *region_record = node->data;
1161
1162 1 fprintf(out_file,
1163 " <Process>\n"
1164 " <name>%s</name>\n",
1165 1 region_record->name);
1166
1167
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1168
1169 1 process_record_t *process_record = &region_record->process_records[i];
1170
1171 1 fprintf(out_file,
1172 " <process>\n"
1173 " <rank>%d</rank>\n"
1174 " <pid>%d</pid>\n"
1175 " <nodeId>%d</nodeId>\n"
1176 " <hostname>%s</hostname>\n"
1177 " <cpuset>%s</cpuset>\n"
1178 " <numCpus>%d</numCpus>\n"
1179 " <avgCpus>%.1f</avgCpus>\n"
1180 " <cycles>%"PRId64"</cycles>\n"
1181 " <instructions>%"PRId64"</instructions>\n"
1182 " <numMeasurements>%d</numMeasurements>\n"
1183 " <numResets>%d</numResets>\n"
1184 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
1185 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
1186 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
1187 " <elapsedTime>%"PRId64"</elapsedTime>\n"
1188 " <usefulTime>%"PRId64"</usefulTime>\n"
1189 " <mpiTime>%"PRId64"</mpiTime>\n"
1190 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
1191 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
1192 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
1193 " </process>\n",
1194 process_record->rank,
1195 process_record->pid,
1196 process_record->node_id,
1197 1 process_record->hostname,
1198 1 process_record->cpuset_quoted,
1199 process_record->monitor.num_cpus,
1200 1 process_record->monitor.avg_cpus,
1201 process_record->monitor.cycles,
1202 process_record->monitor.instructions,
1203 process_record->monitor.num_measurements,
1204 process_record->monitor.num_resets,
1205 process_record->monitor.num_mpi_calls,
1206 process_record->monitor.num_omp_parallels,
1207 process_record->monitor.num_omp_tasks,
1208 process_record->monitor.elapsed_time,
1209 process_record->monitor.useful_time,
1210 process_record->monitor.mpi_time,
1211 process_record->monitor.omp_load_imbalance_time,
1212 process_record->monitor.omp_scheduling_time,
1213 process_record->monitor.omp_serialization_time);
1214 }
1215 1 fprintf(out_file,
1216 " </Process>\n");
1217 }
1218 1 }
1219
1220 3 static void process_to_csv(FILE *out_file, bool append) {
1221
1222
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (region_records == NULL) return;
1223
1224
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
1225 /* Print header */
1226 1 fprintf(out_file,
1227 "Region,"
1228 "Rank,"
1229 "PID,"
1230 "NodeId,"
1231 "Hostname,"
1232 "CpuSet,"
1233 "NumCpus,"
1234 "AvgCpus,"
1235 "Cycles,"
1236 "Instructions,"
1237 "NumMeasurements,"
1238 "NumResets,"
1239 "NumMpiCalls,"
1240 "NumOmpParallels,"
1241 "NumOmpTasks,"
1242 "NumGpuCalls,"
1243 "ElapsedTime,"
1244 "UsefulTime,"
1245 "MPITime,"
1246 "OMPLoadImbalance,"
1247 "OMPSchedulingTime,"
1248 "OMPSerializationTime,"
1249 "GPURuntimeTime,"
1250 "GPUUsefulTime,"
1251 "GPUCommunicationTime\n");
1252 }
1253
1254 1 for (GSList *node = region_records;
1255
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1256 1 node = node->next) {
1257
1258 1 region_record_t *region_record = node->data;
1259
1260
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1261
1262 1 process_record_t *process_record = &region_record->process_records[i];
1263
1264 1 fprintf(out_file,
1265 "%s," /* Region */
1266 "%d," /* Rank */
1267 "%d," /* PID */
1268 "%d," /* NodeId */
1269 "%s," /* Hostname */
1270 "%s," /* CpuSet */
1271 "%d," /* NumCpus */
1272 "%.1f," /* AvgCpus */
1273 "%"PRId64"," /* Cycles */
1274 "%"PRId64"," /* Instructions */
1275 "%d," /* NumMeasurements */
1276 "%d," /* NumResets */
1277 "%"PRId64"," /* NumMpiCalls */
1278 "%"PRId64"," /* NumOmpParallels */
1279 "%"PRId64"," /* NumOmpTasks */
1280 "%"PRId64"," /* NumGpuCalls */
1281 "%"PRId64"," /* ElapsedTime */
1282 "%"PRId64"," /* UsefulTime */
1283 "%"PRId64"," /* MPITime */
1284 "%"PRId64"," /* OMPLoadImbalance */
1285 "%"PRId64"," /* OMPSchedulingTime */
1286 "%"PRId64"," /* OMPSerializationTime */
1287 "%"PRId64"," /* GPURuntimeTime */
1288 "%"PRId64"," /* GPUUsefulTime */
1289 "%"PRId64"\n", /* GPUCommunicationTime */
1290 1 region_record->name,
1291 process_record->rank,
1292 process_record->pid,
1293 process_record->node_id,
1294 1 process_record->hostname,
1295 1 process_record->cpuset_quoted,
1296 process_record->monitor.num_cpus,
1297 1 process_record->monitor.avg_cpus,
1298 process_record->monitor.cycles,
1299 process_record->monitor.instructions,
1300 process_record->monitor.num_measurements,
1301 process_record->monitor.num_resets,
1302 process_record->monitor.num_mpi_calls,
1303 process_record->monitor.num_omp_parallels,
1304 process_record->monitor.num_omp_tasks,
1305 process_record->monitor.num_gpu_runtime_calls,
1306 process_record->monitor.elapsed_time,
1307 process_record->monitor.useful_time,
1308 process_record->monitor.mpi_time,
1309 process_record->monitor.omp_load_imbalance_time,
1310 process_record->monitor.omp_scheduling_time,
1311 process_record->monitor.omp_serialization_time,
1312 process_record->monitor.gpu_runtime_time,
1313 process_record->monitor.gpu_useful_time,
1314 process_record->monitor.gpu_communication_time);
1315 }
1316 }
1317 }
1318
1319 3 static void process_to_txt(FILE *out_file) {
1320
1321 3 for (GSList *node = region_records;
1322
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
1323 3 node = node->next) {
1324
1325 3 region_record_t *region_record = node->data;
1326
1327
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1328
1329 3 process_record_t *process_record = &region_record->process_records[i];
1330
1331 6 float ipc = process_record->monitor.cycles > 0
1332 ? (float)process_record->monitor.instructions
1333 / process_record->monitor.cycles
1334
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 : 0.0f;
1335
1336 3 fprintf(out_file,
1337 "%s\n"
1338 "### Name: %s\n"
1339 "### Process: %d (%s)\n"
1340 "### Rank: %d\n"
1341 "### CpuSet: %s\n"
1342 "### Elapsed time: %"PRId64" ns\n"
1343 "### Useful time: %"PRId64" ns\n"
1344 "### Not useful MPI: %"PRId64" ns\n"
1345 "### Not useful OMP Load Imbalance: %"PRId64" ns\n"
1346 "### Not useful OMP Scheduling: %"PRId64" ns\n"
1347 "### Not useful OMP Serialization: %"PRId64" ns\n"
1348 "### Not useful GPU runtime: %"PRId64" ns\n"
1349 "### Device useful time: %"PRId64" ns\n"
1350 "### Device communication time: %"PRId64" ns\n"
1351 "### IPC: %.2f\n",
1352 make_header("Monitoring Region Summary"),
1353 3 region_record->name,
1354 3 process_record->pid, process_record->hostname,
1355 process_record->rank,
1356 3 process_record->cpuset,
1357 process_record->monitor.elapsed_time,
1358 process_record->monitor.useful_time,
1359 process_record->monitor.mpi_time,
1360 process_record->monitor.omp_load_imbalance_time,
1361 process_record->monitor.omp_scheduling_time,
1362 process_record->monitor.omp_serialization_time,
1363 process_record->monitor.gpu_runtime_time,
1364 process_record->monitor.gpu_useful_time,
1365 process_record->monitor.gpu_communication_time,
1366 ipc);
1367 }
1368 }
1369 3 }
1370
1371 28 static void process_finalize(void) {
1372
1373 /* Free every record data */
1374 28 for (GSList *node = region_records;
1375
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 28 times.
39 node != NULL;
1376 11 node = node->next) {
1377
1378 11 region_record_t *record = node->data;
1379 11 free(record);
1380 }
1381
1382 /* Free list */
1383 28 g_slist_free(region_records);
1384 28 region_records = NULL;
1385 28 }
1386
1387
1388 /*********************************************************************************/
1389 /* TALP Common */
1390 /*********************************************************************************/
1391 typedef struct TALPCommonRecord {
1392 char *dlb_version; // Version X.Y.Z[-#-hash]
1393 char *time_of_creation; // ISO 8601 string
1394 } talp_common_record_t;
1395 static talp_common_record_t common_record;
1396
1397 28 static void talp_output_record_common(void) {
1398 /* Initialize structure */
1399 28 time_t now = time(NULL);
1400 28 common_record = (const talp_common_record_t) {
1401 .dlb_version = PACKAGE_VERSION,
1402 28 .time_of_creation = get_iso_8601_string(localtime(&now)),
1403 };
1404 28 }
1405
1406 3 static void common_to_json(FILE *out_file) {
1407 3 fprintf(out_file,
1408 " \"dlbVersion\": \"%s\",\n"
1409 " \"timestamp\": \"%s\",\n",
1410 common_record.dlb_version,
1411 common_record.time_of_creation);
1412 3 }
1413
1414 1 static void common_to_xml(FILE *out_file) {
1415
1416 1 fprintf(out_file,
1417 " <dlbVersion>%s</dlbVersion>\n"
1418 " <timestamp>%s</timestamp>\n",
1419 common_record.dlb_version,
1420 common_record.time_of_creation);
1421 1 }
1422
1423 3 static void common_to_txt(FILE *out_file) {
1424
1425 3 fprintf(out_file,
1426 "%s\n"
1427 "### DLB Version: %s\n"
1428 "### Timestamp: %s\n",
1429 make_header("TALP Common Data"),
1430 common_record.dlb_version,
1431 common_record.time_of_creation);
1432 3 }
1433
1434 28 static void common_finalize(void) {
1435 28 free(common_record.time_of_creation);
1436 28 }
1437
1438
1439
1440
1441 /*********************************************************************************/
1442 /* TALP Resources */
1443 /*********************************************************************************/
1444 typedef struct TALPResourcesRecord {
1445 unsigned int num_cpus;
1446 unsigned int num_nodes;
1447 unsigned int num_mpi_ranks;
1448 unsigned int num_gpus;
1449 } talp_resources_record_t;
1450 static talp_resources_record_t resources_record;
1451
1452 8 void talp_output_record_resources(int num_cpus, int num_nodes, int num_mpi_ranks,
1453 int num_gpus) {
1454
1455 8 resources_record = (const talp_resources_record_t) {
1456 8 .num_cpus = (unsigned int) num_cpus,
1457 8 .num_nodes = (unsigned int) num_nodes,
1458 8 .num_mpi_ranks = (unsigned int) num_mpi_ranks,
1459 8 .num_gpus = (unsigned int) num_gpus,
1460 };
1461 8 }
1462
1463 3 static void resources_to_json(FILE *out_file) {
1464 3 fprintf(out_file,
1465 " \"resources\": {\n"
1466 " \"numCpus\": %u,\n"
1467 " \"numNodes\": %u,\n"
1468 " \"numMpiRanks\": %u,\n"
1469 " \"numGpus\": %u\n"
1470 " },\n",
1471 resources_record.num_cpus,
1472 resources_record.num_nodes,
1473 resources_record.num_mpi_ranks,
1474 resources_record.num_gpus);
1475 3 }
1476
1477 1 static void resources_to_xml(FILE *out_file) {
1478
1479 1 fprintf(out_file,
1480 " <resources>\n"
1481 " <numCpus>%u</numCpus>\n"
1482 " <numNodes>%u</numNodes>\n"
1483 " <numMpiRanks>%u</numMpiRanks>\n"
1484 " </resources>",
1485 resources_record.num_cpus,
1486 resources_record.num_nodes,
1487 resources_record.num_mpi_ranks);
1488 1 }
1489
1490 3 static void resources_to_txt(FILE *out_file) {
1491
1492 3 fprintf(out_file,
1493 "%s\n"
1494 "### Number of CPUs: %u\n"
1495 "### Number of Nodes: %u\n"
1496 "### Number of MPI processes: %u\n"
1497 "### Number of GPUs: %u\n",
1498 make_header("TALP Resources"),
1499 resources_record.num_cpus,
1500 resources_record.num_nodes,
1501 resources_record.num_mpi_ranks,
1502 resources_record.num_gpus);
1503 3 }
1504
1505
1506 /*********************************************************************************/
1507 /* Helper functions */
1508 /*********************************************************************************/
1509
1510 3 static void json_header(FILE *out_file) {
1511 3 fprintf(out_file, "{\n");
1512 3 }
1513
1514 3 static void json_footer(FILE *out_file) {
1515 3 fprintf(out_file, "\n}\n");
1516 3 }
1517
1518 1 static void xml_header(FILE *out_file) {
1519 1 fprintf(out_file, "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
1520 "<root>\n");
1521 1 }
1522
1523 1 static void xml_footer(FILE *out_file) {
1524 1 fprintf(out_file, "</root>\n");
1525 1 }
1526
1527
1528 /*********************************************************************************/
1529 /* Output directory/file logic */
1530 /*********************************************************************************/
1531
1532 // Helper: recursively create directories (mkdir -p equivalent)
1533 9 static int mkdir_p(const char *path, mode_t mode) {
1534 char tmp[PATH_MAX];
1535 9 char *p = NULL;
1536
1537 9 snprintf(tmp, sizeof(tmp), "%s", path);
1538 9 size_t len = strlen(tmp);
1539
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 if (len == 0) return -1;
1540
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 if (tmp[len - 1] == '/') {
1541 tmp[len - 1] = '\0';
1542 }
1543
1544
2/2
✓ Branch 0 taken 421 times.
✓ Branch 1 taken 9 times.
430 for (p = tmp + 1; *p; p++) {
1545
2/2
✓ Branch 0 taken 25 times.
✓ Branch 1 taken 396 times.
421 if (*p == '/') {
1546 25 *p = '\0';
1547
2/4
✓ Branch 1 taken 25 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 25 times.
25 if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
1548 return -1;
1549 }
1550 25 *p = '/';
1551 }
1552 }
1553
1554
4/4
✓ Branch 1 taken 8 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 7 times.
9 if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
1555 1 return -1;
1556 }
1557
1558 8 return 0;
1559 }
1560
1561 // open file for appending or writing, creating dirs as needed
1562 12 static FILE *open_file_with_dirs(const char *filename, bool *append) {
1563
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 9 times.
12 if (access(filename, F_OK) == 0) {
1564 FILE *f;
1565
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (append) {
1566 1 *append = true;
1567 1 f = fopen(filename, "a");
1568 } else {
1569 2 f = fopen(filename, "w");
1570 }
1571
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (!f) {
1572 1 warning("Cannot open existing file %s: %s", filename, strerror(errno));
1573 }
1574 3 return f;
1575 } else {
1576 // Ensure parent directories exist
1577 char pathbuf[PATH_MAX];
1578 9 snprintf(pathbuf, sizeof(pathbuf), "%s", filename);
1579 9 char *dir = dirname(pathbuf);
1580
1581
2/2
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 8 times.
9 if (mkdir_p(dir, 0755) != 0) {
1582 1 warning("Cannot create directory %s: %s", dir, strerror(errno));
1583 1 return NULL;
1584 }
1585
1586
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (append) {
1587 4 *append = false;
1588 }
1589 8 FILE *f = fopen(filename, "w");
1590
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 if (!f) {
1591 warning("Cannot create file %s: %s", filename, strerror(errno));
1592 }
1593 8 return f;
1594 }
1595 }
1596
1597
1598
1599 /*********************************************************************************/
1600 /* Finalize */
1601 /*********************************************************************************/
1602
1603 260 static bool check_coefficient(float coeffiecient) {
1604
2/4
✓ Branch 0 taken 260 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 260 times.
✗ Branch 3 not taken.
260 return 0.0f <= coeffiecient && coeffiecient <= 1.0;
1605 }
1606
1607 static void warn_negative_counters(void) {
1608 static bool warned_once = false;
1609 if (!warned_once) {
1610 warning("Some obtained PAPI counters contain negative values. Check your"
1611 " installation or report the error to %s", PACKAGE_BUGREPORT);
1612 warned_once = true;
1613 }
1614 }
1615
1616 static void warn_wrong_coefficient(void) {
1617 static bool warned_once = false;
1618 if (!warned_once) {
1619 warning("Some computed POP metric coefficient is not within the allowed"
1620 " range [0.0, 1.0]. If you think this is an unexpected value,"
1621 " please report the error to %s", PACKAGE_BUGREPORT);
1622 warned_once = true;
1623 }
1624 }
1625
1626 28 static void sanitize_records(void) {
1627
1628 /* pop_metrics_records:
1629 * - instructions and cycles need to be >= 0
1630 * - computed efficiencyes need to be [0.0, 1.0]
1631 */
1632 28 for (GSList *node = pop_metrics_records;
1633
2/2
✓ Branch 0 taken 26 times.
✓ Branch 1 taken 28 times.
54 node != NULL;
1634 26 node = node->next) {
1635
1636 26 dlb_pop_metrics_t *record = node->data;
1637
1638
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
26 if (record->cycles < 0) {
1639 record->cycles = 0.0;
1640 warn_negative_counters();
1641 }
1642
1643
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 26 times.
26 if (record->instructions < 0) {
1644 record->instructions = 0.0;
1645 warn_negative_counters();
1646 }
1647
1648
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 if (!check_coefficient(record->parallel_efficiency)
1649
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->mpi_parallel_efficiency)
1650
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->mpi_communication_efficiency)
1651
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->mpi_load_balance)
1652
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->mpi_load_balance_in)
1653
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->mpi_load_balance_out)
1654
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->omp_parallel_efficiency)
1655
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->omp_load_balance)
1656
1/2
✓ Branch 1 taken 26 times.
✗ Branch 2 not taken.
26 || !check_coefficient(record->omp_scheduling_efficiency)
1657
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 26 times.
26 || !check_coefficient(record->omp_serialization_efficiency)) {
1658 warn_wrong_coefficient();
1659 }
1660 }
1661
1662 /* node_records: nothing to sanitize for now */
1663
1664 /* region_records: */
1665 28 for (GSList *node = region_records;
1666
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 28 times.
39 node != NULL;
1667 11 node = node->next) {
1668
1669 11 region_record_t *region_record = node->data;
1670
1671
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 11 times.
22 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1672
1673 11 dlb_monitor_t *monitor = &region_record->process_records[i].monitor;
1674
1675
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
11 if (monitor->cycles < 0) {
1676 monitor->cycles = 0.0;
1677 warn_negative_counters();
1678 }
1679
1680
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
11 if (monitor->instructions < 0) {
1681 monitor->instructions = 0.0;
1682 warn_negative_counters();
1683 }
1684 }
1685 }
1686 28 }
1687
1688 28 void talp_output_finalize(const char *output_file) {
1689
1690 /* For efficiency, all records are prepended to their respective lists and
1691 * reversed here */
1692 28 pop_metrics_records = g_slist_reverse(pop_metrics_records);
1693 28 node_records = g_slist_reverse(node_records);
1694 28 region_records = g_slist_reverse(region_records);
1695
1696 /* Sanitize erroneous values */
1697 28 sanitize_records();
1698
1699 28 talp_output_record_common();
1700
1701 /* If the process has changed the locale, temporarily push the C locale to
1702 * print floats with the expected notation (a comma as a decimal separator
1703 * will break CSV and JSON files). The object associated with the locale
1704 * can be safely freed after it has been set. */
1705 28 locale_t new_locale = newlocale(LC_ALL, "C", 0);
1706 28 uselocale(new_locale);
1707 28 freelocale(new_locale);
1708
1709
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 10 times.
28 if (output_file == NULL) {
1710 /* No output file, just print all records */
1711 18 pop_metrics_print();
1712 18 node_print();
1713 18 process_print();
1714 } else {
1715 /* Do not open file if process has no data */
1716
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
10 if (pop_metrics_records == NULL
1717 && node_records == NULL
1718 && region_records == NULL) return;
1719
1720 /* Check file extension */
1721 typedef enum Extension {
1722 EXT_JSON,
1723 EXT_XML,
1724 EXT_CSV,
1725 EXT_TXT,
1726 } extension_t;
1727 10 extension_t extension = EXT_TXT;
1728 10 const char *ext = strrchr(output_file, '.');
1729
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 1 times.
10 if (ext != NULL) {
1730
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 if (strcmp(ext+1, "json") == 0) {
1731 3 extension = EXT_JSON;
1732
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 } else if (strcmp(ext+1, "xml") == 0) {
1733 1 extension = EXT_XML;
1734
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
5 } else if (strcmp(ext+1, "csv") == 0) {
1735 3 extension = EXT_CSV;
1736 }
1737 }
1738
1739 /* Deprecation warning*/
1740
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 9 times.
10 if(extension == EXT_XML){
1741 1 warning("Deprecated: The support for XML output is deprecated and"
1742 " will be removed in the next release");
1743 }
1744
1745 /* Specific case where output file needs to be split */
1746
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 7 times.
10 if (extension == EXT_CSV
1747 3 && !!(pop_metrics_records != NULL)
1748 3 + !!(node_records != NULL)
1749
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
4 + !!(region_records != NULL) > 1) {
1750
1751 /* Length without extension */
1752 1 int filename_useful_len = ext - output_file;
1753
1754 /* POP */
1755
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_metrics_records != NULL) {
1756 1 const char *pop_ext = "-pop.csv";
1757 1 size_t pop_file_len = filename_useful_len + strlen(pop_ext) + 1;
1758 1 char *pop_filename = malloc(sizeof(char)*pop_file_len);
1759 1 sprintf(pop_filename, "%.*s%s", filename_useful_len, output_file, pop_ext);
1760 bool append_to_csv;
1761 1 FILE *pop_file = open_file_with_dirs(pop_filename, &append_to_csv);
1762
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_file) {
1763 1 pop_metrics_to_csv(pop_file, append_to_csv);
1764 1 fclose(pop_file);
1765 } else {
1766 warning("Writing metrics to stdout instead:");
1767 pop_metrics_to_csv(stdout, /* append: */ false);
1768 }
1769 }
1770
1771 /* Node */
1772
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_records != NULL) {
1773 1 const char *node_ext = "-node.csv";
1774 1 size_t node_file_len = filename_useful_len + strlen(node_ext) + 1;
1775 1 char *node_filename = malloc(sizeof(char)*node_file_len);
1776 1 sprintf(node_filename, "%.*s%s", filename_useful_len, output_file, node_ext);
1777 bool append_to_csv;
1778 1 FILE *node_file = open_file_with_dirs(node_filename, &append_to_csv);
1779
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_file) {
1780 1 node_to_csv(node_file, append_to_csv);
1781 1 fclose(node_file);
1782 } else {
1783 warning("Writing metrics to stdout instead:");
1784 node_to_csv(stdout, /* append: */ false);
1785 }
1786 }
1787
1788 /* Process */
1789
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (region_records != NULL) {
1790 1 const char *process_ext = "-process.csv";
1791 1 size_t process_file_len = filename_useful_len + strlen(process_ext) + 1;
1792 1 char *process_filename = malloc(sizeof(char)*process_file_len);
1793 1 sprintf(process_filename, "%.*s%s", filename_useful_len, output_file, process_ext);
1794 bool append_to_csv;
1795 1 FILE *process_file = open_file_with_dirs(process_filename, &append_to_csv);
1796
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (process_file) {
1797 1 process_to_csv(process_file, append_to_csv);
1798 1 fclose(process_file);
1799 } else {
1800 warning("Writing metrics to stdout instead:");
1801 process_to_csv(stdout, /* append: */ false);
1802 }
1803 }
1804 }
1805
1806 /* Write to file */
1807 else {
1808 /* Open file */
1809 bool append_to_csv;
1810
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 7 times.
9 FILE *out_file = open_file_with_dirs(output_file,
1811 extension == EXT_CSV ? &append_to_csv : NULL);
1812
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 7 times.
9 if (!out_file) {
1813 2 warning("Writing metrics to stdout instead:");
1814 2 out_file = stdout;
1815 2 append_to_csv = false;
1816 }
1817
1818 /* Write records to file */
1819
4/5
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
9 switch(extension) {
1820 3 case EXT_JSON:
1821 3 json_header(out_file);
1822 3 common_to_json(out_file);
1823 3 resources_to_json(out_file);
1824 3 pop_metrics_to_json(out_file);
1825 3 node_to_json(out_file);
1826 3 process_to_json(out_file);
1827 3 json_footer(out_file);
1828 3 break;
1829 1 case EXT_XML:
1830 1 xml_header(out_file);
1831 1 common_to_xml(out_file);
1832 1 resources_to_xml(out_file);
1833 1 pop_metrics_to_xml(out_file);
1834 1 node_to_xml(out_file);
1835 1 process_to_xml(out_file);
1836 1 xml_footer(out_file);
1837 1 break;
1838 2 case EXT_CSV:
1839 2 pop_metrics_to_csv(out_file, append_to_csv);
1840 2 node_to_csv(out_file, append_to_csv);
1841 2 process_to_csv(out_file, append_to_csv);
1842 2 break;
1843 3 case EXT_TXT:
1844 3 common_to_txt(out_file);
1845 3 resources_to_txt(out_file);
1846 3 pop_metrics_to_txt(out_file);
1847 3 node_to_txt(out_file);
1848 3 process_to_txt(out_file);
1849 3 break;
1850 }
1851
1852 /* Close file */
1853
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 2 times.
9 if (out_file != stdout) {
1854 7 fclose(out_file);
1855 }
1856 }
1857 }
1858
1859 // Restore locale
1860 28 uselocale(LC_GLOBAL_LOCALE);
1861
1862 // De-allocate all records
1863 28 common_finalize();
1864 28 pop_metrics_finalize();
1865 28 node_finalize();
1866 28 process_finalize();
1867 }
1868