GCC Code Coverage Report


Directory: src/
File: src/talp/talp_output.c
Date: 2026-04-17 13:45:07
Exec Total Coverage
Lines: 729 799 91.2%
Functions: 47 50 94.0%
Branches: 245 335 73.1%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "talp/talp_output.h"
25
26 #include "LB_core/spd.h"
27 #include "apis/dlb_talp.h"
28 #include "support/debug.h"
29 #include "support/gslist.h"
30 #include "support/mytime.h"
31 #include "support/options.h"
32 #include "talp/talp.h"
33 #include "talp/talp_types.h"
34 #include "talp/perf_metrics.h"
35
36 #include <errno.h>
37 #include <libgen.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <pthread.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sys/stat.h>
45 #include <unistd.h>
46
47
/* Compute instructions per cycle.
 *
 * Returns instructions / cycles only when both counters are strictly
 * positive; any other input (zero, negative, NaN) yields 0 so the division
 * is never performed with an unusable denominator. */
static float sanitized_ipc(float instructions, float cycles) {
    if (!(instructions > 0 && cycles > 0)) {
        return 0.0f;
    }
    return instructions / cycles;
}
55
/* Build a banner line with the title centred between runs of '#'.
 *
 * The banner is 62 characters wide: left hashes, a space, the title, a space
 * and right hashes (the right side gets the extra '#' when the padding is
 * odd). A title that does not fit together with its two spaces is returned
 * on its own, truncated to the buffer size if needed.
 *
 * The result lives in a function-local static buffer, so it is overwritten
 * by the next call and must be consumed before calling again. */
static const char* make_header(const char *title) {
    static char buf[80];
    const int title_len = strlen(title);

    int width = 62;
    if (width >= (int)sizeof(buf)) {
        width = sizeof(buf) - 1; // prevent overflow
    }

    /* Title too long for a decorated banner: emit it undecorated */
    if (title_len + 2 > width) {
        snprintf(buf, sizeof(buf), "%s", title);
        return buf;
    }

    const int padding = width - title_len - 2;
    const int lead = padding / 2;
    const int trail = padding - lead;

    /* Fill the buffer left to right with a moving cursor */
    char *cursor = buf;
    memset(cursor, '#', lead);
    cursor += lead;
    *cursor++ = ' ';
    memcpy(cursor, title, title_len);
    cursor += title_len;
    *cursor++ = ' ';
    memset(cursor, '#', trail);
    cursor += trail;
    *cursor = '\0';

    return buf;
}
84
85
86 /*********************************************************************************/
87 /* GPU vendor */
88 /*********************************************************************************/
89
90 static gpu_vendor_t gpu_vendor = GPU_VENDOR_NONE;
91
92 void talp_output_record_gpu_vendor(gpu_vendor_t vendor) {
93 gpu_vendor = vendor;
94 }
95
96 /*********************************************************************************/
97 /* Monitoring Region */
98 /*********************************************************************************/
99
100 8 void talp_output_print_monitoring_region(const dlb_monitor_t *monitor,
101 const char *cpuset_str, talp_flags_t talp_flags) {
102
103 char elapsed_time_str[16];
104 8 ns_to_human(elapsed_time_str, 16, monitor->elapsed_time);
105
106 8 info("%s", make_header("Monitoring Region Summary"));
107 8 info("### Name: %s", monitor->name);
108 8 info("### Elapsed Time: %s", elapsed_time_str);
109 8 info("### Useful time: %"PRId64" ns",
110 8 monitor->useful_time);
111
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_flags.have_mpi) {
112 4 info("### Not useful MPI: %"PRId64" ns",
113 4 monitor->mpi_time);
114 }
115
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_openmp) {
116 1 info("### Not useful OMP Load Balance: %"PRId64" ns",
117 1 monitor->omp_load_imbalance_time);
118 1 info("### Not useful OMP Scheduling: %"PRId64" ns",
119 1 monitor->omp_scheduling_time);
120 1 info("### Not useful OMP Serialization: %"PRId64" ns",
121 1 monitor->omp_serialization_time);
122 }
123
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_gpu) {
124 1 info("### Not useful GPU runtime: %"PRId64" ns",
125 1 monitor->gpu_runtime_time);
126 1 info("### Device useful time: %"PRId64" ns",
127 1 monitor->gpu_useful_time);
128 1 info("### Device communication time: %"PRId64" ns",
129 1 monitor->gpu_communication_time);
130 }
131 8 info("### CpuSet: %s", cpuset_str);
132
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_hwc) {
133 1 float ipc = sanitized_ipc(monitor->instructions, monitor->cycles);
134 1 info("### IPC: %.2f ", ipc);
135 }
136
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_flags.have_mpi) {
137 4 info("### Number of MPI calls: %"PRId64,
138 4 monitor->num_mpi_calls);
139 }
140
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_openmp) {
141 1 info("### Number of OpenMP parallels: %"PRId64,
142 1 monitor->num_omp_parallels);
143 1 info("### Number of OpenMP tasks: %"PRId64,
144 1 monitor->num_omp_tasks);
145 }
146
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_gpu) {
147 1 info("### Number of GPU runtime calls: %"PRId64,
148 1 monitor->num_gpu_runtime_calls);
149 }
150 8 }
151
152
153 /*********************************************************************************/
154 /* POP Metrics */
155 /*********************************************************************************/
156
157 static GSList *pop_metrics_records = NULL;
158
159 32 void talp_output_record_pop_metrics(const dlb_pop_metrics_t *metrics) {
160
161 /* Copy structure */
162 32 dlb_pop_metrics_t *new_record = malloc(sizeof(dlb_pop_metrics_t));
163 32 *new_record = *metrics;
164
165 /* Add record to list */
166 32 pop_metrics_records = g_slist_prepend(pop_metrics_records, new_record);
167 32 }
168
169 24 static void pop_metrics_print(void) {
170
171 24 for (GSList *node = pop_metrics_records;
172
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 24 times.
43 node != NULL;
173 19 node = node->next) {
174
175 19 dlb_pop_metrics_t *record = node->data;
176
177
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 1 times.
19 if (record->elapsed_time > 0) {
178
179 18 float avg_ipc = sanitized_ipc(record->instructions, record->cycles);
180 char elapsed_time_str[16];
181 18 ns_to_human(elapsed_time_str, 16, record->elapsed_time);
182
183
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 1 times.
35 bool have_gpu_activity = record->num_gpu_runtime_calls > 0 ||
184
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17 times.
17 record->gpu_useful_time > 0;
185
186 18 info("%s", make_header("Monitoring Region POP Metrics"));
187 18 info("### Name: %s", record->name);
188 18 info("### Elapsed Time: %s", elapsed_time_str);
189
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (have_gpu_activity) {
190 1 info("### Host");
191 1 info("### ----");
192 }
193
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (record->mpi_parallel_efficiency > 0.0f &&
194
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 record->omp_parallel_efficiency > 0.0f) {
195 18 info("### Parallel efficiency: %1.2f",
196 18 record->parallel_efficiency);
197 }
198
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (record->num_mpi_calls > 0) {
199 1 info("### - MPI Parallel efficiency: %1.2f",
200 1 record->mpi_parallel_efficiency);
201 1 info("### - Communication efficiency: %1.2f",
202 1 record->mpi_communication_efficiency);
203 1 info("### - Load Balance: %1.2f",
204 1 record->mpi_load_balance);
205 1 info("### - In: %1.2f",
206 1 record->mpi_load_balance_in);
207 1 info("### - Out: %1.2f",
208 1 record->mpi_load_balance_out);
209 }
210
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (record->num_omp_parallels + record->num_omp_tasks > 0) {
211 2 info("### - OpenMP Parallel efficiency: %1.2f",
212 2 record->omp_parallel_efficiency);
213 2 info("### - Load Balance: %1.2f",
214 2 record->omp_load_balance);
215 2 info("### - Scheduling efficiency: %1.2f",
216 2 record->omp_scheduling_efficiency);
217 2 info("### - Serialization efficiency: %1.2f",
218 2 record->omp_serialization_efficiency);
219 }
220
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (have_gpu_activity) {
221 1 info("### - Device Offload efficiency: %1.2f",
222 1 record->device_offload_efficiency);
223 }
224
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (avg_ipc > 0.0f) {
225 1 float avg_freq = record->cycles / record->useful_time;
226 1 info("### Computational metrics:");
227 1 info("### - Average useful IPC: %1.2f", avg_ipc);
228 1 info("### - Average useful frequency: %1.2f GHz", avg_freq);
229 1 info("### - Number of instructions: %1.2E", record->instructions);
230 }
231
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (have_gpu_activity) {
232 1 info("###");
233 1 info("### %s Device",
234
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 gpu_vendor == GPU_VENDOR_NVIDIA ? "NVIDIA"
235 1 : gpu_vendor == GPU_VENDOR_AMD ? "AMD"
236
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 : "GPU");
237 1 info("### %s-------",
238
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 gpu_vendor == GPU_VENDOR_NVIDIA ? "------"
239 : gpu_vendor == GPU_VENDOR_AMD ? "---"
240 : "---");
241 1 info("### Parallel efficiency: %1.2f",
242 1 record->gpu_parallel_efficiency);
243 1 info("### - Load Balance: %1.2f",
244 1 record->gpu_load_balance);
245 1 info("### - Communication efficiency: %1.2f",
246 1 record->gpu_communication_efficiency);
247 1 info("### - Orchestration efficiency: %1.2f",
248 1 record->gpu_orchestration_efficiency);
249 }
250 } else {
251 1 info("%s", make_header("Monitoring Region POP Metrics"));
252 1 info("### Name: %s", record->name);
253 1 info("### No data ###");
254 }
255 }
256 24 }
257
258 6 static void pop_metrics_to_json(FILE *out_file) {
259
260
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (pop_metrics_records != NULL) {
261 6 fprintf(out_file,
262 " \"Application\": {\n");
263
264 6 for (GSList *node = pop_metrics_records;
265
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 node != NULL;
266 6 node = node->next) {
267
268 6 dlb_pop_metrics_t *record = node->data;
269
270 6 fprintf(out_file,
271 " \"%s\": {\n"
272 " \"numCpus\": %d,\n"
273 " \"numMpiRanks\": %d,\n"
274 " \"numNodes\": %d,\n"
275 " \"avgCpus\": %.1f,\n"
276 " \"numGpus\": %d,\n"
277 " \"cycles\": %.0f,\n"
278 " \"instructions\": %.0f,\n"
279 " \"numMeasurements\": %"PRId64",\n"
280 " \"numMpiCalls\": %"PRId64",\n"
281 " \"numOmpParallels\": %"PRId64",\n"
282 " \"numOmpTasks\": %"PRId64",\n"
283 " \"numGpuRuntimeCalls\": %"PRId64",\n"
284 " \"elapsedTime\": %"PRId64",\n"
285 " \"usefulTime\": %"PRId64",\n"
286 " \"mpiTime\": %"PRId64",\n"
287 " \"ompLoadImbalanceTime\": %"PRId64",\n"
288 " \"ompSchedulingTime\": %"PRId64",\n"
289 " \"ompSerializationTime\": %"PRId64",\n"
290 " \"gpuRuntimeTime\": %"PRId64",\n"
291 " \"minMpiNormdProc\": %.0f,\n"
292 " \"minMpiNormdNode\": %.0f,\n"
293 " \"gpuUsefulTime\": %"PRId64",\n"
294 " \"gpuCommunicationTime\": %"PRId64",\n"
295 " \"maxGpuUsefulTime\": %"PRId64",\n"
296 " \"maxGpuActiveTime\": %"PRId64",\n"
297 " \"parallelEfficiency\": %.2f,\n"
298 " \"mpiParallelEfficiency\": %.2f,\n"
299 " \"mpiCommunicationEfficiency\": %.2f,\n"
300 " \"mpiLoadBalance\": %.2f,\n"
301 " \"mpiLoadBalanceIn\": %.2f,\n"
302 " \"mpiLoadBalanceOut\": %.2f,\n"
303 " \"ompParallelEfficiency\": %.2f,\n"
304 " \"ompLoadBalance\": %.2f,\n"
305 " \"ompSchedulingEfficiency\": %.2f,\n"
306 " \"ompSerializationEfficiency\": %.2f,\n"
307 " \"deviceOffloadEfficiency\": %.2f,\n"
308 " \"gpuParallelEfficiency\": %.2f,\n"
309 " \"gpuLoadBalance\": %.2f,\n"
310 " \"gpuCommunicationEfficiency\": %.2f,\n"
311 " \"gpuOrchestrationEfficiency\": %.2f\n"
312 " }%s\n",
313 6 record->name,
314 record->num_cpus,
315 record->num_mpi_ranks,
316 record->num_nodes,
317 6 record->avg_cpus,
318 record->num_gpus,
319 record->cycles,
320 record->instructions,
321 record->num_measurements,
322 record->num_mpi_calls,
323 record->num_omp_parallels,
324 record->num_omp_tasks,
325 record->num_gpu_runtime_calls,
326 record->elapsed_time,
327 record->useful_time,
328 record->mpi_time,
329 record->omp_load_imbalance_time,
330 record->omp_scheduling_time,
331 record->omp_serialization_time,
332 record->gpu_runtime_time,
333 record->min_mpi_normd_proc,
334 record->min_mpi_normd_node,
335 record->gpu_useful_time,
336 record->gpu_communication_time,
337 record->max_gpu_useful_time,
338 record->max_gpu_active_time,
339 6 record->parallel_efficiency,
340 6 record->mpi_parallel_efficiency,
341 6 record->mpi_communication_efficiency,
342 6 record->mpi_load_balance,
343 6 record->mpi_load_balance_in,
344 6 record->mpi_load_balance_out,
345 6 record->omp_parallel_efficiency,
346 6 record->omp_load_balance,
347 6 record->omp_scheduling_efficiency,
348 6 record->omp_serialization_efficiency,
349 6 record->device_offload_efficiency,
350 6 record->gpu_parallel_efficiency,
351 6 record->gpu_load_balance,
352 6 record->gpu_communication_efficiency,
353 6 record->gpu_orchestration_efficiency,
354
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 node->next != NULL ? "," : "");
355 }
356 6 fprintf(out_file,
357 " }"); /* no eol */
358 }
359 6 }
360
361 1 static void pop_metrics_to_xml(FILE *out_file) {
362
363 1 for (GSList *node = pop_metrics_records;
364
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
365 1 node = node->next) {
366
367 1 dlb_pop_metrics_t *record = node->data;
368
369 1 fprintf(out_file,
370 " <Application>\n"
371 " <name>%s</name>\n"
372 " <numCpus>%d</numCpus>\n"
373 " <numMpiRanks>%d</numMpiRanks>\n"
374 " <numNodes>%d</numNodes>\n"
375 " <avgCpus>%.1f</avgCpus>\n"
376 " <cycles>%.0f</cycles>\n"
377 " <instructions>%.0f</instructions>\n"
378 " <numMeasurements>%"PRId64"</numMeasurements>\n"
379 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
380 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
381 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
382 " <elapsedTime>%"PRId64"</elapsedTime>\n"
383 " <usefulTime>%"PRId64"</usefulTime>\n"
384 " <mpiTime>%"PRId64"</mpiTime>\n"
385 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
386 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
387 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
388 " <minMpiNormdProc>%.0f</minMpiNormdProc>\n"
389 " <minMpiNormdNode>%.0f</minMpiNormdNode>\n"
390 " <parallelEfficiency>%.2f</parallelEfficiency>\n"
391 " <mpiParallelEfficiency>%.2f</mpiParallelEfficiency>\n"
392 " <mpiCommunicationEfficiency>%.2f</mpiCommunicationEfficiency>\n"
393 " <mpiLoadBalance>%.2f</mpiLoadBalance>\n"
394 " <mpiLoadBalanceIn>%.2f</mpiLoadBalanceIn>\n"
395 " <mpiLoadBalanceOut>%.2f</mpiLoadBalanceOut>\n"
396 " <ompParallelEfficiency>%.2f</ompParallelEfficiency>\n"
397 " <ompLoadBalance>%.2f</ompLoadBalance>\n"
398 " <ompSchedulingEfficiency>%.2f</ompSchedulingEfficiency>\n"
399 " <ompSerializationEfficiency>%.2f</ompSerializationEfficiency>\n"
400 " </Application>\n",
401 1 record->name,
402 record->num_cpus,
403 record->num_mpi_ranks,
404 record->num_nodes,
405 1 record->avg_cpus,
406 record->cycles,
407 record->instructions,
408 record->num_measurements,
409 record->num_mpi_calls,
410 record->num_omp_parallels,
411 record->num_omp_tasks,
412 record->elapsed_time,
413 record->useful_time,
414 record->mpi_time,
415 record->omp_load_imbalance_time,
416 record->omp_scheduling_time,
417 record->omp_serialization_time,
418 record->min_mpi_normd_proc,
419 record->min_mpi_normd_node,
420 1 record->parallel_efficiency,
421 1 record->mpi_parallel_efficiency,
422 1 record->mpi_communication_efficiency,
423 1 record->mpi_load_balance,
424 1 record->mpi_load_balance_in,
425 1 record->mpi_load_balance_out,
426 1 record->omp_parallel_efficiency,
427 1 record->omp_load_balance,
428 1 record->omp_scheduling_efficiency,
429 1 record->omp_serialization_efficiency
430 );
431 }
432 1 }
433
434 3 static void pop_metrics_to_txt(FILE *out_file) {
435
436 3 for (GSList *node = pop_metrics_records;
437
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
438 3 node = node->next) {
439
440 3 dlb_pop_metrics_t *record = node->data;
441
442
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (record->elapsed_time > 0) {
443 3 fprintf(out_file,
444 "%s\n"
445 "### Name: %s\n"
446 "### Number of CPUs: %d\n"
447 "### Number of MPI processes: %d\n"
448 "### Number of nodes: %d\n"
449 "### Average CPUs: %.1f\n"
450 "### Number of GPUs: %d\n"
451 "### Cycles: %.0f\n"
452 "### Instructions: %.0f\n"
453 "### Number of measurements: %"PRId64"\n"
454 "### Number of MPI calls: %"PRId64"\n"
455 "### Number of OpenMP parallel regions: %"PRId64"\n"
456 "### Number of OpenMP explicit tasks: %"PRId64"\n"
457 "### Number of GPU runtime calls: %"PRId64"\n"
458 "### Elapsed Time (ns): %"PRId64"\n"
459 "### Useful Time (ns): %"PRId64"\n"
460 "### MPI Time (ns): %"PRId64"\n"
461 "### OpenMP Load Imbalance Time (ns): %"PRId64"\n"
462 "### OpenMP Scheduling Time (ns): %"PRId64"\n"
463 "### OpenMP Serialization Time (ns): %"PRId64"\n"
464 "### GPU Runtime Time (ns): %"PRId64"\n"
465 "### MPI time normalized at process level of\n"
466 "### the process with the max non-MPI time: %.0f\n"
467 "### MPI time normalized at node level of\n"
468 "### the process with the max non-MPI time: %.0f\n"
469 "### Device useful time: %"PRId64"\n"
470 "### Device communication time: %"PRId64"\n"
471 "### Device max useful time: %"PRId64"\n"
472 "### Device max active time: %"PRId64"\n"
473 "### --- Host metrics ---\n"
474 "### Parallel efficiency: %.2f\n"
475 "### MPI Parallel efficiency: %.2f\n"
476 "### - MPI Communication efficiency: %.2f\n"
477 "### - MPI Load Balance: %.2f\n"
478 "### - MPI Load Balance in: %.2f\n"
479 "### - MPI Load Balance out: %.2f\n"
480 "### OpenMP Parallel efficiency: %.2f\n"
481 "### - OpenMP Load Balance: %.2f\n"
482 "### - OpenMP Scheduling efficiency: %.2f\n"
483 "### - OpenMP Serialization efficiency: %.2f\n"
484 "### Device Offload efficiency: %.2f\n"
485 "### --- Device metrics ---\n"
486 "### Device Parallel efficiency: %.2f\n"
487 "### - Device Load Balance: %.2f\n"
488 "### - Device Communication efficiency: %.2f\n"
489 "### - Device Orchestration efficiency: %.2f\n",
490 make_header("Monitoring Region POP Metrics"),
491 3 record->name,
492 record->num_cpus,
493 record->num_mpi_ranks,
494 record->num_nodes,
495 3 record->avg_cpus,
496 record->num_gpus,
497 record->cycles,
498 record->instructions,
499 record->num_measurements,
500 record->num_mpi_calls,
501 record->num_omp_parallels,
502 record->num_omp_tasks,
503 record->num_gpu_runtime_calls,
504 record->elapsed_time,
505 record->useful_time,
506 record->mpi_time,
507 record->omp_load_imbalance_time,
508 record->omp_scheduling_time,
509 record->omp_serialization_time,
510 record->gpu_runtime_time,
511 record->min_mpi_normd_proc,
512 record->min_mpi_normd_node,
513 record->gpu_useful_time,
514 record->gpu_communication_time,
515 record->max_gpu_useful_time,
516 record->max_gpu_active_time,
517 3 record->parallel_efficiency,
518 3 record->mpi_parallel_efficiency,
519 3 record->mpi_communication_efficiency,
520 3 record->mpi_load_balance,
521 3 record->mpi_load_balance_in,
522 3 record->mpi_load_balance_out,
523 3 record->omp_parallel_efficiency,
524 3 record->omp_load_balance,
525 3 record->omp_scheduling_efficiency,
526 3 record->omp_serialization_efficiency,
527 3 record->device_offload_efficiency,
528 3 record->gpu_parallel_efficiency,
529 3 record->gpu_load_balance,
530 3 record->gpu_communication_efficiency,
531 3 record->gpu_orchestration_efficiency
532 );
533 } else {
534 fprintf(out_file,
535 "%s\n"
536 "### Name: %s\n"
537 "### No data ###\n",
538 make_header("Monitoring Region POP Metrics"),
539 record->name);
540 }
541 }
542 3 }
543
544 3 static void pop_metrics_to_csv(FILE *out_file, bool append) {
545
546
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (pop_metrics_records == NULL) return;
547
548
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (!append) {
549 /* Print header */
550 2 fprintf(out_file,
551 "name,"
552 "numCpus,"
553 "numMpiRanks,"
554 "numNodes,"
555 "avgCpus,"
556 "cycles,"
557 "instructions,"
558 "numMeasurements,"
559 "numMpiCalls,"
560 "numOmpParallels,"
561 "numOmpTasks,"
562 "numGpuRuntimeCalls,"
563 "elapsedTime,"
564 "usefulTime,"
565 "mpiTime,"
566 "ompLoadImbalanceTime,"
567 "ompSchedulingTime,"
568 "ompSerializationTime,"
569 "gpuRuntimeTime,"
570 "minMpiNormdProc,"
571 "minMpiNormdNode,"
572 "parallelEfficiency,"
573 "mpiParallelEfficiency,"
574 "mpiCommunicationEfficiency,"
575 "mpiLoadBalance,"
576 "mpiLoadBalanceIn,"
577 "mpiLoadBalanceOut,"
578 "ompParallelEfficiency,"
579 "ompLoadBalance,"
580 "ompSchedulingEfficiency,"
581 "ompSerializationEfficiency,"
582 "deviceOffloadEfficiency,"
583 "gpuParallelEfficiency,"
584 "gpuLoadBalance,"
585 "gpuCommunicationEfficiency,"
586 "gpuOrchestrationEfficiency\n"
587 );
588 }
589
590 3 for (GSList *node = pop_metrics_records;
591
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
592 3 node = node->next) {
593
594 3 dlb_pop_metrics_t *record = node->data;
595
596 3 fprintf(out_file,
597 "\"%s\"," /* name */
598 "%d," /* numCpus */
599 "%d," /* numMpiRanks */
600 "%d," /* numNodes */
601 "%.1f," /* avgCpus */
602 "%.0f," /* cycles */
603 "%.0f," /* instructions */
604 "%"PRId64"," /* numMeasurements */
605 "%"PRId64"," /* numMpiCalls */
606 "%"PRId64"," /* numOmpParallels */
607 "%"PRId64"," /* numOmpTasks */
608 "%"PRId64"," /* numGpuRuntimeCalls */
609 "%"PRId64"," /* elapsedTime */
610 "%"PRId64"," /* usefulTime */
611 "%"PRId64"," /* mpiTime */
612 "%"PRId64"," /* ompLoadImbalanceTime */
613 "%"PRId64"," /* ompSchedulingTime */
614 "%"PRId64"," /* ompSerializationTime */
615 "%"PRId64"," /* gpuRuntimeTime */
616 "%.0f," /* minMpiNormdProc */
617 "%.0f," /* minMpiNormdNode */
618 "%.2f," /* parallelEfficiency */
619 "%.2f," /* mpiParallelEfficiency */
620 "%.2f," /* mpiCommunicationEfficiency */
621 "%.2f," /* mpiLoadBalance */
622 "%.2f," /* mpiLoadBalanceIn */
623 "%.2f," /* mpiLoadBalanceOut */
624 "%.2f," /* ompParallelEfficiency */
625 "%.2f," /* ompLoadBalance */
626 "%.2f," /* ompSchedulingEfficiency */
627 "%.2f," /* ompSerializationEfficiency */
628 "%.2f," /* deviceOffloadEfficiency */
629 "%.2f," /* gpuParallelEfficiency */
630 "%.2f," /* gpuLoadBalance */
631 "%.2f," /* gpuCommunicationEfficiency */
632 "%.2f\n", /* gpuOrchestrationEfficiency */
633 3 record->name,
634 record->num_cpus,
635 record->num_mpi_ranks,
636 record->num_nodes,
637 3 record->avg_cpus,
638 record->cycles,
639 record->instructions,
640 record->num_measurements,
641 record->num_mpi_calls,
642 record->num_omp_parallels,
643 record->num_omp_tasks,
644 record->num_gpu_runtime_calls,
645 record->elapsed_time,
646 record->useful_time,
647 record->mpi_time,
648 record->omp_load_imbalance_time,
649 record->omp_scheduling_time,
650 record->omp_serialization_time,
651 record->gpu_runtime_time,
652 record->min_mpi_normd_proc,
653 record->min_mpi_normd_node,
654 3 record->parallel_efficiency,
655 3 record->mpi_parallel_efficiency,
656 3 record->mpi_communication_efficiency,
657 3 record->mpi_load_balance,
658 3 record->mpi_load_balance_in,
659 3 record->mpi_load_balance_out,
660 3 record->omp_parallel_efficiency,
661 3 record->omp_load_balance,
662 3 record->omp_scheduling_efficiency,
663 3 record->omp_serialization_efficiency,
664 3 record->device_offload_efficiency,
665 3 record->gpu_parallel_efficiency,
666 3 record->gpu_load_balance,
667 3 record->gpu_communication_efficiency,
668 3 record->gpu_orchestration_efficiency
669 );
670 }
671 }
672
673 37 static void pop_metrics_finalize(void) {
674
675 /* Free every record data */
676 37 for (GSList *node = pop_metrics_records;
677
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 37 times.
69 node != NULL;
678 32 node = node->next) {
679
680 32 dlb_pop_metrics_t *record = node->data;
681 32 free(record);
682 }
683
684 /* Free list */
685 37 g_slist_free(pop_metrics_records);
686 37 pop_metrics_records = NULL;
687 37 }
688
689
690 /*********************************************************************************/
691 /* Node */
692 /*********************************************************************************/
693
694 static GSList *node_records = NULL;
695
696 12 void talp_output_record_node(const node_record_t *node_record) {
697
698 12 int nelems = node_record->nelems;
699
700 /* Allocate new record */
701 12 size_t process_records_size = sizeof(process_in_node_record_t) * nelems;
702 12 size_t node_record_size = sizeof(node_record_t) + process_records_size;
703 12 node_record_t *new_record = malloc(node_record_size);
704
705 /* Memcpy the entire struct */
706 12 memcpy(new_record, node_record, node_record_size);
707
708 /* Insert to list */
709 12 node_records = g_slist_prepend(node_records, new_record);
710 12 }
711
712 24 static void node_print(void) {
713
714 24 for (GSList *node = node_records;
715
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 24 times.
25 node != NULL;
716 1 node = node->next) {
717
718 1 node_record_t *node_record = node->data;
719
720 1 info(" |----------------------------------------------------------|");
721 1 info(" | Extended Report Node %4d |",
722 node_record->node_id);
723 1 info(" |----------------------------------------------------------|");
724 1 info(" | Process | Useful Time | MPI Time |");
725 1 info(" |------------|----------------------|----------------------|");
726
727
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
728 2 info(" | %-10d | %18e s | %18e s |",
729 node_record->processes[i].pid,
730 nsecs_to_secs(node_record->processes[i].useful_time),
731 nsecs_to_secs(node_record->processes[i].mpi_time));
732 2 info(" |------------|----------------------|----------------------|");
733 }
734
735
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_record->nelems > 0) {
736 1 info(" |------------|----------------------|----------------------|");
737 1 info(" | %-10s | %18e s | %18e s |", "Node Avg",
738 nsecs_to_secs(node_record->avg_useful_time),
739 nsecs_to_secs(node_record->avg_mpi_time));
740 1 info(" |------------|----------------------|----------------------|");
741 1 info(" | %-10s | %18e s | %18e s |", "Node Max",
742 nsecs_to_secs(node_record->max_useful_time),
743 nsecs_to_secs(node_record->max_mpi_time));
744 1 info(" |------------|----------------------|----------------------|");
745 }
746 }
747 24 }
748
749 6 static void node_to_json(FILE *out_file) {
750
751
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (node_records == NULL) return;
752
753 /* If there are pop_metrics, append to the existing dictionary */
754
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (pop_metrics_records != NULL) {
755 6 fprintf(out_file,",\n");
756 }
757
758 6 fprintf(out_file,
759 " \"node\": [\n");
760
761 6 for (GSList *node = node_records;
762
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 node != NULL;
763 6 node = node->next) {
764
765 6 node_record_t *node_record = node->data;
766
767 6 fprintf(out_file,
768 " {\n"
769 " \"id\": \"%d\",\n"
770 " \"process\": [\n",
771 node_record->node_id);
772
773
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
18 for (int i = 0; i < node_record->nelems; ++i) {
774 12 fprintf(out_file,
775 " {\n"
776 " \"id\": %d,\n"
777 " \"usefulTime\": %"PRId64",\n"
778 " \"mpiTime\": %"PRId64"\n"
779 " }%s\n",
780 node_record->processes[i].pid,
781 node_record->processes[i].useful_time,
782 node_record->processes[i].mpi_time,
783
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 i+1 < node_record->nelems ? "," : "");
784 }
785
786 6 fprintf(out_file,
787 " ],\n"
788 " \"nodeAvg\": {\n"
789 " \"usefulTime\": %"PRId64",\n"
790 " \"mpiTime\": %"PRId64"\n"
791 " },\n"
792 " \"nodeMax\": {\n"
793 " \"usefulTime\": %"PRId64",\n"
794 " \"mpiTime\": %"PRId64"\n"
795 " }\n"
796 " }%s\n",
797 node_record->avg_useful_time,
798 node_record->avg_mpi_time,
799 node_record->max_useful_time,
800 node_record->max_mpi_time,
801
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 node->next != NULL ? "," : "");
802 }
803 6 fprintf(out_file,
804 " ]"); /* no eol */
805 }
806
807 1 static void node_to_xml(FILE *out_file) {
808
809 1 for (GSList *node = node_records;
810
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
811 1 node = node->next) {
812
813 1 node_record_t *node_record = node->data;
814
815 1 fprintf(out_file,
816 " <node>\n"
817 " <id>%d</id>\n",
818 node_record->node_id);
819
820
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
821 2 fprintf(out_file,
822 " <process>\n"
823 " <id>%d</id>\n"
824 " <usefulTime>%"PRId64"</usefulTime>\n"
825 " <mpiTime>%"PRId64"</mpiTime>\n"
826 " </process>\n",
827 node_record->processes[i].pid,
828 node_record->processes[i].useful_time,
829 node_record->processes[i].mpi_time);
830 }
831
832 1 fprintf(out_file,
833 " <nodeAvg>\n"
834 " <usefulTime>%"PRId64"</usefulTime>\n"
835 " <mpiTime>%"PRId64"</mpiTime>\n"
836 " </nodeAvg>\n"
837 " <nodeMax>\n"
838 " <usefulTime>%"PRId64"</usefulTime>\n"
839 " <mpiTime>%"PRId64"</mpiTime>\n"
840 " </nodeMax>\n"
841 " </node>\n",
842 node_record->avg_useful_time,
843 node_record->avg_mpi_time,
844 node_record->max_useful_time,
845 node_record->max_mpi_time);
846 }
847 1 }
848
849 3 static void node_to_csv(FILE *out_file, bool append) {
850
851
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (node_records == NULL) return;
852
853
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
854 /* Print header */
855 1 fprintf(out_file,
856 "NodeId,"
857 "ProcessId,"
858 "ProcessUsefulTime,"
859 "ProcessMPITime,"
860 "NodeAvgUsefulTime,"
861 "NodeAvgMPITime,"
862 "NodeMaxUsefulTime,"
863 "NodeMaxMPITime\n");
864 }
865
866 1 for (GSList *node = node_records;
867
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
868 1 node = node->next) {
869
870 1 node_record_t *node_record = node->data;
871
872
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
873 2 fprintf(out_file,
874 "%d," /* NodeId */
875 "%d," /* ProcessId */
876 "%"PRId64"," /* ProcessUsefulTime */
877 "%"PRId64"," /* ProcessMPITime */
878 "%"PRId64"," /* NodeAvgUsefulTime */
879 "%"PRId64"," /* NodeAvgMPITime*/
880 "%"PRId64"," /* NodeMaxUsefulTime */
881 "%"PRId64"\n", /* NodeMaxMPITime*/
882 node_record->node_id,
883 node_record->processes[i].pid,
884 node_record->processes[i].useful_time,
885 node_record->processes[i].mpi_time,
886 node_record->avg_useful_time,
887 node_record->avg_mpi_time,
888 node_record->max_useful_time,
889 node_record->max_mpi_time);
890
891 }
892 }
893 }
894
895 3 static void node_to_txt(FILE *out_file) {
896
897 3 for (GSList *node = node_records;
898
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
899 3 node = node->next) {
900
901 3 node_record_t *node_record = node->data;
902
903 3 fprintf(out_file,
904 " |----------------------------------------------------------|\n"
905 " | Extended Report Node %4d |\n"
906 " |----------------------------------------------------------|\n"
907 " | Process | Useful Time | MPI Time |\n"
908 " |------------|----------------------|----------------------|\n",
909 node_record->node_id);
910
911
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
9 for (int i = 0; i < node_record->nelems; ++i) {
912 6 fprintf(out_file,
913 " | %-10d | %18e s | %18e s |\n"
914 " |------------|----------------------|----------------------|\n",
915 node_record->processes[i].pid,
916 nsecs_to_secs(node_record->processes[i].useful_time),
917 nsecs_to_secs(node_record->processes[i].mpi_time));
918 }
919
920
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (node_record->nelems > 0) {
921 3 fprintf(out_file,
922 " |------------|----------------------|----------------------|\n"
923 " | %-10s | %18e s | %18e s |\n"
924 " |------------|----------------------|----------------------|\n"
925 " | %-10s | %18e s | %18e s |\n"
926 " |------------|----------------------|----------------------|\n",
927 "Node Avg",
928 nsecs_to_secs(node_record->avg_useful_time),
929 nsecs_to_secs(node_record->avg_mpi_time),
930 "Node Max",
931 nsecs_to_secs(node_record->max_useful_time),
932 nsecs_to_secs(node_record->max_mpi_time));
933 }
934 }
935 3 }
936
937 37 static void node_finalize(void) {
938
939 /* Free every record data */
940 37 for (GSList *node = node_records;
941
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 37 times.
49 node != NULL;
942 12 node = node->next) {
943
944 12 node_record_t *record = node->data;
945 12 free(record);
946 }
947
948 /* Free list */
949 37 g_slist_free(node_records);
950 37 node_records = NULL;
951 37 }
952
953
954 /*********************************************************************************/
955 /* Process */
956 /*********************************************************************************/
957
958 typedef struct region_record_t {
959 char name[DLB_MONITOR_NAME_MAX];
960 int num_mpi_ranks;
961 process_record_t process_records[];
962 } region_record_t;
963
964 static GSList *region_records = NULL;
965
966 14 void talp_output_record_process(const char *region_name,
967 const process_record_t *process_record, int num_mpi_ranks) {
968
969 14 region_record_t *region_record = NULL;
970
971 /* Find region or allocate new one */
972 14 for (GSList *node = region_records;
973
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 node != NULL;
974 1 node = node->next) {
975
976 1 region_record_t *record = node->data;
977
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (strcmp(record->name, region_name) == 0) {
978 region_record = record;
979 break;
980 }
981 }
982
983 /* Allocate if not found */
984
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (region_record == NULL) {
985 /* Allocate and initialize new region */
986 14 size_t region_record_size = sizeof(region_record_t) +
987 14 sizeof(process_record_t) * num_mpi_ranks;
988 14 region_record = malloc(region_record_size);
989 14 *region_record = (const region_record_t) {
990 .num_mpi_ranks = num_mpi_ranks,
991 };
992 14 snprintf(region_record->name, DLB_MONITOR_NAME_MAX, "%s",
993 region_name);
994
995 /* Insert to list */
996 14 region_records = g_slist_prepend(region_records, region_record);
997 }
998
999 /* Copy process_record */
1000 14 int rank = process_record->rank;
1001
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 ensure(rank < num_mpi_ranks, "Wrong rank number in %s", __func__);
1002 14 memcpy(&region_record->process_records[rank], process_record, sizeof(process_record_t));
1003 14 }
1004
1005 24 static void process_print(void) {
1006
1007 24 for (GSList *node = region_records;
1008
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 24 times.
27 node != NULL;
1009 3 node = node->next) {
1010
1011 3 region_record_t *region_record = node->data;
1012
1013
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1014
1015 3 process_record_t *process_record = &region_record->process_records[i];
1016
1017 3 info("%s", make_header("Monitoring Region Summary"));
1018 3 info("### Name: %s",
1019 3 region_record->name);
1020 3 info("### Process: %d (%s)",
1021 3 process_record->pid, process_record->hostname);
1022 3 info("### Rank: %d",
1023 process_record->rank);
1024 3 info("### CpuSet: %s",
1025 3 process_record->cpuset);
1026 3 info("### Elapsed time: %"PRId64" ns",
1027 process_record->monitor.elapsed_time);
1028 3 info("### Useful time: %"PRId64" ns",
1029 process_record->monitor.useful_time);
1030
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (process_record->monitor.mpi_time > 0) {
1031 1 info("### Not useful MPI: %"PRId64" ns",
1032 process_record->monitor.mpi_time);
1033 }
1034
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (process_record->monitor.omp_load_imbalance_time > 0
1035
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 || process_record->monitor.omp_scheduling_time > 0
1036
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 || process_record->monitor.omp_serialization_time > 0) {
1037 info("### Not useful OMP Load Imbalance: %"PRId64" ns",
1038 process_record->monitor.omp_load_imbalance_time);
1039 info("### Not useful OMP Scheduling: %"PRId64" ns",
1040 process_record->monitor.omp_scheduling_time);
1041 info("### Not useful OMP Serialization: %"PRId64" ns",
1042 process_record->monitor.omp_serialization_time);
1043 }
1044
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.gpu_runtime_time > 0) {
1045 info("### Not useful GPU runtime: %"PRId64" ns",
1046 process_record->monitor.gpu_runtime_time);
1047 info("### Device useful time: %"PRId64" ns",
1048 process_record->monitor.gpu_useful_time);
1049 info("### Device communication time: %"PRId64" ns",
1050 process_record->monitor.gpu_communication_time);
1051 }
1052
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.instructions > 0
1053 && process_record->monitor.cycles > 0) {
1054 info("### IPC : %.2f",
1055 (float)process_record->monitor.instructions
1056 / process_record->monitor.cycles);
1057 }
1058 }
1059 }
1060 24 }
1061
1062 6 static void process_to_json(FILE *out_file) {
1063
1064
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (region_records == NULL) return;
1065
1066 /* If there are pop_metrics or node_metrics, append to the existing dictionary */
1067
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (pop_metrics_records != NULL
1068 || node_records != NULL) {
1069 6 fprintf(out_file,",\n");
1070 }
1071
1072 6 fprintf(out_file,
1073 " \"Process\": {\n");
1074
1075 6 for (GSList *node = region_records;
1076
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 node != NULL;
1077 6 node = node->next) {
1078
1079 6 region_record_t *region_record = node->data;
1080
1081 6 fprintf(out_file,
1082 " \"%s\": [\n",
1083 6 region_record->name);
1084
1085
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1086
1087 6 process_record_t *process_record = &region_record->process_records[i];
1088
1089 6 fprintf(out_file,
1090 " {\n"
1091 " \"rank\": %d,\n"
1092 " \"pid\": %d,\n"
1093 " \"nodeId\": %d,\n"
1094 " \"hostname\": \"%s\",\n"
1095 " \"cpuset\": %s,\n"
1096 " \"numCpus\": %d,\n"
1097 " \"avgCpus\": %.1f,\n"
1098 " \"cycles\": %"PRId64",\n"
1099 " \"instructions\": %"PRId64",\n"
1100 " \"numMeasurements\": %d,\n"
1101 " \"numResets\": %d,\n"
1102 " \"numMpiCalls\": %"PRId64",\n"
1103 " \"numOmpParallels\": %"PRId64",\n"
1104 " \"numOmpTasks\": %"PRId64",\n"
1105 " \"numGpuRuntimeCalls\": %"PRId64",\n"
1106 " \"elapsedTime\": %"PRId64",\n"
1107 " \"usefulTime\": %"PRId64",\n"
1108 " \"mpiTime\": %"PRId64",\n"
1109 " \"ompLoadImbalanceTime\": %"PRId64",\n"
1110 " \"ompSchedulingTime\": %"PRId64",\n"
1111 " \"ompSerializationTime\": %"PRId64",\n"
1112 " \"gpuRuntimeTime\": %"PRId64",\n"
1113 " \"gpuUsefulTime\": %"PRId64",\n"
1114 " \"gpuCommunicationTime\": %"PRId64"\n"
1115 " }%s\n",
1116 process_record->rank,
1117 process_record->pid,
1118 process_record->node_id,
1119 6 process_record->hostname,
1120 6 process_record->cpuset_quoted,
1121 process_record->monitor.num_cpus,
1122 6 process_record->monitor.avg_cpus,
1123 process_record->monitor.cycles,
1124 process_record->monitor.instructions,
1125 process_record->monitor.num_measurements,
1126 process_record->monitor.num_resets,
1127 process_record->monitor.num_mpi_calls,
1128 process_record->monitor.num_omp_parallels,
1129 process_record->monitor.num_omp_tasks,
1130 process_record->monitor.num_gpu_runtime_calls,
1131 process_record->monitor.elapsed_time,
1132 process_record->monitor.useful_time,
1133 process_record->monitor.mpi_time,
1134 process_record->monitor.omp_load_imbalance_time,
1135 process_record->monitor.omp_scheduling_time,
1136 process_record->monitor.omp_serialization_time,
1137 process_record->monitor.gpu_runtime_time,
1138 process_record->monitor.gpu_useful_time,
1139 process_record->monitor.gpu_communication_time,
1140
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 i + 1 < region_record->num_mpi_ranks ? "," : "");
1141 }
1142 6 fprintf(out_file,
1143 " ]%s\n",
1144
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 node->next != NULL ? "," : "");
1145 }
1146 6 fprintf(out_file,
1147 " }"); /* no eol */
1148 }
1149
1150 1 static void process_to_xml(FILE *out_file) {
1151
1152 1 for (GSList *node = region_records;
1153
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1154 1 node = node->next) {
1155
1156 1 region_record_t *region_record = node->data;
1157
1158 1 fprintf(out_file,
1159 " <Process>\n"
1160 " <name>%s</name>\n",
1161 1 region_record->name);
1162
1163
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1164
1165 1 process_record_t *process_record = &region_record->process_records[i];
1166
1167 1 fprintf(out_file,
1168 " <process>\n"
1169 " <rank>%d</rank>\n"
1170 " <pid>%d</pid>\n"
1171 " <nodeId>%d</nodeId>\n"
1172 " <hostname>%s</hostname>\n"
1173 " <cpuset>%s</cpuset>\n"
1174 " <numCpus>%d</numCpus>\n"
1175 " <avgCpus>%.1f</avgCpus>\n"
1176 " <cycles>%"PRId64"</cycles>\n"
1177 " <instructions>%"PRId64"</instructions>\n"
1178 " <numMeasurements>%d</numMeasurements>\n"
1179 " <numResets>%d</numResets>\n"
1180 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
1181 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
1182 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
1183 " <elapsedTime>%"PRId64"</elapsedTime>\n"
1184 " <usefulTime>%"PRId64"</usefulTime>\n"
1185 " <mpiTime>%"PRId64"</mpiTime>\n"
1186 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
1187 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
1188 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
1189 " </process>\n",
1190 process_record->rank,
1191 process_record->pid,
1192 process_record->node_id,
1193 1 process_record->hostname,
1194 1 process_record->cpuset_quoted,
1195 process_record->monitor.num_cpus,
1196 1 process_record->monitor.avg_cpus,
1197 process_record->monitor.cycles,
1198 process_record->monitor.instructions,
1199 process_record->monitor.num_measurements,
1200 process_record->monitor.num_resets,
1201 process_record->monitor.num_mpi_calls,
1202 process_record->monitor.num_omp_parallels,
1203 process_record->monitor.num_omp_tasks,
1204 process_record->monitor.elapsed_time,
1205 process_record->monitor.useful_time,
1206 process_record->monitor.mpi_time,
1207 process_record->monitor.omp_load_imbalance_time,
1208 process_record->monitor.omp_scheduling_time,
1209 process_record->monitor.omp_serialization_time);
1210 }
1211 1 fprintf(out_file,
1212 " </Process>\n");
1213 }
1214 1 }
1215
1216 3 static void process_to_csv(FILE *out_file, bool append) {
1217
1218
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (region_records == NULL) return;
1219
1220
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
1221 /* Print header */
1222 1 fprintf(out_file,
1223 "Region,"
1224 "Rank,"
1225 "PID,"
1226 "NodeId,"
1227 "Hostname,"
1228 "CpuSet,"
1229 "NumCpus,"
1230 "AvgCpus,"
1231 "Cycles,"
1232 "Instructions,"
1233 "NumMeasurements,"
1234 "NumResets,"
1235 "NumMpiCalls,"
1236 "NumOmpParallels,"
1237 "NumOmpTasks,"
1238 "NumGpuCalls,"
1239 "ElapsedTime,"
1240 "UsefulTime,"
1241 "MPITime,"
1242 "OMPLoadImbalance,"
1243 "OMPSchedulingTime,"
1244 "OMPSerializationTime,"
1245 "GPURuntimeTime,"
1246 "GPUUsefulTime,"
1247 "GPUCommunicationTime\n");
1248 }
1249
1250 1 for (GSList *node = region_records;
1251
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1252 1 node = node->next) {
1253
1254 1 region_record_t *region_record = node->data;
1255
1256
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1257
1258 1 process_record_t *process_record = &region_record->process_records[i];
1259
1260 1 fprintf(out_file,
1261 "%s," /* Region */
1262 "%d," /* Rank */
1263 "%d," /* PID */
1264 "%d," /* NodeId */
1265 "%s," /* Hostname */
1266 "%s," /* CpuSet */
1267 "%d," /* NumCpus */
1268 "%.1f," /* AvgCpus */
1269 "%"PRId64"," /* Cycles */
1270 "%"PRId64"," /* Instructions */
1271 "%d," /* NumMeasurements */
1272 "%d," /* NumResets */
1273 "%"PRId64"," /* NumMpiCalls */
1274 "%"PRId64"," /* NumOmpParallels */
1275 "%"PRId64"," /* NumOmpTasks */
1276 "%"PRId64"," /* NumGpuCalls */
1277 "%"PRId64"," /* ElapsedTime */
1278 "%"PRId64"," /* UsefulTime */
1279 "%"PRId64"," /* MPITime */
1280 "%"PRId64"," /* OMPLoadImbalance */
1281 "%"PRId64"," /* OMPSchedulingTime */
1282 "%"PRId64"," /* OMPSerializationTime */
1283 "%"PRId64"," /* GPURuntimeTime */
1284 "%"PRId64"," /* GPUUsefulTime */
1285 "%"PRId64"\n", /* GPUCommunicationTime */
1286 1 region_record->name,
1287 process_record->rank,
1288 process_record->pid,
1289 process_record->node_id,
1290 1 process_record->hostname,
1291 1 process_record->cpuset_quoted,
1292 process_record->monitor.num_cpus,
1293 1 process_record->monitor.avg_cpus,
1294 process_record->monitor.cycles,
1295 process_record->monitor.instructions,
1296 process_record->monitor.num_measurements,
1297 process_record->monitor.num_resets,
1298 process_record->monitor.num_mpi_calls,
1299 process_record->monitor.num_omp_parallels,
1300 process_record->monitor.num_omp_tasks,
1301 process_record->monitor.num_gpu_runtime_calls,
1302 process_record->monitor.elapsed_time,
1303 process_record->monitor.useful_time,
1304 process_record->monitor.mpi_time,
1305 process_record->monitor.omp_load_imbalance_time,
1306 process_record->monitor.omp_scheduling_time,
1307 process_record->monitor.omp_serialization_time,
1308 process_record->monitor.gpu_runtime_time,
1309 process_record->monitor.gpu_useful_time,
1310 process_record->monitor.gpu_communication_time);
1311 }
1312 }
1313 }
1314
1315 3 static void process_to_txt(FILE *out_file) {
1316
1317 3 for (GSList *node = region_records;
1318
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
1319 3 node = node->next) {
1320
1321 3 region_record_t *region_record = node->data;
1322
1323
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1324
1325 3 process_record_t *process_record = &region_record->process_records[i];
1326
1327 6 float ipc = process_record->monitor.cycles > 0
1328 ? (float)process_record->monitor.instructions
1329 / process_record->monitor.cycles
1330
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 : 0.0f;
1331
1332 3 fprintf(out_file,
1333 "%s\n"
1334 "### Name: %s\n"
1335 "### Process: %d (%s)\n"
1336 "### Rank: %d\n"
1337 "### CpuSet: %s\n"
1338 "### Elapsed time: %"PRId64" ns\n"
1339 "### Useful time: %"PRId64" ns\n"
1340 "### Not useful MPI: %"PRId64" ns\n"
1341 "### Not useful OMP Load Imbalance: %"PRId64" ns\n"
1342 "### Not useful OMP Scheduling: %"PRId64" ns\n"
1343 "### Not useful OMP Serialization: %"PRId64" ns\n"
1344 "### Not useful GPU runtime: %"PRId64" ns\n"
1345 "### Device useful time: %"PRId64" ns\n"
1346 "### Device communication time: %"PRId64" ns\n"
1347 "### IPC: %.2f\n",
1348 make_header("Monitoring Region Summary"),
1349 3 region_record->name,
1350 3 process_record->pid, process_record->hostname,
1351 process_record->rank,
1352 3 process_record->cpuset,
1353 process_record->monitor.elapsed_time,
1354 process_record->monitor.useful_time,
1355 process_record->monitor.mpi_time,
1356 process_record->monitor.omp_load_imbalance_time,
1357 process_record->monitor.omp_scheduling_time,
1358 process_record->monitor.omp_serialization_time,
1359 process_record->monitor.gpu_runtime_time,
1360 process_record->monitor.gpu_useful_time,
1361 process_record->monitor.gpu_communication_time,
1362 ipc);
1363 }
1364 }
1365 3 }
1366
1367 37 static void process_finalize(void) {
1368
1369 /* Free every record data */
1370 37 for (GSList *node = region_records;
1371
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 37 times.
51 node != NULL;
1372 14 node = node->next) {
1373
1374 14 region_record_t *record = node->data;
1375 14 free(record);
1376 }
1377
1378 /* Free list */
1379 37 g_slist_free(region_records);
1380 37 region_records = NULL;
1381 37 }
1382
1383
1384 /*********************************************************************************/
1385 /* TALP Common */
1386 /*********************************************************************************/
1387 typedef struct TALPCommonRecord {
1388 char *dlb_version; // Version X.Y.Z[-#-hash]
1389 char *time_of_creation; // ISO 8601 string
1390 } talp_common_record_t;
1391 static talp_common_record_t common_record;
1392
1393 37 static void talp_output_record_common(void) {
1394 /* Initialize structure */
1395 37 time_t now = time(NULL);
1396 37 common_record = (const talp_common_record_t) {
1397 .dlb_version = PACKAGE_VERSION,
1398 37 .time_of_creation = get_iso_8601_string(localtime(&now)),
1399 };
1400 37 }
1401
1402 6 static void common_to_json(FILE *out_file) {
1403 6 fprintf(out_file,
1404 " \"dlbVersion\": \"%s\",\n"
1405 " \"timestamp\": \"%s\",\n",
1406 common_record.dlb_version,
1407 common_record.time_of_creation);
1408 6 }
1409
1410 1 static void common_to_xml(FILE *out_file) {
1411
1412 1 fprintf(out_file,
1413 " <dlbVersion>%s</dlbVersion>\n"
1414 " <timestamp>%s</timestamp>\n",
1415 common_record.dlb_version,
1416 common_record.time_of_creation);
1417 1 }
1418
1419 3 static void common_to_txt(FILE *out_file) {
1420
1421 3 fprintf(out_file,
1422 "%s\n"
1423 "### DLB Version: %s\n"
1424 "### Timestamp: %s\n",
1425 make_header("TALP Common Data"),
1426 common_record.dlb_version,
1427 common_record.time_of_creation);
1428 3 }
1429
1430 37 static void common_finalize(void) {
1431 37 free(common_record.time_of_creation);
1432 37 }
1433
1434
1435
1436
1437 /*********************************************************************************/
1438 /* TALP Resources */
1439 /*********************************************************************************/
1440 typedef struct TALPResourcesRecord {
1441 unsigned int num_cpus;
1442 unsigned int num_nodes;
1443 unsigned int num_mpi_ranks;
1444 unsigned int num_gpus;
1445 } talp_resources_record_t;
1446 static talp_resources_record_t resources_record;
1447
1448 11 void talp_output_record_resources(int num_cpus, int num_nodes, int num_mpi_ranks,
1449 int num_gpus) {
1450
1451 11 resources_record = (const talp_resources_record_t) {
1452 11 .num_cpus = (unsigned int) num_cpus,
1453 11 .num_nodes = (unsigned int) num_nodes,
1454 11 .num_mpi_ranks = (unsigned int) num_mpi_ranks,
1455 11 .num_gpus = (unsigned int) num_gpus,
1456 };
1457 11 }
1458
1459 6 static void resources_to_json(FILE *out_file) {
1460 6 fprintf(out_file,
1461 " \"resources\": {\n"
1462 " \"numCpus\": %u,\n"
1463 " \"numNodes\": %u,\n"
1464 " \"numMpiRanks\": %u,\n"
1465 " \"numGpus\": %u\n"
1466 " },\n",
1467 resources_record.num_cpus,
1468 resources_record.num_nodes,
1469 resources_record.num_mpi_ranks,
1470 resources_record.num_gpus);
1471 6 }
1472
1473 1 static void resources_to_xml(FILE *out_file) {
1474
1475 1 fprintf(out_file,
1476 " <resources>\n"
1477 " <numCpus>%u</numCpus>\n"
1478 " <numNodes>%u</numNodes>\n"
1479 " <numMpiRanks>%u</numMpiRanks>\n"
1480 " </resources>",
1481 resources_record.num_cpus,
1482 resources_record.num_nodes,
1483 resources_record.num_mpi_ranks);
1484 1 }
1485
1486 3 static void resources_to_txt(FILE *out_file) {
1487
1488 3 fprintf(out_file,
1489 "%s\n"
1490 "### Number of CPUs: %u\n"
1491 "### Number of Nodes: %u\n"
1492 "### Number of MPI processes: %u\n"
1493 "### Number of GPUs: %u\n",
1494 make_header("TALP Resources"),
1495 resources_record.num_cpus,
1496 resources_record.num_nodes,
1497 resources_record.num_mpi_ranks,
1498 resources_record.num_gpus);
1499 3 }
1500
1501
1502 /*********************************************************************************/
1503 /* TALP Process info */
1504 /*********************************************************************************/
1505
1506 typedef struct talp_process_info_record_t {
1507 char hostname[HOST_NAME_MAX];
1508 pid_t pid;
1509 } talp_process_info_record_t;
1510
1511 static talp_process_info_record_t process_info_record = {0};
1512
1513 19 void talp_output_record_process_info(void) {
1514
1515 19 gethostname(process_info_record.hostname, HOST_NAME_MAX);
1516 19 process_info_record.pid = getpid();
1517 19 }
1518
1519 6 static void process_info_to_json(FILE *out_file) {
1520
1521
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
6 if (process_info_record.pid != 0) {
1522 5 fprintf(out_file,
1523 " \"processInfo\": {\n"
1524 " \"hostname\": \"%s\",\n"
1525 " \"pid\": %d\n"
1526 " },\n",
1527 process_info_record.hostname,
1528 process_info_record.pid);
1529 }
1530 6 }
1531
1532 /*********************************************************************************/
1533 /* Helper functions */
1534 /*********************************************************************************/
1535
1536 6 static void json_header(FILE *out_file) {
1537 6 fprintf(out_file, "{\n");
1538 6 }
1539
1540 6 static void json_footer(FILE *out_file) {
1541 6 fprintf(out_file, "\n}\n");
1542 6 }
1543
1544 1 static void xml_header(FILE *out_file) {
1545 1 fprintf(out_file, "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
1546 "<root>\n");
1547 1 }
1548
1549 1 static void xml_footer(FILE *out_file) {
1550 1 fprintf(out_file, "</root>\n");
1551 1 }
1552
1553
1554 /*********************************************************************************/
1555 /* Output directory/file logic */
1556 /*********************************************************************************/
1557
1558 // Helper: recursively create directories (mkdir -p equivalent)
1559 11 static int mkdir_p(const char *path, mode_t mode) {
1560 char tmp[PATH_MAX];
1561 11 char *p = NULL;
1562
1563 11 snprintf(tmp, sizeof(tmp), "%s", path);
1564 11 size_t len = strlen(tmp);
1565
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
11 if (len == 0) return -1;
1566
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
11 if (tmp[len - 1] == '/') {
1567 tmp[len - 1] = '\0';
1568 }
1569
1570
2/2
✓ Branch 0 taken 523 times.
✓ Branch 1 taken 11 times.
534 for (p = tmp + 1; *p; p++) {
1571
2/2
✓ Branch 0 taken 31 times.
✓ Branch 1 taken 492 times.
523 if (*p == '/') {
1572 31 *p = '\0';
1573
2/4
✓ Branch 1 taken 31 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✓ Branch 4 taken 31 times.
31 if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
1574 return -1;
1575 }
1576 31 *p = '/';
1577 }
1578 }
1579
1580
4/4
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 9 times.
11 if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
1581 1 return -1;
1582 }
1583
1584 10 return 0;
1585 }
1586
1587 // open file for appending or writing, creating dirs as needed
1588 15 static FILE *open_file_with_dirs(const char *filename, bool *append) {
1589
2/2
✓ Branch 1 taken 4 times.
✓ Branch 2 taken 11 times.
15 if (access(filename, F_OK) == 0) {
1590 FILE *f;
1591
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
4 if (append) {
1592 1 *append = true;
1593 1 f = fopen(filename, "a");
1594 } else {
1595 3 f = fopen(filename, "w");
1596 }
1597
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
4 if (!f) {
1598 1 warning("Cannot open existing file %s: %s", filename, strerror(errno));
1599 }
1600 4 return f;
1601 } else {
1602 // Ensure parent directories exist
1603 char pathbuf[PATH_MAX];
1604 11 snprintf(pathbuf, sizeof(pathbuf), "%s", filename);
1605 11 char *dir = dirname(pathbuf);
1606
1607
2/2
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 10 times.
11 if (mkdir_p(dir, 0755) != 0) {
1608 1 warning("Cannot create directory %s: %s", dir, strerror(errno));
1609 1 return NULL;
1610 }
1611
1612
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 6 times.
10 if (append) {
1613 4 *append = false;
1614 }
1615 10 FILE *f = fopen(filename, "w");
1616
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
10 if (!f) {
1617 warning("Cannot create file %s: %s", filename, strerror(errno));
1618 }
1619 10 return f;
1620 }
1621 }
1622
1623
1624
1625 /*********************************************************************************/
1626 /* Finalize */
1627 /*********************************************************************************/
1628
1629 320 static bool check_coefficient(float coeffiecient) {
1630
2/4
✓ Branch 0 taken 320 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 320 times.
✗ Branch 3 not taken.
320 return 0.0f <= coeffiecient && coeffiecient <= 1.0;
1631 }
1632
1633 static void warn_negative_counters(void) {
1634 static bool warned_once = false;
1635 if (!warned_once) {
1636 warning("Some obtained PAPI counters contain negative values. Check your"
1637 " installation or report the error to %s", PACKAGE_BUGREPORT);
1638 warned_once = true;
1639 }
1640 }
1641
1642 static void warn_wrong_coefficient(void) {
1643 static bool warned_once = false;
1644 if (!warned_once) {
1645 warning("Some computed POP metric coefficient is not within the allowed"
1646 " range [0.0, 1.0]. If you think this is an unexpected value,"
1647 " please report the error to %s", PACKAGE_BUGREPORT);
1648 warned_once = true;
1649 }
1650 }
1651
1652 37 static void sanitize_records(void) {
1653
1654 /* pop_metrics_records:
1655 * - instructions and cycles need to be >= 0
1656 * - computed efficiencyes need to be [0.0, 1.0]
1657 */
1658 37 for (GSList *node = pop_metrics_records;
1659
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 37 times.
69 node != NULL;
1660 32 node = node->next) {
1661
1662 32 dlb_pop_metrics_t *record = node->data;
1663
1664
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if (record->cycles < 0) {
1665 record->cycles = 0.0;
1666 warn_negative_counters();
1667 }
1668
1669
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if (record->instructions < 0) {
1670 record->instructions = 0.0;
1671 warn_negative_counters();
1672 }
1673
1674
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 if (!check_coefficient(record->parallel_efficiency)
1675
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_parallel_efficiency)
1676
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_communication_efficiency)
1677
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_load_balance)
1678
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_load_balance_in)
1679
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_load_balance_out)
1680
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->omp_parallel_efficiency)
1681
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->omp_load_balance)
1682
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->omp_scheduling_efficiency)
1683
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
32 || !check_coefficient(record->omp_serialization_efficiency)) {
1684 warn_wrong_coefficient();
1685 }
1686 }
1687
1688 /* node_records: nothing to sanitize for now */
1689
1690 /* region_records: */
1691 37 for (GSList *node = region_records;
1692
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 37 times.
51 node != NULL;
1693 14 node = node->next) {
1694
1695 14 region_record_t *region_record = node->data;
1696
1697
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1698
1699 14 dlb_monitor_t *monitor = &region_record->process_records[i].monitor;
1700
1701
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (monitor->cycles < 0) {
1702 monitor->cycles = 0.0;
1703 warn_negative_counters();
1704 }
1705
1706
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (monitor->instructions < 0) {
1707 monitor->instructions = 0.0;
1708 warn_negative_counters();
1709 }
1710 }
1711 }
1712 37 }
1713
1714 /* Return an allocated string: base + "_%h_%p.partial" + extension */
1715 1 static char *build_partial_template(const char *filename) {
1716
1717 1 const char *dot = strrchr(filename, '.');
1718 1 size_t base_len = dot - filename;
1719 1 const char *ext = dot;
1720
1721
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 ensure(dot != NULL, "expected filename with extension %s. Please report bug.", __func__);
1722
1723 1 size_t len =
1724 base_len +
1725 1 strlen("_%h_%p.partial") +
1726 1 strlen(ext) +
1727 1;
1728
1729 1 char *out = malloc(len);
1730
1731 1 snprintf(out, len,
1732 "%.*s_%%h_%%p.partial%s",
1733 (int)base_len,
1734 filename,
1735 ext);
1736
1737 1 return out;
1738 }
1739
1740 /* Detect job ID across schedulers */
1741 2 static const char *get_job_id(void) {
1742
1743 2 const char *vars[] = {
1744 "SLURM_JOB_ID",
1745 "SLURM_JOBID",
1746 "FLUX_JOB_ID",
1747 "PBS_JOBID",
1748 "LSB_JOBID",
1749 "JOB_ID",
1750 NULL
1751 };
1752
1753
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 2 times.
14 for (int i = 0; vars[i]; i++) {
1754 12 const char *v = getenv(vars[i]);
1755
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 12 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
12 if (v && *v)
1756 return v;
1757 }
1758
1759 2 return NULL;
1760 }
1761
1762 /* Expands output file, e.g.: talp_%p.json -> talp_123.json */
1763 13 static char *expand_output_filename(const char *template)
1764 {
1765
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 2 times.
13 if (strchr(template, '%') == NULL) return NULL;
1766
1767 char hostname[HOST_NAME_MAX];
1768 2 gethostname(hostname, HOST_NAME_MAX);
1769
1770 2 pid_t pid = getpid();
1771 char pid_buf[32];
1772 2 snprintf(pid_buf, sizeof(pid_buf), "%d", pid);
1773
1774 2 const char *job = get_job_id();
1775
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 const char *job_str = job ? job : "0";
1776
1777 2 size_t hst_len = strlen(hostname);
1778 2 size_t pid_len = strlen(pid_buf);
1779 2 size_t job_len = strlen(job_str);
1780
1781 /* Compute output length */
1782
1783 2 size_t out_len = 0;
1784
1785
2/2
✓ Branch 0 taken 140 times.
✓ Branch 1 taken 2 times.
142 for (size_t i = 0; template[i]; i++) {
1786
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 136 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
140 if (template[i] == '%' && template[i+1]) {
1787
2/5
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 switch (template[i+1]) {
1788 2 case 'h':
1789 2 out_len += hst_len;
1790 2 i++;
1791 2 continue;
1792 2 case 'p':
1793 2 out_len += pid_len;
1794 2 i++;
1795 2 continue;
1796 case 'j':
1797 out_len += job_len;
1798 i++;
1799 continue;
1800 case '%':
1801 out_len += 1;
1802 i++;
1803 continue;
1804 }
1805 }
1806 136 out_len += 1;
1807 }
1808
1809
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (out_len > PATH_MAX) return NULL;
1810
1811 /* Fill buffer */
1812
1813 2 char *output_filename = malloc(out_len + 1);
1814
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (!output_filename) return NULL;
1815
1816 2 char *out = output_filename;
1817
1818
2/2
✓ Branch 0 taken 140 times.
✓ Branch 1 taken 2 times.
142 for (size_t i = 0; template[i]; ++i) {
1819
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 136 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
140 if (template[i] == '%' && template[i+1]) {
1820
2/5
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 switch (template[i+1]) {
1821 2 case 'h':
1822 2 memcpy(out, hostname, hst_len);
1823 2 out += hst_len;
1824 2 ++i;
1825 2 continue;
1826
1827 2 case 'p':
1828 2 memcpy(out, pid_buf, pid_len);
1829 2 out += pid_len;
1830 2 ++i;
1831 2 continue;
1832
1833 case 'j':
1834 memcpy(out, job_str, job_len);
1835 out += job_len;
1836 ++i;
1837 continue;
1838
1839 case '%':
1840 *out++ = '%';
1841 ++i;
1842 continue;
1843 }
1844 }
1845
1846 136 *out++ = template[i];
1847 }
1848
1849 2 *out = '\0';
1850
1851 2 return output_filename;
1852 }
1853
1854 37 void talp_output_finalize(const char *output_file, bool partial_output) {
1855
1856 /* For efficiency when adding records, they are prepended to their respective lists.
1857 * Then, they are reversed here to print them in alphabetical order. */
1858 37 pop_metrics_records = g_slist_reverse(pop_metrics_records);
1859 37 node_records = g_slist_reverse(node_records);
1860 37 region_records = g_slist_reverse(region_records);
1861
1862 /* Sanitize erroneous values */
1863 37 sanitize_records();
1864
1865 37 talp_output_record_common();
1866
1867 /* If the process has changed the locale, temporarily push the C locale to
1868 * print floats with the expected notation (a comma as a decimal separator
1869 * will break CSV and JSON files). The object associated with the locale
1870 * can be safely freed after it has been set. */
1871 37 locale_t new_locale = newlocale(LC_ALL, "C", 0);
1872 37 uselocale(new_locale);
1873 37 freelocale(new_locale);
1874
1875
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 13 times.
37 if (output_file == NULL) {
1876 /* No output file, just print all records */
1877 24 pop_metrics_print();
1878 24 node_print();
1879 24 process_print();
1880 } else {
1881 /* Do not open file if process has no data */
1882
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
13 if (pop_metrics_records == NULL
1883 && node_records == NULL
1884 && region_records == NULL) return;
1885
1886 /* Check file extension */
1887 typedef enum Extension {
1888 EXT_JSON,
1889 EXT_XML,
1890 EXT_CSV,
1891 EXT_TXT,
1892 } extension_t;
1893 13 extension_t extension = EXT_TXT;
1894 13 const char *ext = strrchr(output_file, '.');
1895
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1 times.
13 if (ext != NULL) {
1896
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (strcmp(ext+1, "json") == 0) {
1897 6 extension = EXT_JSON;
1898
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 } else if (strcmp(ext+1, "xml") == 0) {
1899 1 extension = EXT_XML;
1900
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
5 } else if (strcmp(ext+1, "csv") == 0) {
1901 3 extension = EXT_CSV;
1902 }
1903 }
1904
1905 /* Deprecation warning*/
1906
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 12 times.
13 if(extension == EXT_XML){
1907 1 warning("Deprecated: The support for XML output is deprecated and"
1908 " will be removed in the next release");
1909 }
1910
1911 /* Flag check */
1912
3/4
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
13 if (extension != EXT_JSON && partial_output) {
1913 warning("Option --talp-partial-output is only supported for JSON format."
1914 " Disabling option");
1915 partial_output = false;
1916 }
1917
1918 /* Obtain the real filename depending on whether output_file is a template,
1919 * or the user requested partial output */
1920 13 char *template = NULL;
1921 13 char *filename = NULL;
1922
1923
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 12 times.
13 if (partial_output) {
1924 1 template = build_partial_template(output_file);
1925 } else {
1926 12 template = strdup(output_file);
1927 }
1928
1929 13 filename = expand_output_filename(template);
1930
1931
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 11 times.
13 if (filename != NULL) {
1932 2 output_file = filename;
1933 }
1934
1935 13 free(template);
1936
1937 /* Specific case where output file needs to be split */
1938
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
13 if (extension == EXT_CSV
1939 3 && !!(pop_metrics_records != NULL)
1940 3 + !!(node_records != NULL)
1941
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
4 + !!(region_records != NULL) > 1) {
1942
1943 /* Length without extension */
1944 1 int filename_useful_len = ext - output_file;
1945
1946 /* POP */
1947
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_metrics_records != NULL) {
1948 1 const char *pop_ext = "-pop.csv";
1949 1 size_t pop_file_len = filename_useful_len + strlen(pop_ext) + 1;
1950 1 char *pop_filename = malloc(sizeof(char)*pop_file_len);
1951 1 sprintf(pop_filename, "%.*s%s", filename_useful_len, output_file, pop_ext);
1952 bool append_to_csv;
1953 1 FILE *pop_file = open_file_with_dirs(pop_filename, &append_to_csv);
1954
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_file) {
1955 1 pop_metrics_to_csv(pop_file, append_to_csv);
1956 1 fclose(pop_file);
1957 } else {
1958 warning("Writing metrics to stdout instead:");
1959 pop_metrics_to_csv(stdout, /* append: */ false);
1960 }
1961 }
1962
1963 /* Node */
1964
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_records != NULL) {
1965 1 const char *node_ext = "-node.csv";
1966 1 size_t node_file_len = filename_useful_len + strlen(node_ext) + 1;
1967 1 char *node_filename = malloc(sizeof(char)*node_file_len);
1968 1 sprintf(node_filename, "%.*s%s", filename_useful_len, output_file, node_ext);
1969 bool append_to_csv;
1970 1 FILE *node_file = open_file_with_dirs(node_filename, &append_to_csv);
1971
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_file) {
1972 1 node_to_csv(node_file, append_to_csv);
1973 1 fclose(node_file);
1974 } else {
1975 warning("Writing metrics to stdout instead:");
1976 node_to_csv(stdout, /* append: */ false);
1977 }
1978 }
1979
1980 /* Process */
1981
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (region_records != NULL) {
1982 1 const char *process_ext = "-process.csv";
1983 1 size_t process_file_len = filename_useful_len + strlen(process_ext) + 1;
1984 1 char *process_filename = malloc(sizeof(char)*process_file_len);
1985 1 sprintf(process_filename, "%.*s%s", filename_useful_len, output_file, process_ext);
1986 bool append_to_csv;
1987 1 FILE *process_file = open_file_with_dirs(process_filename, &append_to_csv);
1988
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (process_file) {
1989 1 process_to_csv(process_file, append_to_csv);
1990 1 fclose(process_file);
1991 } else {
1992 warning("Writing metrics to stdout instead:");
1993 process_to_csv(stdout, /* append: */ false);
1994 }
1995 }
1996 }
1997
1998 /* Write to file */
1999 else {
2000 /* Open file */
2001 bool append_to_csv;
2002
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
12 FILE *out_file = open_file_with_dirs(output_file,
2003 extension == EXT_CSV ? &append_to_csv : NULL);
2004
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
12 if (!out_file) {
2005 2 warning("Writing metrics to stdout instead:");
2006 2 out_file = stdout;
2007 2 append_to_csv = false;
2008 }
2009
2010 /* Write records to file */
2011
4/5
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
12 switch(extension) {
2012 6 case EXT_JSON:
2013 6 json_header(out_file);
2014 6 common_to_json(out_file);
2015 6 resources_to_json(out_file);
2016 6 process_info_to_json(out_file);
2017 6 pop_metrics_to_json(out_file);
2018 6 node_to_json(out_file);
2019 6 process_to_json(out_file);
2020 6 json_footer(out_file);
2021 6 break;
2022 1 case EXT_XML:
2023 1 xml_header(out_file);
2024 1 common_to_xml(out_file);
2025 1 resources_to_xml(out_file);
2026 1 pop_metrics_to_xml(out_file);
2027 1 node_to_xml(out_file);
2028 1 process_to_xml(out_file);
2029 1 xml_footer(out_file);
2030 1 break;
2031 2 case EXT_CSV:
2032 2 pop_metrics_to_csv(out_file, append_to_csv);
2033 2 node_to_csv(out_file, append_to_csv);
2034 2 process_to_csv(out_file, append_to_csv);
2035 2 break;
2036 3 case EXT_TXT:
2037 3 common_to_txt(out_file);
2038 3 resources_to_txt(out_file);
2039 3 pop_metrics_to_txt(out_file);
2040 3 node_to_txt(out_file);
2041 3 process_to_txt(out_file);
2042 3 break;
2043 }
2044
2045 /* Close file */
2046
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.
12 if (out_file != stdout) {
2047 10 fclose(out_file);
2048 }
2049 }
2050
2051
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 11 times.
13 if (filename != NULL) {
2052 2 free(filename);
2053 2 filename = NULL;
2054 }
2055 }
2056
2057 // Restore locale
2058 37 uselocale(LC_GLOBAL_LOCALE);
2059
2060 // De-allocate all records
2061 37 common_finalize();
2062 37 pop_metrics_finalize();
2063 37 node_finalize();
2064 37 process_finalize();
2065 }
2066