GCC Code Coverage Report


Directory: src/
File: src/talp/talp_output.c
Date: 2026-06-05 08:54:23
Exec Total Coverage
Lines: 733 804 91.2%
Functions: 47 50 94.0%
Branches: 248 339 73.2%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "talp/talp_output.h"
25
26 #include "LB_core/spd.h"
27 #include "apis/dlb_talp.h"
28 #include "support/debug.h"
29 #include "support/gslist.h"
30 #include "support/mytime.h"
31 #include "support/options.h"
32 #include "talp/talp.h"
33 #include "talp/talp_types.h"
34 #include "talp/perf_metrics.h"
35
36 #include <errno.h>
37 #include <libgen.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <pthread.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sys/stat.h>
45 #include <unistd.h>
46
47
48 19 static float sanitized_ipc(float instructions, float cycles) {
49
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 17 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
19 if (instructions > 0 && cycles > 0) {
50 2 return instructions / cycles;
51 } else {
52 17 return 0.0f;
53 }
54 }
55
56 42 static const char* make_header(const char *title) {
57 42 int width = 62;
58 static char buf[80];
59 42 int title_len = strlen(title);
60
61
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
42 if (width >= (int)sizeof(buf)) {
62 width = sizeof(buf) - 1; // prevent overflow
63 }
64
65
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
42 if (title_len + 2 > width) {
66 // Title too long: just return title
67 snprintf(buf, sizeof(buf), "%s", title);
68 return buf;
69 }
70
71 42 int hashes = width - title_len - 2;
72 42 int left = hashes / 2;
73 42 int right = hashes - left;
74
75 42 memset(buf, '#', left);
76 42 buf[left] = ' ';
77 42 memcpy(buf + left + 1, title, title_len);
78 42 buf[left + 1 + title_len] = ' ';
79 42 memset(buf + left + 1 + title_len + 1, '#', right);
80 42 buf[width] = '\0';
81
82 42 return buf;
83 }
84
85
86 /*********************************************************************************/
87 /* GPU vendor */
88 /*********************************************************************************/
89
90 static gpu_vendor_t gpu_vendor = GPU_VENDOR_NONE;
91
92 void talp_output_record_gpu_vendor(gpu_vendor_t vendor) {
93 gpu_vendor = vendor;
94 }
95
96 /*********************************************************************************/
97 /* Monitoring Region */
98 /*********************************************************************************/
99
100 8 void talp_output_print_monitoring_region(const dlb_monitor_t *monitor,
101 const char *cpuset_str, talp_flags_t talp_flags) {
102
103 char elapsed_time_str[16];
104 8 ns_to_human(elapsed_time_str, 16, monitor->elapsed_time);
105
106 8 info("%s", make_header("Monitoring Region Summary"));
107 8 info("### Name: %s", monitor->name);
108 8 info("### Elapsed Time: %s", elapsed_time_str);
109 8 info("### Useful time: %"PRId64" ns",
110 8 monitor->useful_time);
111
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_flags.have_mpi) {
112 4 info("### Not useful MPI: %"PRId64" ns",
113 4 monitor->mpi_time);
114
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
4 if (talp_flags.have_openmp) {
115 1 info("### Not useful MPI in worker threads: %"PRId64" ns",
116 1 monitor->mpi_worker_idle_time);
117 }
118 }
119
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_openmp) {
120 1 info("### Not useful OMP Load Balance: %"PRId64" ns",
121 1 monitor->omp_load_imbalance_time);
122 1 info("### Not useful OMP Scheduling: %"PRId64" ns",
123 1 monitor->omp_scheduling_time);
124 1 info("### Not useful OMP Serialization: %"PRId64" ns",
125 1 monitor->omp_serialization_time);
126 }
127
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_gpu) {
128 1 info("### Not useful GPU runtime: %"PRId64" ns",
129 1 monitor->gpu_runtime_time);
130 1 info("### Device useful time: %"PRId64" ns",
131 1 monitor->gpu_useful_time);
132 1 info("### Device communication time: %"PRId64" ns",
133 1 monitor->gpu_communication_time);
134 }
135 8 info("### CpuSet: %s", cpuset_str);
136
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_hwc) {
137 1 float ipc = sanitized_ipc(monitor->instructions, monitor->cycles);
138 1 info("### IPC: %.2f ", ipc);
139 }
140
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_flags.have_mpi) {
141 4 info("### Number of MPI calls: %"PRId64,
142 4 monitor->num_mpi_calls);
143 }
144
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_openmp) {
145 1 info("### Number of OpenMP parallels: %"PRId64,
146 1 monitor->num_omp_parallels);
147 1 info("### Number of OpenMP tasks: %"PRId64,
148 1 monitor->num_omp_tasks);
149 }
150
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (talp_flags.have_gpu) {
151 1 info("### Number of GPU runtime calls: %"PRId64,
152 1 monitor->num_gpu_runtime_calls);
153 }
154 8 }
155
156
157 /*********************************************************************************/
158 /* POP Metrics */
159 /*********************************************************************************/
160
161 static GSList *pop_metrics_records = NULL;
162
163 32 void talp_output_record_pop_metrics(const dlb_pop_metrics_t *metrics) {
164
165 /* Copy structure */
166 32 dlb_pop_metrics_t *new_record = malloc(sizeof(dlb_pop_metrics_t));
167 32 *new_record = *metrics;
168
169 /* Add record to list */
170 32 pop_metrics_records = g_slist_prepend(pop_metrics_records, new_record);
171 32 }
172
173 24 static void pop_metrics_print(void) {
174
175 24 for (GSList *node = pop_metrics_records;
176
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 24 times.
43 node != NULL;
177 19 node = node->next) {
178
179 19 dlb_pop_metrics_t *record = node->data;
180
181
2/2
✓ Branch 0 taken 18 times.
✓ Branch 1 taken 1 times.
19 if (record->elapsed_time > 0) {
182
183 18 float avg_ipc = sanitized_ipc(record->instructions, record->cycles);
184 char elapsed_time_str[16];
185 18 ns_to_human(elapsed_time_str, 16, record->elapsed_time);
186
187
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 1 times.
35 bool have_gpu_activity = record->num_gpu_runtime_calls > 0 ||
188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 17 times.
17 record->gpu_useful_time > 0;
189
190 18 info("%s", make_header("Monitoring Region POP Metrics"));
191 18 info("### Name: %s", record->name);
192 18 info("### Elapsed Time: %s", elapsed_time_str);
193
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (have_gpu_activity) {
194 1 info("### Host");
195 1 info("### ----");
196 }
197
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 if (record->mpi_parallel_efficiency > 0.0f &&
198
1/2
✓ Branch 0 taken 18 times.
✗ Branch 1 not taken.
18 record->omp_parallel_efficiency > 0.0f) {
199 18 info("### Parallel efficiency: %1.2f",
200 18 record->parallel_efficiency);
201 }
202
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (record->num_mpi_calls > 0) {
203 1 info("### - MPI Parallel efficiency: %1.2f",
204 1 record->mpi_parallel_efficiency);
205 1 info("### - Communication efficiency: %1.2f",
206 1 record->mpi_communication_efficiency);
207 1 info("### - Load Balance: %1.2f",
208 1 record->mpi_load_balance);
209 1 info("### - In: %1.2f",
210 1 record->mpi_load_balance_in);
211 1 info("### - Out: %1.2f",
212 1 record->mpi_load_balance_out);
213 }
214
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (record->num_omp_parallels + record->num_omp_tasks > 0) {
215 2 info("### - OpenMP Parallel efficiency: %1.2f",
216 2 record->omp_parallel_efficiency);
217 2 info("### - Load Balance: %1.2f",
218 2 record->omp_load_balance);
219 2 info("### - Scheduling efficiency: %1.2f",
220 2 record->omp_scheduling_efficiency);
221 2 info("### - Serialization efficiency: %1.2f",
222 2 record->omp_serialization_efficiency);
223 }
224
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (have_gpu_activity) {
225 1 info("### - Device Offload efficiency: %1.2f",
226 1 record->device_offload_efficiency);
227 }
228
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (avg_ipc > 0.0f) {
229 1 float avg_freq = record->cycles / record->useful_time;
230 1 info("### Computational metrics:");
231 1 info("### - Average useful IPC: %1.2f", avg_ipc);
232 1 info("### - Average useful frequency: %1.2f GHz", avg_freq);
233 1 info("### - Number of instructions: %1.2E", record->instructions);
234 }
235
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 17 times.
18 if (have_gpu_activity) {
236 1 info("###");
237 1 info("### %s Device",
238
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 gpu_vendor == GPU_VENDOR_NVIDIA ? "NVIDIA"
239 1 : gpu_vendor == GPU_VENDOR_AMD ? "AMD"
240
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 : "GPU");
241 1 info("### %s-------",
242
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 gpu_vendor == GPU_VENDOR_NVIDIA ? "------"
243 : gpu_vendor == GPU_VENDOR_AMD ? "---"
244 : "---");
245 1 info("### Parallel efficiency: %1.2f",
246 1 record->gpu_parallel_efficiency);
247 1 info("### - Load Balance: %1.2f",
248 1 record->gpu_load_balance);
249 1 info("### - Communication efficiency: %1.2f",
250 1 record->gpu_communication_efficiency);
251 1 info("### - Orchestration efficiency: %1.2f",
252 1 record->gpu_orchestration_efficiency);
253 }
254 } else {
255 1 info("%s", make_header("Monitoring Region POP Metrics"));
256 1 info("### Name: %s", record->name);
257 1 info("### No data ###");
258 }
259 }
260 24 }
261
262 6 static void pop_metrics_to_json(FILE *out_file) {
263
264
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (pop_metrics_records != NULL) {
265 6 fprintf(out_file,
266 " \"Application\": {\n");
267
268 6 for (GSList *node = pop_metrics_records;
269
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 node != NULL;
270 6 node = node->next) {
271
272 6 dlb_pop_metrics_t *record = node->data;
273
274 6 fprintf(out_file,
275 " \"%s\": {\n"
276 " \"numCpus\": %d,\n"
277 " \"numMpiRanks\": %d,\n"
278 " \"numNodes\": %d,\n"
279 " \"avgCpus\": %.1f,\n"
280 " \"numGpus\": %d,\n"
281 " \"cycles\": %.0f,\n"
282 " \"instructions\": %.0f,\n"
283 " \"numMeasurements\": %"PRId64",\n"
284 " \"numMpiCalls\": %"PRId64",\n"
285 " \"numOmpParallels\": %"PRId64",\n"
286 " \"numOmpTasks\": %"PRId64",\n"
287 " \"numGpuRuntimeCalls\": %"PRId64",\n"
288 " \"elapsedTime\": %"PRId64",\n"
289 " \"usefulTime\": %"PRId64",\n"
290 " \"mpiTime\": %"PRId64",\n"
291 " \"mpiWorkerIdleTime\": %"PRId64",\n"
292 " \"ompLoadImbalanceTime\": %"PRId64",\n"
293 " \"ompSchedulingTime\": %"PRId64",\n"
294 " \"ompSerializationTime\": %"PRId64",\n"
295 " \"gpuRuntimeTime\": %"PRId64",\n"
296 " \"minMpiNormdProc\": %.0f,\n"
297 " \"minMpiNormdNode\": %.0f,\n"
298 " \"gpuUsefulTime\": %"PRId64",\n"
299 " \"gpuCommunicationTime\": %"PRId64",\n"
300 " \"maxGpuUsefulTime\": %"PRId64",\n"
301 " \"maxGpuActiveTime\": %"PRId64",\n"
302 " \"parallelEfficiency\": %.2f,\n"
303 " \"mpiParallelEfficiency\": %.2f,\n"
304 " \"mpiCommunicationEfficiency\": %.2f,\n"
305 " \"mpiLoadBalance\": %.2f,\n"
306 " \"mpiLoadBalanceIn\": %.2f,\n"
307 " \"mpiLoadBalanceOut\": %.2f,\n"
308 " \"ompParallelEfficiency\": %.2f,\n"
309 " \"ompLoadBalance\": %.2f,\n"
310 " \"ompSchedulingEfficiency\": %.2f,\n"
311 " \"ompSerializationEfficiency\": %.2f,\n"
312 " \"deviceOffloadEfficiency\": %.2f,\n"
313 " \"gpuParallelEfficiency\": %.2f,\n"
314 " \"gpuLoadBalance\": %.2f,\n"
315 " \"gpuCommunicationEfficiency\": %.2f,\n"
316 " \"gpuOrchestrationEfficiency\": %.2f\n"
317 " }%s\n",
318 6 record->name,
319 record->num_cpus,
320 record->num_mpi_ranks,
321 record->num_nodes,
322 6 record->avg_cpus,
323 record->num_gpus,
324 record->cycles,
325 record->instructions,
326 record->num_measurements,
327 record->num_mpi_calls,
328 record->num_omp_parallels,
329 record->num_omp_tasks,
330 record->num_gpu_runtime_calls,
331 record->elapsed_time,
332 record->useful_time,
333 record->mpi_time,
334 record->mpi_worker_idle_time,
335 record->omp_load_imbalance_time,
336 record->omp_scheduling_time,
337 record->omp_serialization_time,
338 record->gpu_runtime_time,
339 record->min_mpi_normd_proc,
340 record->min_mpi_normd_node,
341 record->gpu_useful_time,
342 record->gpu_communication_time,
343 record->max_gpu_useful_time,
344 record->max_gpu_active_time,
345 6 record->parallel_efficiency,
346 6 record->mpi_parallel_efficiency,
347 6 record->mpi_communication_efficiency,
348 6 record->mpi_load_balance,
349 6 record->mpi_load_balance_in,
350 6 record->mpi_load_balance_out,
351 6 record->omp_parallel_efficiency,
352 6 record->omp_load_balance,
353 6 record->omp_scheduling_efficiency,
354 6 record->omp_serialization_efficiency,
355 6 record->device_offload_efficiency,
356 6 record->gpu_parallel_efficiency,
357 6 record->gpu_load_balance,
358 6 record->gpu_communication_efficiency,
359 6 record->gpu_orchestration_efficiency,
360
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 node->next != NULL ? "," : "");
361 }
362 6 fprintf(out_file,
363 " }"); /* no eol */
364 }
365 6 }
366
367 1 static void pop_metrics_to_xml(FILE *out_file) {
368
369 1 for (GSList *node = pop_metrics_records;
370
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
371 1 node = node->next) {
372
373 1 dlb_pop_metrics_t *record = node->data;
374
375 1 fprintf(out_file,
376 " <Application>\n"
377 " <name>%s</name>\n"
378 " <numCpus>%d</numCpus>\n"
379 " <numMpiRanks>%d</numMpiRanks>\n"
380 " <numNodes>%d</numNodes>\n"
381 " <avgCpus>%.1f</avgCpus>\n"
382 " <cycles>%.0f</cycles>\n"
383 " <instructions>%.0f</instructions>\n"
384 " <numMeasurements>%"PRId64"</numMeasurements>\n"
385 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
386 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
387 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
388 " <elapsedTime>%"PRId64"</elapsedTime>\n"
389 " <usefulTime>%"PRId64"</usefulTime>\n"
390 " <mpiTime>%"PRId64"</mpiTime>\n"
391 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
392 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
393 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
394 " <minMpiNormdProc>%.0f</minMpiNormdProc>\n"
395 " <minMpiNormdNode>%.0f</minMpiNormdNode>\n"
396 " <parallelEfficiency>%.2f</parallelEfficiency>\n"
397 " <mpiParallelEfficiency>%.2f</mpiParallelEfficiency>\n"
398 " <mpiCommunicationEfficiency>%.2f</mpiCommunicationEfficiency>\n"
399 " <mpiLoadBalance>%.2f</mpiLoadBalance>\n"
400 " <mpiLoadBalanceIn>%.2f</mpiLoadBalanceIn>\n"
401 " <mpiLoadBalanceOut>%.2f</mpiLoadBalanceOut>\n"
402 " <ompParallelEfficiency>%.2f</ompParallelEfficiency>\n"
403 " <ompLoadBalance>%.2f</ompLoadBalance>\n"
404 " <ompSchedulingEfficiency>%.2f</ompSchedulingEfficiency>\n"
405 " <ompSerializationEfficiency>%.2f</ompSerializationEfficiency>\n"
406 " </Application>\n",
407 1 record->name,
408 record->num_cpus,
409 record->num_mpi_ranks,
410 record->num_nodes,
411 1 record->avg_cpus,
412 record->cycles,
413 record->instructions,
414 record->num_measurements,
415 record->num_mpi_calls,
416 record->num_omp_parallels,
417 record->num_omp_tasks,
418 record->elapsed_time,
419 record->useful_time,
420 record->mpi_time,
421 record->omp_load_imbalance_time,
422 record->omp_scheduling_time,
423 record->omp_serialization_time,
424 record->min_mpi_normd_proc,
425 record->min_mpi_normd_node,
426 1 record->parallel_efficiency,
427 1 record->mpi_parallel_efficiency,
428 1 record->mpi_communication_efficiency,
429 1 record->mpi_load_balance,
430 1 record->mpi_load_balance_in,
431 1 record->mpi_load_balance_out,
432 1 record->omp_parallel_efficiency,
433 1 record->omp_load_balance,
434 1 record->omp_scheduling_efficiency,
435 1 record->omp_serialization_efficiency
436 );
437 }
438 1 }
439
440 3 static void pop_metrics_to_txt(FILE *out_file) {
441
442 3 for (GSList *node = pop_metrics_records;
443
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
444 3 node = node->next) {
445
446 3 dlb_pop_metrics_t *record = node->data;
447
448
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (record->elapsed_time > 0) {
449 3 fprintf(out_file,
450 "%s\n"
451 "### Name: %s\n"
452 "### Number of CPUs: %d\n"
453 "### Number of MPI processes: %d\n"
454 "### Number of nodes: %d\n"
455 "### Average CPUs: %.1f\n"
456 "### Number of GPUs: %d\n"
457 "### Cycles: %.0f\n"
458 "### Instructions: %.0f\n"
459 "### Number of measurements: %"PRId64"\n"
460 "### Number of MPI calls: %"PRId64"\n"
461 "### Number of OpenMP parallel regions: %"PRId64"\n"
462 "### Number of OpenMP explicit tasks: %"PRId64"\n"
463 "### Number of GPU runtime calls: %"PRId64"\n"
464 "### Elapsed Time (ns): %"PRId64"\n"
465 "### Useful Time (ns): %"PRId64"\n"
466 "### MPI Time (ns): %"PRId64"\n"
467 "### OpenMP Load Imbalance Time (ns): %"PRId64"\n"
468 "### OpenMP Scheduling Time (ns): %"PRId64"\n"
469 "### OpenMP Serialization Time (ns): %"PRId64"\n"
470 "### GPU Runtime Time (ns): %"PRId64"\n"
471 "### MPI time normalized at process level of\n"
472 "### the process with the max non-MPI time: %.0f\n"
473 "### MPI time normalized at node level of\n"
474 "### the process with the max non-MPI time: %.0f\n"
475 "### Device useful time: %"PRId64"\n"
476 "### Device communication time: %"PRId64"\n"
477 "### Device max useful time: %"PRId64"\n"
478 "### Device max active time: %"PRId64"\n"
479 "### --- Host metrics ---\n"
480 "### Parallel efficiency: %.2f\n"
481 "### MPI Parallel efficiency: %.2f\n"
482 "### - MPI Communication efficiency: %.2f\n"
483 "### - MPI Load Balance: %.2f\n"
484 "### - MPI Load Balance in: %.2f\n"
485 "### - MPI Load Balance out: %.2f\n"
486 "### OpenMP Parallel efficiency: %.2f\n"
487 "### - OpenMP Load Balance: %.2f\n"
488 "### - OpenMP Scheduling efficiency: %.2f\n"
489 "### - OpenMP Serialization efficiency: %.2f\n"
490 "### Device Offload efficiency: %.2f\n"
491 "### --- Device metrics ---\n"
492 "### Device Parallel efficiency: %.2f\n"
493 "### - Device Load Balance: %.2f\n"
494 "### - Device Communication efficiency: %.2f\n"
495 "### - Device Orchestration efficiency: %.2f\n",
496 make_header("Monitoring Region POP Metrics"),
497 3 record->name,
498 record->num_cpus,
499 record->num_mpi_ranks,
500 record->num_nodes,
501 3 record->avg_cpus,
502 record->num_gpus,
503 record->cycles,
504 record->instructions,
505 record->num_measurements,
506 record->num_mpi_calls,
507 record->num_omp_parallels,
508 record->num_omp_tasks,
509 record->num_gpu_runtime_calls,
510 record->elapsed_time,
511 record->useful_time,
512 record->mpi_time,
513 record->omp_load_imbalance_time,
514 record->omp_scheduling_time,
515 record->omp_serialization_time,
516 record->gpu_runtime_time,
517 record->min_mpi_normd_proc,
518 record->min_mpi_normd_node,
519 record->gpu_useful_time,
520 record->gpu_communication_time,
521 record->max_gpu_useful_time,
522 record->max_gpu_active_time,
523 3 record->parallel_efficiency,
524 3 record->mpi_parallel_efficiency,
525 3 record->mpi_communication_efficiency,
526 3 record->mpi_load_balance,
527 3 record->mpi_load_balance_in,
528 3 record->mpi_load_balance_out,
529 3 record->omp_parallel_efficiency,
530 3 record->omp_load_balance,
531 3 record->omp_scheduling_efficiency,
532 3 record->omp_serialization_efficiency,
533 3 record->device_offload_efficiency,
534 3 record->gpu_parallel_efficiency,
535 3 record->gpu_load_balance,
536 3 record->gpu_communication_efficiency,
537 3 record->gpu_orchestration_efficiency
538 );
539 } else {
540 fprintf(out_file,
541 "%s\n"
542 "### Name: %s\n"
543 "### No data ###\n",
544 make_header("Monitoring Region POP Metrics"),
545 record->name);
546 }
547 }
548 3 }
549
550 3 static void pop_metrics_to_csv(FILE *out_file, bool append) {
551
552
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (pop_metrics_records == NULL) return;
553
554
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (!append) {
555 /* Print header */
556 2 fprintf(out_file,
557 "name,"
558 "numCpus,"
559 "numMpiRanks,"
560 "numNodes,"
561 "avgCpus,"
562 "cycles,"
563 "instructions,"
564 "numMeasurements,"
565 "numMpiCalls,"
566 "numOmpParallels,"
567 "numOmpTasks,"
568 "numGpuRuntimeCalls,"
569 "elapsedTime,"
570 "usefulTime,"
571 "mpiTime,"
572 "mpiWorkerIdleTime,"
573 "ompLoadImbalanceTime,"
574 "ompSchedulingTime,"
575 "ompSerializationTime,"
576 "gpuRuntimeTime,"
577 "minMpiNormdProc,"
578 "minMpiNormdNode,"
579 "parallelEfficiency,"
580 "mpiParallelEfficiency,"
581 "mpiCommunicationEfficiency,"
582 "mpiLoadBalance,"
583 "mpiLoadBalanceIn,"
584 "mpiLoadBalanceOut,"
585 "ompParallelEfficiency,"
586 "ompLoadBalance,"
587 "ompSchedulingEfficiency,"
588 "ompSerializationEfficiency,"
589 "deviceOffloadEfficiency,"
590 "gpuParallelEfficiency,"
591 "gpuLoadBalance,"
592 "gpuCommunicationEfficiency,"
593 "gpuOrchestrationEfficiency\n"
594 );
595 }
596
597 3 for (GSList *node = pop_metrics_records;
598
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
599 3 node = node->next) {
600
601 3 dlb_pop_metrics_t *record = node->data;
602
603 3 fprintf(out_file,
604 "\"%s\"," /* name */
605 "%d," /* numCpus */
606 "%d," /* numMpiRanks */
607 "%d," /* numNodes */
608 "%.1f," /* avgCpus */
609 "%.0f," /* cycles */
610 "%.0f," /* instructions */
611 "%"PRId64"," /* numMeasurements */
612 "%"PRId64"," /* numMpiCalls */
613 "%"PRId64"," /* numOmpParallels */
614 "%"PRId64"," /* numOmpTasks */
615 "%"PRId64"," /* numGpuRuntimeCalls */
616 "%"PRId64"," /* elapsedTime */
617 "%"PRId64"," /* usefulTime */
618 "%"PRId64"," /* mpiTime */
619 "%"PRId64"," /* mpiWorkerIdleTime */
620 "%"PRId64"," /* ompLoadImbalanceTime */
621 "%"PRId64"," /* ompSchedulingTime */
622 "%"PRId64"," /* ompSerializationTime */
623 "%"PRId64"," /* gpuRuntimeTime */
624 "%.0f," /* minMpiNormdProc */
625 "%.0f," /* minMpiNormdNode */
626 "%.2f," /* parallelEfficiency */
627 "%.2f," /* mpiParallelEfficiency */
628 "%.2f," /* mpiCommunicationEfficiency */
629 "%.2f," /* mpiLoadBalance */
630 "%.2f," /* mpiLoadBalanceIn */
631 "%.2f," /* mpiLoadBalanceOut */
632 "%.2f," /* ompParallelEfficiency */
633 "%.2f," /* ompLoadBalance */
634 "%.2f," /* ompSchedulingEfficiency */
635 "%.2f," /* ompSerializationEfficiency */
636 "%.2f," /* deviceOffloadEfficiency */
637 "%.2f," /* gpuParallelEfficiency */
638 "%.2f," /* gpuLoadBalance */
639 "%.2f," /* gpuCommunicationEfficiency */
640 "%.2f\n", /* gpuOrchestrationEfficiency */
641 3 record->name,
642 record->num_cpus,
643 record->num_mpi_ranks,
644 record->num_nodes,
645 3 record->avg_cpus,
646 record->cycles,
647 record->instructions,
648 record->num_measurements,
649 record->num_mpi_calls,
650 record->num_omp_parallels,
651 record->num_omp_tasks,
652 record->num_gpu_runtime_calls,
653 record->elapsed_time,
654 record->useful_time,
655 record->mpi_time,
656 record->mpi_worker_idle_time,
657 record->omp_load_imbalance_time,
658 record->omp_scheduling_time,
659 record->omp_serialization_time,
660 record->gpu_runtime_time,
661 record->min_mpi_normd_proc,
662 record->min_mpi_normd_node,
663 3 record->parallel_efficiency,
664 3 record->mpi_parallel_efficiency,
665 3 record->mpi_communication_efficiency,
666 3 record->mpi_load_balance,
667 3 record->mpi_load_balance_in,
668 3 record->mpi_load_balance_out,
669 3 record->omp_parallel_efficiency,
670 3 record->omp_load_balance,
671 3 record->omp_scheduling_efficiency,
672 3 record->omp_serialization_efficiency,
673 3 record->device_offload_efficiency,
674 3 record->gpu_parallel_efficiency,
675 3 record->gpu_load_balance,
676 3 record->gpu_communication_efficiency,
677 3 record->gpu_orchestration_efficiency
678 );
679 }
680 }
681
682 37 static void pop_metrics_finalize(void) {
683
684 /* Free every record data */
685 37 for (GSList *node = pop_metrics_records;
686
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 37 times.
69 node != NULL;
687 32 node = node->next) {
688
689 32 dlb_pop_metrics_t *record = node->data;
690 32 free(record);
691 }
692
693 /* Free list */
694 37 g_slist_free(pop_metrics_records);
695 37 pop_metrics_records = NULL;
696 37 }
697
698
699 /*********************************************************************************/
700 /* Node */
701 /*********************************************************************************/
702
703 static GSList *node_records = NULL;
704
705 12 void talp_output_record_node(const node_record_t *node_record) {
706
707 12 int nelems = node_record->nelems;
708
709 /* Allocate new record */
710 12 size_t process_records_size = sizeof(process_in_node_record_t) * nelems;
711 12 size_t node_record_size = sizeof(node_record_t) + process_records_size;
712 12 node_record_t *new_record = malloc(node_record_size);
713
714 /* Memcpy the entire struct */
715 12 memcpy(new_record, node_record, node_record_size);
716
717 /* Insert to list */
718 12 node_records = g_slist_prepend(node_records, new_record);
719 12 }
720
721 24 static void node_print(void) {
722
723 24 for (GSList *node = node_records;
724
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 24 times.
25 node != NULL;
725 1 node = node->next) {
726
727 1 node_record_t *node_record = node->data;
728
729 1 info(" |----------------------------------------------------------|");
730 1 info(" | Extended Report Node %4d |",
731 node_record->node_id);
732 1 info(" |----------------------------------------------------------|");
733 1 info(" | Process | Useful Time | MPI Time |");
734 1 info(" |------------|----------------------|----------------------|");
735
736
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
737 2 info(" | %-10d | %18e s | %18e s |",
738 node_record->processes[i].pid,
739 nsecs_to_secs(node_record->processes[i].useful_time),
740 nsecs_to_secs(node_record->processes[i].mpi_time));
741 2 info(" |------------|----------------------|----------------------|");
742 }
743
744
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_record->nelems > 0) {
745 1 info(" |------------|----------------------|----------------------|");
746 1 info(" | %-10s | %18e s | %18e s |", "Node Avg",
747 nsecs_to_secs(node_record->avg_useful_time),
748 nsecs_to_secs(node_record->avg_mpi_time));
749 1 info(" |------------|----------------------|----------------------|");
750 1 info(" | %-10s | %18e s | %18e s |", "Node Max",
751 nsecs_to_secs(node_record->max_useful_time),
752 nsecs_to_secs(node_record->max_mpi_time));
753 1 info(" |------------|----------------------|----------------------|");
754 }
755 }
756 24 }
757
758 6 static void node_to_json(FILE *out_file) {
759
760
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (node_records == NULL) return;
761
762 /* If there are pop_metrics, append to the existing dictionary */
763
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (pop_metrics_records != NULL) {
764 6 fprintf(out_file,",\n");
765 }
766
767 6 fprintf(out_file,
768 " \"node\": [\n");
769
770 6 for (GSList *node = node_records;
771
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 node != NULL;
772 6 node = node->next) {
773
774 6 node_record_t *node_record = node->data;
775
776 6 fprintf(out_file,
777 " {\n"
778 " \"id\": \"%d\",\n"
779 " \"process\": [\n",
780 node_record->node_id);
781
782
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 6 times.
18 for (int i = 0; i < node_record->nelems; ++i) {
783 12 fprintf(out_file,
784 " {\n"
785 " \"id\": %d,\n"
786 " \"usefulTime\": %"PRId64",\n"
787 " \"mpiTime\": %"PRId64"\n"
788 " }%s\n",
789 node_record->processes[i].pid,
790 node_record->processes[i].useful_time,
791 node_record->processes[i].mpi_time,
792
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 i+1 < node_record->nelems ? "," : "");
793 }
794
795 6 fprintf(out_file,
796 " ],\n"
797 " \"nodeAvg\": {\n"
798 " \"usefulTime\": %"PRId64",\n"
799 " \"mpiTime\": %"PRId64"\n"
800 " },\n"
801 " \"nodeMax\": {\n"
802 " \"usefulTime\": %"PRId64",\n"
803 " \"mpiTime\": %"PRId64"\n"
804 " }\n"
805 " }%s\n",
806 node_record->avg_useful_time,
807 node_record->avg_mpi_time,
808 node_record->max_useful_time,
809 node_record->max_mpi_time,
810
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 node->next != NULL ? "," : "");
811 }
812 6 fprintf(out_file,
813 " ]"); /* no eol */
814 }
815
816 1 static void node_to_xml(FILE *out_file) {
817
818 1 for (GSList *node = node_records;
819
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
820 1 node = node->next) {
821
822 1 node_record_t *node_record = node->data;
823
824 1 fprintf(out_file,
825 " <node>\n"
826 " <id>%d</id>\n",
827 node_record->node_id);
828
829
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
830 2 fprintf(out_file,
831 " <process>\n"
832 " <id>%d</id>\n"
833 " <usefulTime>%"PRId64"</usefulTime>\n"
834 " <mpiTime>%"PRId64"</mpiTime>\n"
835 " </process>\n",
836 node_record->processes[i].pid,
837 node_record->processes[i].useful_time,
838 node_record->processes[i].mpi_time);
839 }
840
841 1 fprintf(out_file,
842 " <nodeAvg>\n"
843 " <usefulTime>%"PRId64"</usefulTime>\n"
844 " <mpiTime>%"PRId64"</mpiTime>\n"
845 " </nodeAvg>\n"
846 " <nodeMax>\n"
847 " <usefulTime>%"PRId64"</usefulTime>\n"
848 " <mpiTime>%"PRId64"</mpiTime>\n"
849 " </nodeMax>\n"
850 " </node>\n",
851 node_record->avg_useful_time,
852 node_record->avg_mpi_time,
853 node_record->max_useful_time,
854 node_record->max_mpi_time);
855 }
856 1 }
857
858 3 static void node_to_csv(FILE *out_file, bool append) {
859
860
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (node_records == NULL) return;
861
862
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
863 /* Print header */
864 1 fprintf(out_file,
865 "NodeId,"
866 "ProcessId,"
867 "ProcessUsefulTime,"
868 "ProcessMPITime,"
869 "NodeAvgUsefulTime,"
870 "NodeAvgMPITime,"
871 "NodeMaxUsefulTime,"
872 "NodeMaxMPITime\n");
873 }
874
875 1 for (GSList *node = node_records;
876
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
877 1 node = node->next) {
878
879 1 node_record_t *node_record = node->data;
880
881
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 for (int i = 0; i < node_record->nelems; ++i) {
882 2 fprintf(out_file,
883 "%d," /* NodeId */
884 "%d," /* ProcessId */
885 "%"PRId64"," /* ProcessUsefulTime */
886 "%"PRId64"," /* ProcessMPITime */
887 "%"PRId64"," /* NodeAvgUsefulTime */
888 "%"PRId64"," /* NodeAvgMPITime*/
889 "%"PRId64"," /* NodeMaxUsefulTime */
890 "%"PRId64"\n", /* NodeMaxMPITime*/
891 node_record->node_id,
892 node_record->processes[i].pid,
893 node_record->processes[i].useful_time,
894 node_record->processes[i].mpi_time,
895 node_record->avg_useful_time,
896 node_record->avg_mpi_time,
897 node_record->max_useful_time,
898 node_record->max_mpi_time);
899
900 }
901 }
902 }
903
904 3 static void node_to_txt(FILE *out_file) {
905
906 3 for (GSList *node = node_records;
907
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
908 3 node = node->next) {
909
910 3 node_record_t *node_record = node->data;
911
912 3 fprintf(out_file,
913 " |----------------------------------------------------------|\n"
914 " | Extended Report Node %4d |\n"
915 " |----------------------------------------------------------|\n"
916 " | Process | Useful Time | MPI Time |\n"
917 " |------------|----------------------|----------------------|\n",
918 node_record->node_id);
919
920
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
9 for (int i = 0; i < node_record->nelems; ++i) {
921 6 fprintf(out_file,
922 " | %-10d | %18e s | %18e s |\n"
923 " |------------|----------------------|----------------------|\n",
924 node_record->processes[i].pid,
925 nsecs_to_secs(node_record->processes[i].useful_time),
926 nsecs_to_secs(node_record->processes[i].mpi_time));
927 }
928
929
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (node_record->nelems > 0) {
930 3 fprintf(out_file,
931 " |------------|----------------------|----------------------|\n"
932 " | %-10s | %18e s | %18e s |\n"
933 " |------------|----------------------|----------------------|\n"
934 " | %-10s | %18e s | %18e s |\n"
935 " |------------|----------------------|----------------------|\n",
936 "Node Avg",
937 nsecs_to_secs(node_record->avg_useful_time),
938 nsecs_to_secs(node_record->avg_mpi_time),
939 "Node Max",
940 nsecs_to_secs(node_record->max_useful_time),
941 nsecs_to_secs(node_record->max_mpi_time));
942 }
943 }
944 3 }
945
946 37 static void node_finalize(void) {
947
948 /* Free every record data */
949 37 for (GSList *node = node_records;
950
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 37 times.
49 node != NULL;
951 12 node = node->next) {
952
953 12 node_record_t *record = node->data;
954 12 free(record);
955 }
956
957 /* Free list */
958 37 g_slist_free(node_records);
959 37 node_records = NULL;
960 37 }
961
962
963 /*********************************************************************************/
964 /* Process */
965 /*********************************************************************************/
966
967 typedef struct region_record_t {
968 char name[DLB_MONITOR_NAME_MAX];
969 int num_mpi_ranks;
970 process_record_t process_records[];
971 } region_record_t;
972
973 static GSList *region_records = NULL;
974
975 14 void talp_output_record_process(const char *region_name,
976 const process_record_t *process_record, int num_mpi_ranks) {
977
978 14 region_record_t *region_record = NULL;
979
980 /* Find region or allocate new one */
981 14 for (GSList *node = region_records;
982
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
15 node != NULL;
983 1 node = node->next) {
984
985 1 region_record_t *record = node->data;
986
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (strcmp(record->name, region_name) == 0) {
987 region_record = record;
988 break;
989 }
990 }
991
992 /* Allocate if not found */
993
1/2
✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
14 if (region_record == NULL) {
994 /* Allocate and initialize new region */
995 14 size_t region_record_size = sizeof(region_record_t) +
996 14 sizeof(process_record_t) * num_mpi_ranks;
997 14 region_record = malloc(region_record_size);
998 14 *region_record = (const region_record_t) {
999 .num_mpi_ranks = num_mpi_ranks,
1000 };
1001 14 snprintf(region_record->name, DLB_MONITOR_NAME_MAX, "%s",
1002 region_name);
1003
1004 /* Insert to list */
1005 14 region_records = g_slist_prepend(region_records, region_record);
1006 }
1007
1008 /* Copy process_record */
1009 14 int rank = process_record->rank;
1010
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 ensure(rank < num_mpi_ranks, "Wrong rank number in %s", __func__);
1011 14 memcpy(&region_record->process_records[rank], process_record, sizeof(process_record_t));
1012 14 }
1013
1014 24 static void process_print(void) {
1015
1016 24 for (GSList *node = region_records;
1017
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 24 times.
27 node != NULL;
1018 3 node = node->next) {
1019
1020 3 region_record_t *region_record = node->data;
1021
1022
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1023
1024 3 process_record_t *process_record = &region_record->process_records[i];
1025
1026 3 info("%s", make_header("Monitoring Region Summary"));
1027 3 info("### Name: %s",
1028 3 region_record->name);
1029 3 info("### Process: %d (%s)",
1030 3 process_record->pid, process_record->hostname);
1031 3 info("### Rank: %d",
1032 process_record->rank);
1033 3 info("### CpuSet: %s",
1034 3 process_record->cpuset);
1035 3 info("### Elapsed time: %"PRId64" ns",
1036 process_record->monitor.elapsed_time);
1037 3 info("### Useful time: %"PRId64" ns",
1038 process_record->monitor.useful_time);
1039
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (process_record->monitor.mpi_time > 0) {
1040 1 info("### Not useful MPI: %"PRId64" ns",
1041 process_record->monitor.mpi_time);
1042 }
1043
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.mpi_worker_idle_time > 0) {
1044 info("### Not useful MPI in worker threads: %"PRId64" ns",
1045 process_record->monitor.mpi_worker_idle_time);
1046 }
1047
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (process_record->monitor.omp_load_imbalance_time > 0
1048
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 || process_record->monitor.omp_scheduling_time > 0
1049
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 || process_record->monitor.omp_serialization_time > 0) {
1050 info("### Not useful OMP Load Imbalance: %"PRId64" ns",
1051 process_record->monitor.omp_load_imbalance_time);
1052 info("### Not useful OMP Scheduling: %"PRId64" ns",
1053 process_record->monitor.omp_scheduling_time);
1054 info("### Not useful OMP Serialization: %"PRId64" ns",
1055 process_record->monitor.omp_serialization_time);
1056 }
1057
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.gpu_runtime_time > 0) {
1058 info("### Not useful GPU runtime: %"PRId64" ns",
1059 process_record->monitor.gpu_runtime_time);
1060 info("### Device useful time: %"PRId64" ns",
1061 process_record->monitor.gpu_useful_time);
1062 info("### Device communication time: %"PRId64" ns",
1063 process_record->monitor.gpu_communication_time);
1064 }
1065
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 if (process_record->monitor.instructions > 0
1066 && process_record->monitor.cycles > 0) {
1067 info("### IPC : %.2f",
1068 (float)process_record->monitor.instructions
1069 / process_record->monitor.cycles);
1070 }
1071 }
1072 }
1073 24 }
1074
1075 6 static void process_to_json(FILE *out_file) {
1076
1077
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (region_records == NULL) return;
1078
1079 /* If there are pop_metrics or node_metrics, append to the existing dictionary */
1080
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (pop_metrics_records != NULL
1081 || node_records != NULL) {
1082 6 fprintf(out_file,",\n");
1083 }
1084
1085 6 fprintf(out_file,
1086 " \"Process\": {\n");
1087
1088 6 for (GSList *node = region_records;
1089
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 node != NULL;
1090 6 node = node->next) {
1091
1092 6 region_record_t *region_record = node->data;
1093
1094 6 fprintf(out_file,
1095 " \"%s\": [\n",
1096 6 region_record->name);
1097
1098
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1099
1100 6 process_record_t *process_record = &region_record->process_records[i];
1101
1102 6 fprintf(out_file,
1103 " {\n"
1104 " \"rank\": %d,\n"
1105 " \"pid\": %d,\n"
1106 " \"nodeId\": %d,\n"
1107 " \"hostname\": \"%s\",\n"
1108 " \"cpuset\": %s,\n"
1109 " \"numCpus\": %d,\n"
1110 " \"avgCpus\": %.1f,\n"
1111 " \"cycles\": %"PRId64",\n"
1112 " \"instructions\": %"PRId64",\n"
1113 " \"numMeasurements\": %d,\n"
1114 " \"numResets\": %d,\n"
1115 " \"numMpiCalls\": %"PRId64",\n"
1116 " \"numOmpParallels\": %"PRId64",\n"
1117 " \"numOmpTasks\": %"PRId64",\n"
1118 " \"numGpuRuntimeCalls\": %"PRId64",\n"
1119 " \"elapsedTime\": %"PRId64",\n"
1120 " \"usefulTime\": %"PRId64",\n"
1121 " \"mpiTime\": %"PRId64",\n"
1122 " \"mpiWorkerIdleTime\": %"PRId64",\n"
1123 " \"ompLoadImbalanceTime\": %"PRId64",\n"
1124 " \"ompSchedulingTime\": %"PRId64",\n"
1125 " \"ompSerializationTime\": %"PRId64",\n"
1126 " \"gpuRuntimeTime\": %"PRId64",\n"
1127 " \"gpuUsefulTime\": %"PRId64",\n"
1128 " \"gpuCommunicationTime\": %"PRId64"\n"
1129 " }%s\n",
1130 process_record->rank,
1131 process_record->pid,
1132 process_record->node_id,
1133 6 process_record->hostname,
1134 6 process_record->cpuset_quoted,
1135 process_record->monitor.num_cpus,
1136 6 process_record->monitor.avg_cpus,
1137 process_record->monitor.cycles,
1138 process_record->monitor.instructions,
1139 process_record->monitor.num_measurements,
1140 process_record->monitor.num_resets,
1141 process_record->monitor.num_mpi_calls,
1142 process_record->monitor.num_omp_parallels,
1143 process_record->monitor.num_omp_tasks,
1144 process_record->monitor.num_gpu_runtime_calls,
1145 process_record->monitor.elapsed_time,
1146 process_record->monitor.useful_time,
1147 process_record->monitor.mpi_time,
1148 process_record->monitor.mpi_worker_idle_time,
1149 process_record->monitor.omp_load_imbalance_time,
1150 process_record->monitor.omp_scheduling_time,
1151 process_record->monitor.omp_serialization_time,
1152 process_record->monitor.gpu_runtime_time,
1153 process_record->monitor.gpu_useful_time,
1154 process_record->monitor.gpu_communication_time,
1155
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 i + 1 < region_record->num_mpi_ranks ? "," : "");
1156 }
1157 6 fprintf(out_file,
1158 " ]%s\n",
1159
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 node->next != NULL ? "," : "");
1160 }
1161 6 fprintf(out_file,
1162 " }"); /* no eol */
1163 }
1164
1165 1 static void process_to_xml(FILE *out_file) {
1166
1167 1 for (GSList *node = region_records;
1168
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1169 1 node = node->next) {
1170
1171 1 region_record_t *region_record = node->data;
1172
1173 1 fprintf(out_file,
1174 " <Process>\n"
1175 " <name>%s</name>\n",
1176 1 region_record->name);
1177
1178
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1179
1180 1 process_record_t *process_record = &region_record->process_records[i];
1181
1182 1 fprintf(out_file,
1183 " <process>\n"
1184 " <rank>%d</rank>\n"
1185 " <pid>%d</pid>\n"
1186 " <nodeId>%d</nodeId>\n"
1187 " <hostname>%s</hostname>\n"
1188 " <cpuset>%s</cpuset>\n"
1189 " <numCpus>%d</numCpus>\n"
1190 " <avgCpus>%.1f</avgCpus>\n"
1191 " <cycles>%"PRId64"</cycles>\n"
1192 " <instructions>%"PRId64"</instructions>\n"
1193 " <numMeasurements>%d</numMeasurements>\n"
1194 " <numResets>%d</numResets>\n"
1195 " <numMpiCalls>%"PRId64"</numMpiCalls>\n"
1196 " <numOmpParallels>%"PRId64"</numOmpParallels>\n"
1197 " <numOmpTasks>%"PRId64"</numOmpTasks>\n"
1198 " <elapsedTime>%"PRId64"</elapsedTime>\n"
1199 " <usefulTime>%"PRId64"</usefulTime>\n"
1200 " <mpiTime>%"PRId64"</mpiTime>\n"
1201 " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n"
1202 " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n"
1203 " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n"
1204 " </process>\n",
1205 process_record->rank,
1206 process_record->pid,
1207 process_record->node_id,
1208 1 process_record->hostname,
1209 1 process_record->cpuset_quoted,
1210 process_record->monitor.num_cpus,
1211 1 process_record->monitor.avg_cpus,
1212 process_record->monitor.cycles,
1213 process_record->monitor.instructions,
1214 process_record->monitor.num_measurements,
1215 process_record->monitor.num_resets,
1216 process_record->monitor.num_mpi_calls,
1217 process_record->monitor.num_omp_parallels,
1218 process_record->monitor.num_omp_tasks,
1219 process_record->monitor.elapsed_time,
1220 process_record->monitor.useful_time,
1221 process_record->monitor.mpi_time,
1222 process_record->monitor.omp_load_imbalance_time,
1223 process_record->monitor.omp_scheduling_time,
1224 process_record->monitor.omp_serialization_time);
1225 }
1226 1 fprintf(out_file,
1227 " </Process>\n");
1228 }
1229 1 }
1230
1231 3 static void process_to_csv(FILE *out_file, bool append) {
1232
1233
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (region_records == NULL) return;
1234
1235
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (!append) {
1236 /* Print header */
1237 1 fprintf(out_file,
1238 "Region,"
1239 "Rank,"
1240 "PID,"
1241 "NodeId,"
1242 "Hostname,"
1243 "CpuSet,"
1244 "NumCpus,"
1245 "AvgCpus,"
1246 "Cycles,"
1247 "Instructions,"
1248 "NumMeasurements,"
1249 "NumResets,"
1250 "NumMpiCalls,"
1251 "NumOmpParallels,"
1252 "NumOmpTasks,"
1253 "NumGpuCalls,"
1254 "ElapsedTime,"
1255 "UsefulTime,"
1256 "MPITime,"
1257 "MPIWorkerIdleTime,"
1258 "OMPLoadImbalance,"
1259 "OMPSchedulingTime,"
1260 "OMPSerializationTime,"
1261 "GPURuntimeTime,"
1262 "GPUUsefulTime,"
1263 "GPUCommunicationTime\n");
1264 }
1265
1266 1 for (GSList *node = region_records;
1267
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 node != NULL;
1268 1 node = node->next) {
1269
1270 1 region_record_t *region_record = node->data;
1271
1272
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1273
1274 1 process_record_t *process_record = &region_record->process_records[i];
1275
1276 1 fprintf(out_file,
1277 "%s," /* Region */
1278 "%d," /* Rank */
1279 "%d," /* PID */
1280 "%d," /* NodeId */
1281 "%s," /* Hostname */
1282 "%s," /* CpuSet */
1283 "%d," /* NumCpus */
1284 "%.1f," /* AvgCpus */
1285 "%"PRId64"," /* Cycles */
1286 "%"PRId64"," /* Instructions */
1287 "%d," /* NumMeasurements */
1288 "%d," /* NumResets */
1289 "%"PRId64"," /* NumMpiCalls */
1290 "%"PRId64"," /* NumOmpParallels */
1291 "%"PRId64"," /* NumOmpTasks */
1292 "%"PRId64"," /* NumGpuCalls */
1293 "%"PRId64"," /* ElapsedTime */
1294 "%"PRId64"," /* UsefulTime */
1295 "%"PRId64"," /* MPITime */
1296 "%"PRId64"," /* MPIWorkerIdleTime */
1297 "%"PRId64"," /* OMPLoadImbalance */
1298 "%"PRId64"," /* OMPSchedulingTime */
1299 "%"PRId64"," /* OMPSerializationTime */
1300 "%"PRId64"," /* GPURuntimeTime */
1301 "%"PRId64"," /* GPUUsefulTime */
1302 "%"PRId64"\n", /* GPUCommunicationTime */
1303 1 region_record->name,
1304 process_record->rank,
1305 process_record->pid,
1306 process_record->node_id,
1307 1 process_record->hostname,
1308 1 process_record->cpuset_quoted,
1309 process_record->monitor.num_cpus,
1310 1 process_record->monitor.avg_cpus,
1311 process_record->monitor.cycles,
1312 process_record->monitor.instructions,
1313 process_record->monitor.num_measurements,
1314 process_record->monitor.num_resets,
1315 process_record->monitor.num_mpi_calls,
1316 process_record->monitor.num_omp_parallels,
1317 process_record->monitor.num_omp_tasks,
1318 process_record->monitor.num_gpu_runtime_calls,
1319 process_record->monitor.elapsed_time,
1320 process_record->monitor.useful_time,
1321 process_record->monitor.mpi_time,
1322 process_record->monitor.mpi_worker_idle_time,
1323 process_record->monitor.omp_load_imbalance_time,
1324 process_record->monitor.omp_scheduling_time,
1325 process_record->monitor.omp_serialization_time,
1326 process_record->monitor.gpu_runtime_time,
1327 process_record->monitor.gpu_useful_time,
1328 process_record->monitor.gpu_communication_time);
1329 }
1330 }
1331 }
1332
1333 3 static void process_to_txt(FILE *out_file) {
1334
1335 3 for (GSList *node = region_records;
1336
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 node != NULL;
1337 3 node = node->next) {
1338
1339 3 region_record_t *region_record = node->data;
1340
1341
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1342
1343 3 process_record_t *process_record = &region_record->process_records[i];
1344
1345 6 float ipc = process_record->monitor.cycles > 0
1346 ? (float)process_record->monitor.instructions
1347 / process_record->monitor.cycles
1348
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 : 0.0f;
1349
1350 3 fprintf(out_file,
1351 "%s\n"
1352 "### Name: %s\n"
1353 "### Process: %d (%s)\n"
1354 "### Rank: %d\n"
1355 "### CpuSet: %s\n"
1356 "### Elapsed time: %"PRId64" ns\n"
1357 "### Useful time: %"PRId64" ns\n"
1358 "### Not useful MPI: %"PRId64" ns\n"
1359 "### Not useful MPI in worker threads: %"PRId64" ns\n"
1360 "### Not useful OMP Load Imbalance: %"PRId64" ns\n"
1361 "### Not useful OMP Scheduling: %"PRId64" ns\n"
1362 "### Not useful OMP Serialization: %"PRId64" ns\n"
1363 "### Not useful GPU runtime: %"PRId64" ns\n"
1364 "### Device useful time: %"PRId64" ns\n"
1365 "### Device communication time: %"PRId64" ns\n"
1366 "### IPC: %.2f\n",
1367 make_header("Monitoring Region Summary"),
1368 3 region_record->name,
1369 3 process_record->pid, process_record->hostname,
1370 process_record->rank,
1371 3 process_record->cpuset,
1372 process_record->monitor.elapsed_time,
1373 process_record->monitor.useful_time,
1374 process_record->monitor.mpi_time,
1375 process_record->monitor.mpi_worker_idle_time,
1376 process_record->monitor.omp_load_imbalance_time,
1377 process_record->monitor.omp_scheduling_time,
1378 process_record->monitor.omp_serialization_time,
1379 process_record->monitor.gpu_runtime_time,
1380 process_record->monitor.gpu_useful_time,
1381 process_record->monitor.gpu_communication_time,
1382 ipc);
1383 }
1384 }
1385 3 }
1386
1387 37 static void process_finalize(void) {
1388
1389 /* Free every record data */
1390 37 for (GSList *node = region_records;
1391
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 37 times.
51 node != NULL;
1392 14 node = node->next) {
1393
1394 14 region_record_t *record = node->data;
1395 14 free(record);
1396 }
1397
1398 /* Free list */
1399 37 g_slist_free(region_records);
1400 37 region_records = NULL;
1401 37 }
1402
1403
1404 /*********************************************************************************/
1405 /* TALP Common */
1406 /*********************************************************************************/
/* Data written at the top of the reports, independent of any region */
struct TALPCommonRecord {
    char *dlb_version;       /* Version X.Y.Z[-#-hash] */
    char *time_of_creation;  /* ISO 8601 string */
};
typedef struct TALPCommonRecord talp_common_record_t;

/* Single instance shared by the functions of this file */
static talp_common_record_t common_record;
1412
1413 37 static void talp_output_record_common(void) {
1414 /* Initialize structure */
1415 37 time_t now = time(NULL);
1416 37 common_record = (const talp_common_record_t) {
1417 .dlb_version = PACKAGE_VERSION,
1418 37 .time_of_creation = get_iso_8601_string(localtime(&now)),
1419 };
1420 37 }
1421
1422 6 static void common_to_json(FILE *out_file) {
1423 6 fprintf(out_file,
1424 " \"dlbVersion\": \"%s\",\n"
1425 " \"timestamp\": \"%s\",\n",
1426 common_record.dlb_version,
1427 common_record.time_of_creation);
1428 6 }
1429
1430 1 static void common_to_xml(FILE *out_file) {
1431
1432 1 fprintf(out_file,
1433 " <dlbVersion>%s</dlbVersion>\n"
1434 " <timestamp>%s</timestamp>\n",
1435 common_record.dlb_version,
1436 common_record.time_of_creation);
1437 1 }
1438
1439 3 static void common_to_txt(FILE *out_file) {
1440
1441 3 fprintf(out_file,
1442 "%s\n"
1443 "### DLB Version: %s\n"
1444 "### Timestamp: %s\n",
1445 make_header("TALP Common Data"),
1446 common_record.dlb_version,
1447 common_record.time_of_creation);
1448 3 }
1449
1450 37 static void common_finalize(void) {
1451 37 free(common_record.time_of_creation);
1452 37 }
1453
1454
1455
1456
1457 /*********************************************************************************/
1458 /* TALP Resources */
1459 /*********************************************************************************/
/* Amount of resources reported in the "resources" section of the reports */
struct TALPResourcesRecord {
    unsigned int num_cpus;       /* number of CPUs */
    unsigned int num_nodes;      /* number of nodes */
    unsigned int num_mpi_ranks;  /* number of MPI ranks */
    unsigned int num_gpus;       /* number of GPUs */
};
typedef struct TALPResourcesRecord talp_resources_record_t;

/* Single instance shared by the functions of this file */
static talp_resources_record_t resources_record;
1467
1468 11 void talp_output_record_resources(int num_cpus, int num_nodes, int num_mpi_ranks,
1469 int num_gpus) {
1470
1471 11 resources_record = (const talp_resources_record_t) {
1472 11 .num_cpus = (unsigned int) num_cpus,
1473 11 .num_nodes = (unsigned int) num_nodes,
1474 11 .num_mpi_ranks = (unsigned int) num_mpi_ranks,
1475 11 .num_gpus = (unsigned int) num_gpus,
1476 };
1477 11 }
1478
1479 6 static void resources_to_json(FILE *out_file) {
1480 6 fprintf(out_file,
1481 " \"resources\": {\n"
1482 " \"numCpus\": %u,\n"
1483 " \"numNodes\": %u,\n"
1484 " \"numMpiRanks\": %u,\n"
1485 " \"numGpus\": %u\n"
1486 " },\n",
1487 resources_record.num_cpus,
1488 resources_record.num_nodes,
1489 resources_record.num_mpi_ranks,
1490 resources_record.num_gpus);
1491 6 }
1492
1493 1 static void resources_to_xml(FILE *out_file) {
1494
1495 1 fprintf(out_file,
1496 " <resources>\n"
1497 " <numCpus>%u</numCpus>\n"
1498 " <numNodes>%u</numNodes>\n"
1499 " <numMpiRanks>%u</numMpiRanks>\n"
1500 " </resources>",
1501 resources_record.num_cpus,
1502 resources_record.num_nodes,
1503 resources_record.num_mpi_ranks);
1504 1 }
1505
1506 3 static void resources_to_txt(FILE *out_file) {
1507
1508 3 fprintf(out_file,
1509 "%s\n"
1510 "### Number of CPUs: %u\n"
1511 "### Number of Nodes: %u\n"
1512 "### Number of MPI processes: %u\n"
1513 "### Number of GPUs: %u\n",
1514 make_header("TALP Resources"),
1515 resources_record.num_cpus,
1516 resources_record.num_nodes,
1517 resources_record.num_mpi_ranks,
1518 resources_record.num_gpus);
1519 3 }
1520
1521
1522 /*********************************************************************************/
1523 /* TALP Process info */
1524 /*********************************************************************************/
1525
/* Identification of the process that writes the report */
struct talp_process_info_record_t {
    char hostname[HOST_NAME_MAX];  /* host name, as given by gethostname() */
    pid_t pid;                     /* process id; 0 means "not recorded" */
};
typedef struct talp_process_info_record_t talp_process_info_record_t;

/* Single instance, zero-initialized until the process info is recorded */
static talp_process_info_record_t process_info_record = {0};
1532
1533 19 void talp_output_record_process_info(void) {
1534
1535 19 gethostname(process_info_record.hostname, HOST_NAME_MAX);
1536 19 process_info_record.pid = getpid();
1537 19 }
1538
1539 6 static void process_info_to_json(FILE *out_file) {
1540
1541
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
6 if (process_info_record.pid != 0) {
1542 5 fprintf(out_file,
1543 " \"processInfo\": {\n"
1544 " \"hostname\": \"%s\",\n"
1545 " \"pid\": %d\n"
1546 " },\n",
1547 process_info_record.hostname,
1548 process_info_record.pid);
1549 }
1550 6 }
1551
1552 /*********************************************************************************/
1553 /* Helper functions */
1554 /*********************************************************************************/
1555
/* Open the top-level JSON object in OUT_FILE */
static void json_header(FILE *out_file) {
    fputs("{\n", out_file);
}
1559
/* Close the top-level JSON object in OUT_FILE. The leading end of line
 * terminates the last member, which is written without one. */
static void json_footer(FILE *out_file) {
    fputs("\n}\n", out_file);
}
1563
/* Write the XML declaration and open the <root> element in OUT_FILE */
static void xml_header(FILE *out_file) {
    fputs("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
          "<root>\n",
          out_file);
}
1568
/* Close the <root> element in OUT_FILE */
static void xml_footer(FILE *out_file) {
    fputs("</root>\n", out_file);
}
1572
1573
1574 /*********************************************************************************/
1575 /* Output directory/file logic */
1576 /*********************************************************************************/
1577
/* Create the directory PATH and every missing parent (mkdir -p equivalent).
 *
 * path: directory to create; one trailing slash is ignored.
 * mode: permissions given to every directory that gets created.
 *
 * Returns 0 if the directory was created or something already exists at
 * that path, and -1 with errno set otherwise:
 *  - ENOENT if PATH is empty,
 *  - ENAMETOOLONG if PATH does not fit in PATH_MAX bytes (it is never
 *    silently truncated),
 *  - whatever mkdir() reports for the component that failed.
 *
 * The root directory is accepted without calling mkdir() on it. */
static int mkdir_p(const char *path, mode_t mode) {
    char tmp[PATH_MAX];

    /* Work on a private copy, components are cut in place below */
    int written = snprintf(tmp, sizeof(tmp), "%s", path);
    if (written < 0 || (size_t)written >= sizeof(tmp)) {
        errno = ENAMETOOLONG;
        return -1;
    }

    size_t len = (size_t)written;
    if (len == 0) {
        errno = ENOENT;
        return -1;
    }

    /* Ignore one trailing slash, but never turn "/" into an empty path */
    if (len > 1 && tmp[len - 1] == '/') {
        tmp[--len] = '\0';
    }

    /* The root always exists, and mkdir("/") does not fail with EEXIST on
     * every platform */
    if (len == 1 && tmp[0] == '/') {
        return 0;
    }

    /* Create every intermediate directory. The scan starts at the second
     * character so that the leading slash of an absolute path is skipped */
    for (char *p = tmp + 1; *p != '\0'; p++) {
        if (*p == '/') {
            *p = '\0';
            if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
                return -1;
            }
            *p = '/';
        }
    }

    /* And finally the directory itself */
    if (mkdir(tmp, mode) != 0 && errno != EEXIST) {
        return -1;
    }

    return 0;
}
1606
/* Open FILENAME for writing, creating its parent directories if needed.
 *
 * filename: path of the file to open.
 * append:   optional, may be NULL. A non-NULL pointer asks for an existing
 *           file to be extended instead of overwritten, and is used to tell
 *           the caller what happened: it is set to true when FILENAME
 *           already existed (opened in "a" mode) and to false when it did
 *           not (parent directories created, opened in "w" mode).
 *           With a NULL pointer the file is always opened in "w" mode.
 *
 * Returns the open stream, or NULL after emitting a warning if a directory
 * could not be created or the file could not be opened. */
static FILE *open_file_with_dirs(const char *filename, bool *append) {

    if (access(filename, F_OK) != 0) {
        /* New file: make sure its parent directories exist.
         * dirname() may modify its argument, so it gets a copy. */
        char parent[PATH_MAX];
        snprintf(parent, sizeof(parent), "%s", filename);
        char *parent_dir = dirname(parent);

        if (mkdir_p(parent_dir, 0755) != 0) {
            warning("Cannot create directory %s: %s", parent_dir, strerror(errno));
            return NULL;
        }

        if (append != NULL) {
            *append = false;
        }

        FILE *new_file = fopen(filename, "w");
        if (new_file == NULL) {
            warning("Cannot create file %s: %s", filename, strerror(errno));
        }
        return new_file;
    }

    /* Existing file: extend it if the caller supports appending,
     * overwrite it otherwise */
    const char *open_mode = "w";
    if (append != NULL) {
        *append = true;
        open_mode = "a";
    }

    FILE *existing_file = fopen(filename, open_mode);
    if (existing_file == NULL) {
        warning("Cannot open existing file %s: %s", filename, strerror(errno));
    }
    return existing_file;
}
1642
1643
1644
1645 /*********************************************************************************/
1646 /* Finalize */
1647 /*********************************************************************************/
1648
/* Return true if the given coefficient lies within [0.0, 1.0].
 * A NaN fails the first comparison and is therefore rejected. */
static bool check_coefficient(float coeffiecient) {

    /* Written as a negated ">=" on purpose: "coeffiecient < 0" would let
     * a NaN through */
    if (!(0.0f <= coeffiecient)) {
        return false;
    }

    return coeffiecient <= 1.0;
}
1652
1653 static void warn_negative_counters(void) {
1654 static bool warned_once = false;
1655 if (!warned_once) {
1656 warning("Some obtained PAPI counters contain negative values. Check your"
1657 " installation or report the error to %s", PACKAGE_BUGREPORT);
1658 warned_once = true;
1659 }
1660 }
1661
1662 static void warn_wrong_coefficient(void) {
1663 static bool warned_once = false;
1664 if (!warned_once) {
1665 warning("Some computed POP metric coefficient is not within the allowed"
1666 " range [0.0, 1.0]. If you think this is an unexpected value,"
1667 " please report the error to %s", PACKAGE_BUGREPORT);
1668 warned_once = true;
1669 }
1670 }
1671
1672 37 static void sanitize_records(void) {
1673
1674 /* pop_metrics_records:
1675 * - instructions and cycles need to be >= 0
1676 * - computed efficiencyes need to be [0.0, 1.0]
1677 */
1678 37 for (GSList *node = pop_metrics_records;
1679
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 37 times.
69 node != NULL;
1680 32 node = node->next) {
1681
1682 32 dlb_pop_metrics_t *record = node->data;
1683
1684
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if (record->cycles < 0) {
1685 record->cycles = 0.0;
1686 warn_negative_counters();
1687 }
1688
1689
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 32 times.
32 if (record->instructions < 0) {
1690 record->instructions = 0.0;
1691 warn_negative_counters();
1692 }
1693
1694
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 if (!check_coefficient(record->parallel_efficiency)
1695
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_parallel_efficiency)
1696
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_communication_efficiency)
1697
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_load_balance)
1698
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_load_balance_in)
1699
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->mpi_load_balance_out)
1700
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->omp_parallel_efficiency)
1701
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->omp_load_balance)
1702
1/2
✓ Branch 1 taken 32 times.
✗ Branch 2 not taken.
32 || !check_coefficient(record->omp_scheduling_efficiency)
1703
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 32 times.
32 || !check_coefficient(record->omp_serialization_efficiency)) {
1704 warn_wrong_coefficient();
1705 }
1706 }
1707
1708 /* node_records: nothing to sanitize for now */
1709
1710 /* region_records: */
1711 37 for (GSList *node = region_records;
1712
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 37 times.
51 node != NULL;
1713 14 node = node->next) {
1714
1715 14 region_record_t *region_record = node->data;
1716
1717
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
28 for (int i = 0; i < region_record->num_mpi_ranks; ++i) {
1718
1719 14 dlb_monitor_t *monitor = &region_record->process_records[i].monitor;
1720
1721
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (monitor->cycles < 0) {
1722 monitor->cycles = 0.0;
1723 warn_negative_counters();
1724 }
1725
1726
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (monitor->instructions < 0) {
1727 monitor->instructions = 0.0;
1728 warn_negative_counters();
1729 }
1730 }
1731 }
1732 37 }
1733
/* Return an allocated string: base + "_%h_%p.partial" + extension
 *
 * FILENAME is split at the last dot of its last path component, e.g.
 * "out/talp.json" becomes "out/talp_%h_%p.partial.json". A dot that belongs
 * to a directory name is never taken as the start of the extension.
 *
 * FILENAME is expected to have an extension, which is checked with
 * ensure(). Should that check let it through anyway, the suffix is appended
 * at the end of the name.
 *
 * The caller owns the returned string and must free() it. NULL is returned
 * if the memory cannot be allocated. */
static char *build_partial_template(const char *filename) {

    /* Look for the extension in the last path component only */
    const char *last_slash = strrchr(filename, '/');
    const char *last_component = last_slash != NULL ? last_slash + 1 : filename;
    const char *dot = strrchr(last_component, '.');

    /* Validate before any pointer arithmetic is done with the result */
    ensure(dot != NULL, "expected filename with extension %s. Please report bug.", __func__);
    if (dot == NULL) {
        /* No extension: empty one, placed at the end of the name */
        dot = filename + strlen(filename);
    }

    const char *ext = dot;
    size_t base_len = (size_t)(dot - filename);

    size_t len =
        base_len +
        strlen("_%h_%p.partial") +
        strlen(ext) +
        1;

    char *out = malloc(len);
    if (out == NULL) {
        return NULL;
    }

    /* "%%" keeps the "%h" and "%p" placeholders literal in the result */
    snprintf(out, len,
            "%.*s_%%h_%%p.partial%s",
            (int)base_len,
            filename,
            ext);

    return out;
}
1759
/* Detect the job id across schedulers.
 *
 * The environment variables below are tried in order, and the value of the
 * first one that is set to a non-empty string is returned. The returned
 * pointer is the one given by getenv(), it must not be freed.
 * Returns NULL if none of them is set. */
static const char *get_job_id(void) {

    static const char *const job_id_vars[] = {
        "SLURM_JOB_ID",
        "SLURM_JOBID",
        "FLUX_JOB_ID",
        "PBS_JOBID",
        "LSB_JOBID",
        "JOB_ID",
    };
    const size_t num_vars = sizeof(job_id_vars) / sizeof(job_id_vars[0]);

    for (size_t idx = 0; idx < num_vars; ++idx) {
        const char *value = getenv(job_id_vars[idx]);

        /* A variable that is unset or empty does not count */
        if (value == NULL || value[0] == '\0') {
            continue;
        }
        return value;
    }

    return NULL;
}
1781
1782 /* Expands output file, e.g.: talp_%p.json -> talp_123.json */
1783 13 static char *expand_output_filename(const char *template)
1784 {
1785
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 2 times.
13 if (strchr(template, '%') == NULL) return NULL;
1786
1787 char hostname[HOST_NAME_MAX];
1788 2 gethostname(hostname, HOST_NAME_MAX);
1789
1790 2 pid_t pid = getpid();
1791 char pid_buf[32];
1792 2 snprintf(pid_buf, sizeof(pid_buf), "%d", pid);
1793
1794 2 const char *job = get_job_id();
1795
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 const char *job_str = job ? job : "0";
1796
1797 2 size_t hst_len = strlen(hostname);
1798 2 size_t pid_len = strlen(pid_buf);
1799 2 size_t job_len = strlen(job_str);
1800
1801 /* Compute output length */
1802
1803 2 size_t out_len = 0;
1804
1805
2/2
✓ Branch 0 taken 140 times.
✓ Branch 1 taken 2 times.
142 for (size_t i = 0; template[i]; i++) {
1806
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 136 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
140 if (template[i] == '%' && template[i+1]) {
1807
2/5
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 switch (template[i+1]) {
1808 2 case 'h':
1809 2 out_len += hst_len;
1810 2 i++;
1811 2 continue;
1812 2 case 'p':
1813 2 out_len += pid_len;
1814 2 i++;
1815 2 continue;
1816 case 'j':
1817 out_len += job_len;
1818 i++;
1819 continue;
1820 case '%':
1821 out_len += 1;
1822 i++;
1823 continue;
1824 }
1825 }
1826 136 out_len += 1;
1827 }
1828
1829
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (out_len > PATH_MAX) return NULL;
1830
1831 /* Fill buffer */
1832
1833 2 char *output_filename = malloc(out_len + 1);
1834
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (!output_filename) return NULL;
1835
1836 2 char *out = output_filename;
1837
1838
2/2
✓ Branch 0 taken 140 times.
✓ Branch 1 taken 2 times.
142 for (size_t i = 0; template[i]; ++i) {
1839
3/4
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 136 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
140 if (template[i] == '%' && template[i+1]) {
1840
2/5
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 switch (template[i+1]) {
1841 2 case 'h':
1842 2 memcpy(out, hostname, hst_len);
1843 2 out += hst_len;
1844 2 ++i;
1845 2 continue;
1846
1847 2 case 'p':
1848 2 memcpy(out, pid_buf, pid_len);
1849 2 out += pid_len;
1850 2 ++i;
1851 2 continue;
1852
1853 case 'j':
1854 memcpy(out, job_str, job_len);
1855 out += job_len;
1856 ++i;
1857 continue;
1858
1859 case '%':
1860 *out++ = '%';
1861 ++i;
1862 continue;
1863 }
1864 }
1865
1866 136 *out++ = template[i];
1867 }
1868
1869 2 *out = '\0';
1870
1871 2 return output_filename;
1872 }
1873
1874 37 void talp_output_finalize(const char *output_file, bool partial_output) {
1875
1876 /* For efficiency when adding records, they are prepended to their respective lists.
1877 * Then, they are reversed here to print them in alphabetical order. */
1878 37 pop_metrics_records = g_slist_reverse(pop_metrics_records);
1879 37 node_records = g_slist_reverse(node_records);
1880 37 region_records = g_slist_reverse(region_records);
1881
1882 /* Sanitize erroneous values */
1883 37 sanitize_records();
1884
1885 37 talp_output_record_common();
1886
1887 /* If the process has changed the locale, temporarily push the C locale to
1888 * print floats with the expected notation (a comma as a decimal separator
1889 * will break CSV and JSON files). The object associated with the locale
1890 * can be safely freed after it has been set. */
1891 37 locale_t new_locale = newlocale(LC_ALL, "C", 0);
1892 37 uselocale(new_locale);
1893 37 freelocale(new_locale);
1894
1895
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 13 times.
37 if (output_file == NULL) {
1896 /* No output file, just print all records */
1897 24 pop_metrics_print();
1898 24 node_print();
1899 24 process_print();
1900 } else {
1901 /* Do not open file if process has no data */
1902
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
13 if (pop_metrics_records == NULL
1903 && node_records == NULL
1904 && region_records == NULL) return;
1905
1906 /* Check file extension */
1907 typedef enum Extension {
1908 EXT_JSON,
1909 EXT_XML,
1910 EXT_CSV,
1911 EXT_TXT,
1912 } extension_t;
1913 13 extension_t extension = EXT_TXT;
1914 13 const char *ext = strrchr(output_file, '.');
1915
2/2
✓ Branch 0 taken 12 times.
✓ Branch 1 taken 1 times.
13 if (ext != NULL) {
1916
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (strcmp(ext+1, "json") == 0) {
1917 6 extension = EXT_JSON;
1918
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 } else if (strcmp(ext+1, "xml") == 0) {
1919 1 extension = EXT_XML;
1920
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
5 } else if (strcmp(ext+1, "csv") == 0) {
1921 3 extension = EXT_CSV;
1922 }
1923 }
1924
1925 /* Deprecation warning*/
1926
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 12 times.
13 if(extension == EXT_XML){
1927 1 warning("Deprecated: The support for XML output is deprecated and"
1928 " will be removed in the next release");
1929 }
1930
1931 /* Flag check */
1932
3/4
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 6 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
13 if (extension != EXT_JSON && partial_output) {
1933 warning("Option --talp-partial-output is only supported for JSON format."
1934 " Disabling option");
1935 partial_output = false;
1936 }
1937
1938 /* Obtain the real filename depending on whether output_file is a template,
1939 * or the user requested partial output */
1940 13 char *template = NULL;
1941 13 char *filename = NULL;
1942
1943
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 12 times.
13 if (partial_output) {
1944 1 template = build_partial_template(output_file);
1945 } else {
1946 12 template = strdup(output_file);
1947 }
1948
1949 13 filename = expand_output_filename(template);
1950
1951
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 11 times.
13 if (filename != NULL) {
1952 2 output_file = filename;
1953 }
1954
1955 13 free(template);
1956
1957 /* Specific case where output file needs to be split */
1958
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 10 times.
13 if (extension == EXT_CSV
1959 3 && !!(pop_metrics_records != NULL)
1960 3 + !!(node_records != NULL)
1961
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
4 + !!(region_records != NULL) > 1) {
1962
1963 /* Length without extension */
1964 1 int filename_useful_len = ext - output_file;
1965
1966 /* POP */
1967
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_metrics_records != NULL) {
1968 1 const char *pop_ext = "-pop.csv";
1969 1 size_t pop_file_len = filename_useful_len + strlen(pop_ext) + 1;
1970 1 char *pop_filename = malloc(sizeof(char)*pop_file_len);
1971 1 sprintf(pop_filename, "%.*s%s", filename_useful_len, output_file, pop_ext);
1972 bool append_to_csv;
1973 1 FILE *pop_file = open_file_with_dirs(pop_filename, &append_to_csv);
1974
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (pop_file) {
1975 1 pop_metrics_to_csv(pop_file, append_to_csv);
1976 1 fclose(pop_file);
1977 } else {
1978 warning("Writing metrics to stdout instead:");
1979 pop_metrics_to_csv(stdout, /* append: */ false);
1980 }
1981 }
1982
1983 /* Node */
1984
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_records != NULL) {
1985 1 const char *node_ext = "-node.csv";
1986 1 size_t node_file_len = filename_useful_len + strlen(node_ext) + 1;
1987 1 char *node_filename = malloc(sizeof(char)*node_file_len);
1988 1 sprintf(node_filename, "%.*s%s", filename_useful_len, output_file, node_ext);
1989 bool append_to_csv;
1990 1 FILE *node_file = open_file_with_dirs(node_filename, &append_to_csv);
1991
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (node_file) {
1992 1 node_to_csv(node_file, append_to_csv);
1993 1 fclose(node_file);
1994 } else {
1995 warning("Writing metrics to stdout instead:");
1996 node_to_csv(stdout, /* append: */ false);
1997 }
1998 }
1999
2000 /* Process */
2001
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (region_records != NULL) {
2002 1 const char *process_ext = "-process.csv";
2003 1 size_t process_file_len = filename_useful_len + strlen(process_ext) + 1;
2004 1 char *process_filename = malloc(sizeof(char)*process_file_len);
2005 1 sprintf(process_filename, "%.*s%s", filename_useful_len, output_file, process_ext);
2006 bool append_to_csv;
2007 1 FILE *process_file = open_file_with_dirs(process_filename, &append_to_csv);
2008
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (process_file) {
2009 1 process_to_csv(process_file, append_to_csv);
2010 1 fclose(process_file);
2011 } else {
2012 warning("Writing metrics to stdout instead:");
2013 process_to_csv(stdout, /* append: */ false);
2014 }
2015 }
2016 }
2017
2018 /* Write to file */
2019 else {
2020 /* Open file */
2021 bool append_to_csv;
2022
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
12 FILE *out_file = open_file_with_dirs(output_file,
2023 extension == EXT_CSV ? &append_to_csv : NULL);
2024
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
12 if (!out_file) {
2025 2 warning("Writing metrics to stdout instead:");
2026 2 out_file = stdout;
2027 2 append_to_csv = false;
2028 }
2029
2030 /* Write records to file */
2031
4/5
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 3 times.
✗ Branch 4 not taken.
12 switch(extension) {
2032 6 case EXT_JSON:
2033 6 json_header(out_file);
2034 6 common_to_json(out_file);
2035 6 resources_to_json(out_file);
2036 6 process_info_to_json(out_file);
2037 6 pop_metrics_to_json(out_file);
2038 6 node_to_json(out_file);
2039 6 process_to_json(out_file);
2040 6 json_footer(out_file);
2041 6 break;
2042 1 case EXT_XML:
2043 1 xml_header(out_file);
2044 1 common_to_xml(out_file);
2045 1 resources_to_xml(out_file);
2046 1 pop_metrics_to_xml(out_file);
2047 1 node_to_xml(out_file);
2048 1 process_to_xml(out_file);
2049 1 xml_footer(out_file);
2050 1 break;
2051 2 case EXT_CSV:
2052 2 pop_metrics_to_csv(out_file, append_to_csv);
2053 2 node_to_csv(out_file, append_to_csv);
2054 2 process_to_csv(out_file, append_to_csv);
2055 2 break;
2056 3 case EXT_TXT:
2057 3 common_to_txt(out_file);
2058 3 resources_to_txt(out_file);
2059 3 pop_metrics_to_txt(out_file);
2060 3 node_to_txt(out_file);
2061 3 process_to_txt(out_file);
2062 3 break;
2063 }
2064
2065 /* Close file */
2066
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.
12 if (out_file != stdout) {
2067 10 fclose(out_file);
2068 }
2069 }
2070
2071
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 11 times.
13 if (filename != NULL) {
2072 2 free(filename);
2073 2 filename = NULL;
2074 }
2075 }
2076
2077 // Restore locale
2078 37 uselocale(LC_GLOBAL_LOCALE);
2079
2080 // De-allocate all records
2081 37 common_finalize();
2082 37 pop_metrics_finalize();
2083 37 node_finalize();
2084 37 process_finalize();
2085 }
2086