Line | Branch | Exec | Source |
---|---|---|---|
1 | /*********************************************************************************/ | ||
2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
3 | /* */ | ||
4 | /* This file is part of the DLB library. */ | ||
5 | /* */ | ||
6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
9 | /* (at your option) any later version. */ | ||
10 | /* */ | ||
11 | /* DLB is distributed in the hope that it will be useful, */ | ||
12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
14 | /* GNU Lesser General Public License for more details. */ | ||
15 | /* */ | ||
16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
18 | /*********************************************************************************/ | ||
19 | |||
20 | #ifdef HAVE_CONFIG_H | ||
21 | #include <config.h> | ||
22 | #endif | ||
23 | |||
24 | #include "support/talp_output.h" | ||
25 | |||
26 | #include "apis/dlb_talp.h" | ||
27 | #include "LB_core/DLB_talp.h" | ||
28 | #include "support/debug.h" | ||
29 | #include "support/gslist.h" | ||
30 | #include "support/mytime.h" | ||
31 | #include "support/perf_metrics.h" | ||
32 | |||
33 | #include <errno.h> | ||
34 | #include <stdio.h> | ||
35 | #include <stdlib.h> | ||
36 | #include <string.h> | ||
37 | #include <limits.h> | ||
38 | #include <locale.h> | ||
39 | #include <pthread.h> | ||
40 | |||
41 | |||
/* Instructions-per-cycle ratio that never divides by a non-positive value.
 * Returns instructions / cycles only when both operands are strictly
 * positive; any other input (including zero or negative counters) yields 0. */
static float sanitized_ipc(float instructions, float cycles) {
    /* Keep the positive test un-negated per operand so that the result for
     * unordered (NaN) inputs is also 0 */
    if (!(instructions > 0 && cycles > 0)) {
        return 0.0f;
    }
    return instructions / cycles;
}
49 | |||
50 | |||
51 | /*********************************************************************************/ | ||
52 | /* Monitoring Region */ | ||
53 | /*********************************************************************************/ | ||
54 | |||
55 | 7 | void talp_output_print_monitoring_region(const dlb_monitor_t *monitor, | |
56 | const char *cpuset_str, bool have_mpi, bool have_openmp, bool have_papi) { | ||
57 | |||
58 | char elapsed_time_str[16]; | ||
59 | 7 | ns_to_human(elapsed_time_str, 16, monitor->elapsed_time); | |
60 | |||
61 | 7 | info("################# Monitoring Region Summary #################"); | |
62 | 7 | info("### Name: %s", monitor->name); | |
63 | 7 | info("### Elapsed Time: %s", elapsed_time_str); | |
64 | 7 | info("### Useful time: %"PRId64" ns", | |
65 | 7 | monitor->useful_time); | |
66 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 4 times.
|
7 | if (have_mpi) { |
67 | 3 | info("### Not useful MPI: %"PRId64" ns", | |
68 | 3 | monitor->mpi_time); | |
69 | } | ||
70 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | if (have_openmp) { |
71 | ✗ | info("### Not useful OMP Load Balance: %"PRId64" ns", | |
72 | ✗ | monitor->omp_load_imbalance_time); | |
73 | ✗ | info("### Not useful OMP Scheduling: %"PRId64" ns", | |
74 | ✗ | monitor->omp_scheduling_time); | |
75 | ✗ | info("### Not useful OMP Serialization: %"PRId64" ns", | |
76 | ✗ | monitor->omp_serialization_time); | |
77 | } | ||
78 | 7 | info("### CpuSet: %s", cpuset_str); | |
79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | if (have_papi) { |
80 | ✗ | float ipc = sanitized_ipc(monitor->instructions, monitor->cycles); | |
81 | ✗ | info("### IPC: %.2f ", ipc); | |
82 | } | ||
83 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 4 times.
|
7 | if (have_mpi) { |
84 | 3 | info("### Number of MPI calls: %"PRId64, | |
85 | 3 | monitor->num_mpi_calls); | |
86 | } | ||
87 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | if (have_openmp) { |
88 | ✗ | info("### Number of OpenMP parallels: %"PRId64, | |
89 | ✗ | monitor->num_omp_parallels); | |
90 | ✗ | info("### Number of OpenMP tasks: %"PRId64, | |
91 | ✗ | monitor->num_omp_tasks); | |
92 | } | ||
93 | 7 | } | |
94 | |||
95 | |||
96 | /*********************************************************************************/ | ||
97 | /* POP Metrics */ | ||
98 | /*********************************************************************************/ | ||
99 | |||
100 | static GSList *pop_metrics_records = NULL; | ||
101 | |||
102 | 20 | void talp_output_record_pop_metrics(const dlb_pop_metrics_t *metrics) { | |
103 | |||
104 | /* Copy structure */ | ||
105 | 20 | dlb_pop_metrics_t *new_record = malloc(sizeof(dlb_pop_metrics_t)); | |
106 | 20 | *new_record = *metrics; | |
107 | |||
108 | /* Add record to list */ | ||
109 | 20 | pop_metrics_records = g_slist_prepend(pop_metrics_records, new_record); | |
110 | 20 | } | |
111 | |||
112 | 14 | static void pop_metrics_print(void) { | |
113 | |||
114 | 14 | for (GSList *node = pop_metrics_records; | |
115 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 14 times.
|
28 | node != NULL; |
116 | 14 | node = node->next) { | |
117 | |||
118 | 14 | dlb_pop_metrics_t *record = node->data; | |
119 | |||
120 |
2/2✓ Branch 0 taken 9 times.
✓ Branch 1 taken 5 times.
|
14 | if (record->elapsed_time > 0) { |
121 | |||
122 | 9 | float avg_ipc = sanitized_ipc(record->instructions, record->cycles); | |
123 | char elapsed_time_str[16]; | ||
124 | 9 | ns_to_human(elapsed_time_str, 16, record->elapsed_time); | |
125 | 9 | info("############### Monitoring Region POP Metrics ###############"); | |
126 | 9 | info("### Name: %s", record->name); | |
127 | 9 | info("### Elapsed Time: %s", elapsed_time_str); | |
128 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 8 times.
|
9 | if (avg_ipc > 0.0f) { |
129 | 1 | info("### Average IPC: %1.2f", avg_ipc); | |
130 | } | ||
131 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (record->mpi_parallel_efficiency > 0.0f && |
132 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | record->omp_parallel_efficiency > 0.0f) { |
133 | 9 | info("### Parallel efficiency: %1.2f", | |
134 | 9 | record->parallel_efficiency); | |
135 | } | ||
136 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (record->mpi_parallel_efficiency > 0.0f) { |
137 | 9 | info("### MPI Parallel efficiency: %1.2f", | |
138 | 9 | record->mpi_parallel_efficiency); | |
139 | 9 | info("### - MPI Communication efficiency: %1.2f", | |
140 | 9 | record->mpi_communication_efficiency); | |
141 | 9 | info("### - MPI Load Balance: %1.2f", | |
142 | 9 | record->mpi_load_balance); | |
143 | 9 | info("### - MPI Load Balance in: %1.2f", | |
144 | 9 | record->mpi_load_balance_in); | |
145 | 9 | info("### - MPI Load Balance out: %1.2f", | |
146 | 9 | record->mpi_load_balance_out); | |
147 | } | ||
148 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | if (record->omp_parallel_efficiency > 0.0f) { |
149 | 9 | info("### OpenMP Parallel efficiency: %1.2f", | |
150 | 9 | record->omp_parallel_efficiency); | |
151 | 9 | info("### - OpenMP Load Balance: %1.2f", | |
152 | 9 | record->omp_load_balance); | |
153 | 9 | info("### - OpenMP Scheduling efficiency: %1.2f", | |
154 | 9 | record->omp_scheduling_efficiency); | |
155 | 9 | info("### - OpenMP Serialization efficiency: %1.2f", | |
156 | 9 | record->omp_serialization_efficiency); | |
157 | } | ||
158 | } else { | ||
159 | 5 | info("############### Monitoring Region POP Metrics ###############"); | |
160 | 5 | info("### Name: %s", record->name); | |
161 | 5 | info("### No data ###"); | |
162 | } | ||
163 | } | ||
164 | 14 | } | |
165 | |||
166 | 1 | static void pop_metrics_to_json(FILE *out_file) { | |
167 | |||
168 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (pop_metrics_records != NULL) { |
169 | 1 | fprintf(out_file, | |
170 | " \"Application\": {\n"); | ||
171 | |||
172 | 1 | for (GSList *node = pop_metrics_records; | |
173 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
174 | 1 | node = node->next) { | |
175 | |||
176 | 1 | dlb_pop_metrics_t *record = node->data; | |
177 | |||
178 | 1 | fprintf(out_file, | |
179 | " \"%s\": {\n" | ||
180 | " \"numCpus\": %d,\n" | ||
181 | " \"numMpiRanks\": %d,\n" | ||
182 | " \"numNodes\": %d,\n" | ||
183 | " \"avgCpus\": %.1f,\n" | ||
184 | " \"cycles\": %.0f,\n" | ||
185 | " \"instructions\": %.0f,\n" | ||
186 | " \"numMeasurements\": %"PRId64",\n" | ||
187 | " \"numMpiCalls\": %"PRId64",\n" | ||
188 | " \"numOmpParallels\": %"PRId64",\n" | ||
189 | " \"numOmpTasks\": %"PRId64",\n" | ||
190 | " \"elapsedTime\": %"PRId64",\n" | ||
191 | " \"usefulTime\": %"PRId64",\n" | ||
192 | " \"mpiTime\": %"PRId64",\n" | ||
193 | " \"ompLoadImbalanceTime\": %"PRId64",\n" | ||
194 | " \"ompSchedulingTime\": %"PRId64",\n" | ||
195 | " \"ompSerializationTime\": %"PRId64",\n" | ||
196 | " \"usefulNormdApp\": %.0f,\n" | ||
197 | " \"mpiNormdApp\": %.0f,\n" | ||
198 | " \"maxUsefulNormdProc\": %.0f,\n" | ||
199 | " \"maxUsefulNormdNode\": %.0f,\n" | ||
200 | " \"mpiNormdOfMaxUseful\": %.0f,\n" | ||
201 | " \"parallelEfficiency\": %.2f,\n" | ||
202 | " \"mpiParallelEfficiency\": %.2f,\n" | ||
203 | " \"mpiCommunicationEfficiency\": %.2f,\n" | ||
204 | " \"mpiLoadBalance\": %.2f,\n" | ||
205 | " \"mpiLoadBalanceIn\": %.2f,\n" | ||
206 | " \"mpiLoadBalanceOut\": %.2f,\n" | ||
207 | " \"ompParallelEfficiency\": %.2f,\n" | ||
208 | " \"ompLoadBalance\": %.2f,\n" | ||
209 | " \"ompSchedulingEfficiency\": %.2f,\n" | ||
210 | " \"ompSerializationEfficiency\": %.2f\n" | ||
211 | " }%s\n", | ||
212 | 1 | record->name, | |
213 | record->num_cpus, | ||
214 | record->num_mpi_ranks, | ||
215 | record->num_nodes, | ||
216 | 1 | record->avg_cpus, | |
217 | record->cycles, | ||
218 | record->instructions, | ||
219 | record->num_measurements, | ||
220 | record->num_mpi_calls, | ||
221 | record->num_omp_parallels, | ||
222 | record->num_omp_tasks, | ||
223 | record->elapsed_time, | ||
224 | record->useful_time, | ||
225 | record->mpi_time, | ||
226 | record->omp_load_imbalance_time, | ||
227 | record->omp_scheduling_time, | ||
228 | record->omp_serialization_time, | ||
229 | record->useful_normd_app, | ||
230 | record->mpi_normd_app, | ||
231 | record->max_useful_normd_proc, | ||
232 | record->max_useful_normd_node, | ||
233 | record->mpi_normd_of_max_useful, | ||
234 | 1 | record->parallel_efficiency, | |
235 | 1 | record->mpi_parallel_efficiency, | |
236 | 1 | record->mpi_communication_efficiency, | |
237 | 1 | record->mpi_load_balance, | |
238 | 1 | record->mpi_load_balance_in, | |
239 | 1 | record->mpi_load_balance_out, | |
240 | 1 | record->omp_parallel_efficiency, | |
241 | 1 | record->omp_load_balance, | |
242 | 1 | record->omp_scheduling_efficiency, | |
243 | 1 | record->omp_serialization_efficiency, | |
244 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | node->next != NULL ? "," : ""); |
245 | } | ||
246 | 1 | fprintf(out_file, | |
247 | " }"); /* no eol */ | ||
248 | } | ||
249 | 1 | } | |
250 | |||
251 | 1 | static void pop_metrics_to_xml(FILE *out_file) { | |
252 | |||
253 | 1 | for (GSList *node = pop_metrics_records; | |
254 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
255 | 1 | node = node->next) { | |
256 | |||
257 | 1 | dlb_pop_metrics_t *record = node->data; | |
258 | |||
259 | 1 | fprintf(out_file, | |
260 | " <Application>\n" | ||
261 | " <name>%s</name>\n" | ||
262 | " <numCpus>%d</numCpus>\n" | ||
263 | " <numMpiRanks>%d</numMpiRanks>\n" | ||
264 | " <numNodes>%d</numNodes>\n" | ||
265 | " <avgCpus>%.1f</avgCpus>\n" | ||
266 | " <cycles>%.0f</cycles>\n" | ||
267 | " <instructions>%.0f</instructions>\n" | ||
268 | " <numMeasurements>%"PRId64"</numMeasurements>\n" | ||
269 | " <numMpiCalls>%"PRId64"</numMpiCalls>\n" | ||
270 | " <numOmpParallels>%"PRId64"</numOmpParallels>\n" | ||
271 | " <numOmpTasks>%"PRId64"</numOmpTasks>\n" | ||
272 | " <elapsedTime>%"PRId64"</elapsedTime>\n" | ||
273 | " <usefulTime>%"PRId64"</usefulTime>\n" | ||
274 | " <mpiTime>%"PRId64"</mpiTime>\n" | ||
275 | " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n" | ||
276 | " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n" | ||
277 | " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n" | ||
278 | " <usefulNormdApp>%.0f</usefulNormdApp>\n" | ||
279 | " <mpiNormdApp>%.0f</mpiNormdApp>\n" | ||
280 | " <maxUsefulNormdProc>%.0f</maxUsefulNormdProc>\n" | ||
281 | " <maxUsefulNormdNode>%.0f</maxUsefulNormdNode>\n" | ||
282 | " <mpiNormdOfMaxUseful>%.0f</mpiNormdOfMaxUseful>\n" | ||
283 | " <parallelEfficiency>%.2f</parallelEfficiency>\n" | ||
284 | " <mpiParallelEfficiency>%.2f</mpiParallelEfficiency>\n" | ||
285 | " <mpiCommunicationEfficiency>%.2f</mpiCommunicationEfficiency>\n" | ||
286 | " <mpiLoadBalance>%.2f</mpiLoadBalance>\n" | ||
287 | " <mpiLoadBalanceIn>%.2f</mpiLoadBalanceIn>\n" | ||
288 | " <mpiLoadBalanceOut>%.2f</mpiLoadBalanceOut>\n" | ||
289 | " <ompParallelEfficiency>%.2f</ompParallelEfficiency>\n" | ||
290 | " <ompLoadBalance>%.2f</ompLoadBalance>\n" | ||
291 | " <ompSchedulingEfficiency>%.2f</ompSchedulingEfficiency>\n" | ||
292 | " <ompSerializationEfficiency>%.2f</ompSerializationEfficiency>\n" | ||
293 | " </Application>\n", | ||
294 | 1 | record->name, | |
295 | record->num_cpus, | ||
296 | record->num_mpi_ranks, | ||
297 | record->num_nodes, | ||
298 | 1 | record->avg_cpus, | |
299 | record->cycles, | ||
300 | record->instructions, | ||
301 | record->num_measurements, | ||
302 | record->num_mpi_calls, | ||
303 | record->num_omp_parallels, | ||
304 | record->num_omp_tasks, | ||
305 | record->elapsed_time, | ||
306 | record->useful_time, | ||
307 | record->mpi_time, | ||
308 | record->omp_load_imbalance_time, | ||
309 | record->omp_scheduling_time, | ||
310 | record->omp_serialization_time, | ||
311 | record->useful_normd_app, | ||
312 | record->mpi_normd_app, | ||
313 | record->max_useful_normd_proc, | ||
314 | record->max_useful_normd_node, | ||
315 | record->mpi_normd_of_max_useful, | ||
316 | 1 | record->parallel_efficiency, | |
317 | 1 | record->mpi_parallel_efficiency, | |
318 | 1 | record->mpi_communication_efficiency, | |
319 | 1 | record->mpi_load_balance, | |
320 | 1 | record->mpi_load_balance_in, | |
321 | 1 | record->mpi_load_balance_out, | |
322 | 1 | record->omp_parallel_efficiency, | |
323 | 1 | record->omp_load_balance, | |
324 | 1 | record->omp_scheduling_efficiency, | |
325 | 1 | record->omp_serialization_efficiency | |
326 | ); | ||
327 | } | ||
328 | 1 | } | |
329 | |||
330 | 1 | static void pop_metrics_to_txt(FILE *out_file) { | |
331 | |||
332 | 1 | for (GSList *node = pop_metrics_records; | |
333 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
334 | 1 | node = node->next) { | |
335 | |||
336 | 1 | dlb_pop_metrics_t *record = node->data; | |
337 | |||
338 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (record->elapsed_time > 0) { |
339 | 1 | fprintf(out_file, | |
340 | "############### Monitoring Region POP Metrics ###############\n" | ||
341 | "### Name: %s\n" | ||
342 | "### Number of CPUs: %d\n" | ||
343 | "### Number of MPI processes: %d\n" | ||
344 | "### Number of nodes: %d\n" | ||
345 | "### Average CPUs: %.1f\n" | ||
346 | "### Cycles: %.0f\n" | ||
347 | "### Instructions: %.0f\n" | ||
348 | "### Number of measurements: %"PRId64"\n" | ||
349 | "### Number of MPI calls: %"PRId64"\n" | ||
350 | "### Number of OpenMP parallel regions: %"PRId64"\n" | ||
351 | "### Number of OpenMP explicit tasks: %"PRId64"\n" | ||
352 | "### Elapsed Time (ns): %"PRId64"\n" | ||
353 | "### Useful Time (ns): %"PRId64"\n" | ||
354 | "### MPI Time (ns): %"PRId64"\n" | ||
355 | "### OpenMP Load Imbalance Time (ns): %"PRId64"\n" | ||
356 | "### OpenMP Scheduling Time (ns): %"PRId64"\n" | ||
357 | "### OpenMP Serialization Time (ns): %"PRId64"\n" | ||
358 | "### Useful Time normalized to App: %.0f\n" | ||
359 | "### MPI Time normalized to App: %.0f\n" | ||
360 | "### Maximum useful time across processes: %.0f\n" | ||
361 | "### Maximum useful time across nodes: %.0f\n" | ||
362 | "### MPI time normalized at process level of\n" | ||
363 | "### the process with the max useful time: %.0f\n" | ||
364 | "### Parallel efficiency: %.2f\n" | ||
365 | "### MPI Parallel efficiency: %.2f\n" | ||
366 | "### - MPI Communication efficiency: %.2f\n" | ||
367 | "### - MPI Load Balance: %.2f\n" | ||
368 | "### - MPI Load Balance in: %.2f\n" | ||
369 | "### - MPI Load Balance out: %.2f\n" | ||
370 | "### OpenMP Parallel efficiency: %.2f\n" | ||
371 | "### - OpenMP Load Balance: %.2f\n" | ||
372 | "### - OpenMP Scheduling efficiency: %.2f\n" | ||
373 | "### - OpenMP Serialization efficiency: %.2f\n", | ||
374 | 1 | record->name, | |
375 | record->num_cpus, | ||
376 | record->num_mpi_ranks, | ||
377 | record->num_nodes, | ||
378 | 1 | record->avg_cpus, | |
379 | record->cycles, | ||
380 | record->instructions, | ||
381 | record->num_measurements, | ||
382 | record->num_mpi_calls, | ||
383 | record->num_omp_parallels, | ||
384 | record->num_omp_tasks, | ||
385 | record->elapsed_time, | ||
386 | record->useful_time, | ||
387 | record->mpi_time, | ||
388 | record->omp_load_imbalance_time, | ||
389 | record->omp_scheduling_time, | ||
390 | record->omp_serialization_time, | ||
391 | record->useful_normd_app, | ||
392 | record->mpi_normd_app, | ||
393 | record->max_useful_normd_proc, | ||
394 | record->max_useful_normd_node, | ||
395 | record->mpi_normd_of_max_useful, | ||
396 | 1 | record->parallel_efficiency, | |
397 | 1 | record->mpi_parallel_efficiency, | |
398 | 1 | record->mpi_communication_efficiency, | |
399 | 1 | record->mpi_load_balance, | |
400 | 1 | record->mpi_load_balance_in, | |
401 | 1 | record->mpi_load_balance_out, | |
402 | 1 | record->omp_parallel_efficiency, | |
403 | 1 | record->omp_load_balance, | |
404 | 1 | record->omp_scheduling_efficiency, | |
405 | 1 | record->omp_serialization_efficiency | |
406 | ); | ||
407 | } else { | ||
408 | ✗ | fprintf(out_file, | |
409 | "############### Monitoring Region POP Metrics ###############\n" | ||
410 | "### Name: %s\n" | ||
411 | "### No data ###\n", | ||
412 | ✗ | record->name); | |
413 | } | ||
414 | } | ||
415 | 1 | } | |
416 | |||
417 | 3 | static void pop_metrics_to_csv(FILE *out_file, bool append) { | |
418 | |||
419 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (pop_metrics_records == NULL) return; |
420 | |||
421 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (!append) { |
422 | /* Print header */ | ||
423 | 2 | fprintf(out_file, | |
424 | "name," | ||
425 | "numCpus," | ||
426 | "numMpiRanks," | ||
427 | "numNodes," | ||
428 | "avgCpus," | ||
429 | "cycles," | ||
430 | "instructions," | ||
431 | "numMeasurements," | ||
432 | "numMpiCalls," | ||
433 | "numOmpParallels," | ||
434 | "numOmpTasks," | ||
435 | "elapsedTime," | ||
436 | "usefulTime," | ||
437 | "mpiTime," | ||
438 | "ompLoadImbalanceTime," | ||
439 | "ompSchedulingTime," | ||
440 | "ompSerializationTime," | ||
441 | "usefulNormdApp," | ||
442 | "mpiNormdApp," | ||
443 | "maxUsefulNormdProc," | ||
444 | "maxUsefulNormdNode," | ||
445 | "mpiNormdOfMaxUseful," | ||
446 | "parallelEfficiency," | ||
447 | "mpiParallelEfficiency," | ||
448 | "mpiCommunicationEfficiency," | ||
449 | "mpiLoadBalance," | ||
450 | "mpiLoadBalanceIn," | ||
451 | "mpiLoadBalanceOut," | ||
452 | "ompParallelEfficiency," | ||
453 | "ompLoadBalance," | ||
454 | "ompSchedulingEfficiency," | ||
455 | "ompSerializationEfficiency\n" | ||
456 | ); | ||
457 | } | ||
458 | |||
459 | 3 | for (GSList *node = pop_metrics_records; | |
460 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | node != NULL; |
461 | 3 | node = node->next) { | |
462 | |||
463 | 3 | dlb_pop_metrics_t *record = node->data; | |
464 | |||
465 | 3 | fprintf(out_file, | |
466 | "\"%s\"," /* name */ | ||
467 | "%d," /* numCpus */ | ||
468 | "%d," /* numMpiRanks */ | ||
469 | "%d," /* numNodes */ | ||
470 | "%.1f," /* avgCpus */ | ||
471 | "%.0f," /* cycles */ | ||
472 | "%.0f," /* instructions */ | ||
473 | "%"PRId64"," /* numMeasurements */ | ||
474 | "%"PRId64"," /* numMpiRanks */ | ||
475 | "%"PRId64"," /* numOmpParallels */ | ||
476 | "%"PRId64"," /* numOmpTasks */ | ||
477 | "%"PRId64"," /* elapsedTime */ | ||
478 | "%"PRId64"," /* usefulTime */ | ||
479 | "%"PRId64"," /* mpiTime */ | ||
480 | "%"PRId64"," /* ompLoadImbalanceTime */ | ||
481 | "%"PRId64"," /* ompSchedulingTime */ | ||
482 | "%"PRId64"," /* ompSerializationTime */ | ||
483 | "%.0f," /* usefulNormdApp */ | ||
484 | "%.0f," /* mpiNormdApp */ | ||
485 | "%.0f," /* maxUsefulNormdProc */ | ||
486 | "%.0f," /* maxUsefulNormdNode */ | ||
487 | "%.0f," /* mpiNormdOfMaxUseful */ | ||
488 | "%.2f," /* parallelEfficiency */ | ||
489 | "%.2f," /* mpiParallelEfficiency */ | ||
490 | "%.2f," /* mpiCommunicationEfficiency */ | ||
491 | "%.2f," /* mpiLoadBalance */ | ||
492 | "%.2f," /* mpiLoadBalanceIn */ | ||
493 | "%.2f," /* mpiLoadBalanceOut */ | ||
494 | "%.2f," /* ompParallelEfficiency */ | ||
495 | "%.2f," /* ompLoadBalance */ | ||
496 | "%.2f," /* ompSchedulingEfficiency */ | ||
497 | "%.2f\n", /* ompSerializationEfficiency */ | ||
498 | 3 | record->name, | |
499 | record->num_cpus, | ||
500 | record->num_mpi_ranks, | ||
501 | record->num_nodes, | ||
502 | 3 | record->avg_cpus, | |
503 | record->cycles, | ||
504 | record->instructions, | ||
505 | record->num_measurements, | ||
506 | record->num_mpi_calls, | ||
507 | record->num_omp_parallels, | ||
508 | record->num_omp_tasks, | ||
509 | record->elapsed_time, | ||
510 | record->useful_time, | ||
511 | record->mpi_time, | ||
512 | record->omp_load_imbalance_time, | ||
513 | record->omp_scheduling_time, | ||
514 | record->omp_serialization_time, | ||
515 | record->useful_normd_app, | ||
516 | record->mpi_normd_app, | ||
517 | record->max_useful_normd_proc, | ||
518 | record->max_useful_normd_node, | ||
519 | record->mpi_normd_of_max_useful, | ||
520 | 3 | record->parallel_efficiency, | |
521 | 3 | record->mpi_parallel_efficiency, | |
522 | 3 | record->mpi_communication_efficiency, | |
523 | 3 | record->mpi_load_balance, | |
524 | 3 | record->mpi_load_balance_in, | |
525 | 3 | record->mpi_load_balance_out, | |
526 | 3 | record->omp_parallel_efficiency, | |
527 | 3 | record->omp_load_balance, | |
528 | 3 | record->omp_scheduling_efficiency, | |
529 | 3 | record->omp_serialization_efficiency | |
530 | ); | ||
531 | } | ||
532 | } | ||
533 | |||
534 | 20 | static void pop_metrics_finalize(void) { | |
535 | |||
536 | /* Free every record data */ | ||
537 | 20 | for (GSList *node = pop_metrics_records; | |
538 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | node != NULL; |
539 | 20 | node = node->next) { | |
540 | |||
541 | 20 | dlb_pop_metrics_t *record = node->data; | |
542 | 20 | free(record); | |
543 | } | ||
544 | |||
545 | /* Free list */ | ||
546 | 20 | g_slist_free(pop_metrics_records); | |
547 | 20 | pop_metrics_records = NULL; | |
548 | 20 | } | |
549 | |||
550 | |||
551 | /*********************************************************************************/ | ||
552 | /* Node */ | ||
553 | /*********************************************************************************/ | ||
554 | |||
555 | static GSList *node_records = NULL; | ||
556 | |||
557 | 5 | void talp_output_record_node(const node_record_t *node_record) { | |
558 | |||
559 | 5 | int nelems = node_record->nelems; | |
560 | |||
561 | /* Allocate new record */ | ||
562 | 5 | size_t process_records_size = sizeof(process_in_node_record_t) * nelems; | |
563 | 5 | size_t node_record_size = sizeof(node_record_t) + process_records_size; | |
564 | 5 | node_record_t *new_record = malloc(node_record_size); | |
565 | |||
566 | /* Memcpy the entire struct */ | ||
567 | 5 | memcpy(new_record, node_record, node_record_size); | |
568 | |||
569 | /* Insert to list */ | ||
570 | 5 | node_records = g_slist_prepend(node_records, new_record); | |
571 | 5 | } | |
572 | |||
573 | 14 | static void node_print(void) { | |
574 | |||
575 | 14 | for (GSList *node = node_records; | |
576 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 14 times.
|
15 | node != NULL; |
577 | 1 | node = node->next) { | |
578 | |||
579 | 1 | node_record_t *node_record = node->data; | |
580 | |||
581 | 1 | info(" |----------------------------------------------------------|"); | |
582 | 1 | info(" | Extended Report Node %4d |", | |
583 | node_record->node_id); | ||
584 | 1 | info(" |----------------------------------------------------------|"); | |
585 | 1 | info(" | Process | Useful Time | MPI Time |"); | |
586 | 1 | info(" |------------|----------------------|----------------------|"); | |
587 | |||
588 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int i = 0; i < node_record->nelems; ++i) { |
589 | 2 | info(" | %-10d | %18e s | %18e s |", | |
590 | node_record->processes[i].pid, | ||
591 | nsecs_to_secs(node_record->processes[i].useful_time), | ||
592 | nsecs_to_secs(node_record->processes[i].mpi_time)); | ||
593 | 2 | info(" |------------|----------------------|----------------------|"); | |
594 | } | ||
595 | |||
596 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (node_record->nelems > 0) { |
597 | 1 | info(" |------------|----------------------|----------------------|"); | |
598 | 1 | info(" | %-10s | %18e s | %18e s |", "Node Avg", | |
599 | nsecs_to_secs(node_record->avg_useful_time), | ||
600 | nsecs_to_secs(node_record->avg_mpi_time)); | ||
601 | 1 | info(" |------------|----------------------|----------------------|"); | |
602 | 1 | info(" | %-10s | %18e s | %18e s |", "Node Max", | |
603 | nsecs_to_secs(node_record->max_useful_time), | ||
604 | nsecs_to_secs(node_record->max_mpi_time)); | ||
605 | 1 | info(" |------------|----------------------|----------------------|"); | |
606 | } | ||
607 | } | ||
608 | 14 | } | |
609 | |||
610 | 1 | static void node_to_json(FILE *out_file) { | |
611 | |||
612 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (node_records == NULL) return; |
613 | |||
614 | /* If there are pop_metrics, append to the existing dictionary */ | ||
615 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (pop_metrics_records != NULL) { |
616 | 1 | fprintf(out_file,",\n"); | |
617 | } | ||
618 | |||
619 | 1 | fprintf(out_file, | |
620 | " \"node\": [\n"); | ||
621 | |||
622 | 1 | for (GSList *node = node_records; | |
623 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
624 | 1 | node = node->next) { | |
625 | |||
626 | 1 | node_record_t *node_record = node->data; | |
627 | |||
628 | 1 | fprintf(out_file, | |
629 | " {\n" | ||
630 | " \"id\": \"%d\",\n" | ||
631 | " \"process\": [\n", | ||
632 | node_record->node_id); | ||
633 | |||
634 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int i = 0; i < node_record->nelems; ++i) { |
635 | 2 | fprintf(out_file, | |
636 | " {\n" | ||
637 | " \"id\": %d,\n" | ||
638 | " \"usefulTime\": %"PRId64",\n" | ||
639 | " \"mpiTime\": %"PRId64"\n" | ||
640 | " }%s\n", | ||
641 | node_record->processes[i].pid, | ||
642 | node_record->processes[i].useful_time, | ||
643 | node_record->processes[i].mpi_time, | ||
644 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | i+1 < node_record->nelems ? "," : ""); |
645 | } | ||
646 | |||
647 | 1 | fprintf(out_file, | |
648 | " ],\n" | ||
649 | " \"nodeAvg\": {\n" | ||
650 | " \"usefulTime\": %"PRId64",\n" | ||
651 | " \"mpiTime\": %"PRId64"\n" | ||
652 | " },\n" | ||
653 | " \"nodeMax\": {\n" | ||
654 | " \"usefulTime\": %"PRId64",\n" | ||
655 | " \"mpiTime\": %"PRId64"\n" | ||
656 | " }\n" | ||
657 | " }%s\n", | ||
658 | node_record->avg_useful_time, | ||
659 | node_record->avg_mpi_time, | ||
660 | node_record->max_useful_time, | ||
661 | node_record->max_mpi_time, | ||
662 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | node->next != NULL ? "," : ""); |
663 | } | ||
664 | 1 | fprintf(out_file, | |
665 | " ]"); /* no eol */ | ||
666 | } | ||
667 | |||
668 | 1 | static void node_to_xml(FILE *out_file) { | |
669 | |||
670 | 1 | for (GSList *node = node_records; | |
671 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
672 | 1 | node = node->next) { | |
673 | |||
674 | 1 | node_record_t *node_record = node->data; | |
675 | |||
676 | 1 | fprintf(out_file, | |
677 | " <node>\n" | ||
678 | " <id>%d</id>\n", | ||
679 | node_record->node_id); | ||
680 | |||
681 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int i = 0; i < node_record->nelems; ++i) { |
682 | 2 | fprintf(out_file, | |
683 | " <process>\n" | ||
684 | " <id>%d</id>\n" | ||
685 | " <usefulTime>%"PRId64"</usefulTime>\n" | ||
686 | " <mpiTime>%"PRId64"</mpiTime>\n" | ||
687 | " </process>\n", | ||
688 | node_record->processes[i].pid, | ||
689 | node_record->processes[i].useful_time, | ||
690 | node_record->processes[i].mpi_time); | ||
691 | } | ||
692 | |||
693 | 1 | fprintf(out_file, | |
694 | " <nodeAvg>\n" | ||
695 | " <usefulTime>%"PRId64"</usefulTime>\n" | ||
696 | " <mpiTime>%"PRId64"</mpiTime>\n" | ||
697 | " </nodeAvg>\n" | ||
698 | " <nodeMax>\n" | ||
699 | " <usefulTime>%"PRId64"</usefulTime>\n" | ||
700 | " <mpiTime>%"PRId64"</mpiTime>\n" | ||
701 | " </nodeMax>\n" | ||
702 | " </node>\n", | ||
703 | node_record->avg_useful_time, | ||
704 | node_record->avg_mpi_time, | ||
705 | node_record->max_useful_time, | ||
706 | node_record->max_mpi_time); | ||
707 | } | ||
708 | 1 | } | |
709 | |||
710 | 3 | static void node_to_csv(FILE *out_file, bool append) { | |
711 | |||
712 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (node_records == NULL) return; |
713 | |||
714 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (!append) { |
715 | /* Print header */ | ||
716 | 1 | fprintf(out_file, | |
717 | "NodeId," | ||
718 | "ProcessId," | ||
719 | "ProcessUsefulTime," | ||
720 | "ProcessMPITime," | ||
721 | "NodeAvgUsefulTime," | ||
722 | "NodeAvgMPITime," | ||
723 | "NodeMaxUsefulTime," | ||
724 | "NodeMaxMPITime\n"); | ||
725 | } | ||
726 | |||
727 | 1 | for (GSList *node = node_records; | |
728 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
729 | 1 | node = node->next) { | |
730 | |||
731 | 1 | node_record_t *node_record = node->data; | |
732 | |||
733 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int i = 0; i < node_record->nelems; ++i) { |
734 | 2 | fprintf(out_file, | |
735 | "%d," /* NodeId */ | ||
736 | "%d," /* ProcessId */ | ||
737 | "%"PRId64"," /* ProcessUsefulTime */ | ||
738 | "%"PRId64"," /* ProcessMPITime */ | ||
739 | "%"PRId64"," /* NodeAvgUsefulTime */ | ||
740 | "%"PRId64"," /* NodeAvgMPITime*/ | ||
741 | "%"PRId64"," /* NodeMaxUsefulTime */ | ||
742 | "%"PRId64"\n", /* NodeMaxMPITime*/ | ||
743 | node_record->node_id, | ||
744 | node_record->processes[i].pid, | ||
745 | node_record->processes[i].useful_time, | ||
746 | node_record->processes[i].mpi_time, | ||
747 | node_record->avg_useful_time, | ||
748 | node_record->avg_mpi_time, | ||
749 | node_record->max_useful_time, | ||
750 | node_record->max_mpi_time); | ||
751 | |||
752 | } | ||
753 | } | ||
754 | } | ||
755 | |||
756 | 1 | static void node_to_txt(FILE *out_file) { | |
757 | |||
758 | 1 | for (GSList *node = node_records; | |
759 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
760 | 1 | node = node->next) { | |
761 | |||
762 | 1 | node_record_t *node_record = node->data; | |
763 | |||
764 | 1 | fprintf(out_file, | |
765 | " |----------------------------------------------------------|\n" | ||
766 | " | Extended Report Node %4d |\n" | ||
767 | " |----------------------------------------------------------|\n" | ||
768 | " | Process | Useful Time | MPI Time |\n" | ||
769 | " |------------|----------------------|----------------------|\n", | ||
770 | node_record->node_id); | ||
771 | |||
772 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | for (int i = 0; i < node_record->nelems; ++i) { |
773 | 2 | fprintf(out_file, | |
774 | " | %-10d | %18e s | %18e s |\n" | ||
775 | " |------------|----------------------|----------------------|\n", | ||
776 | node_record->processes[i].pid, | ||
777 | nsecs_to_secs(node_record->processes[i].useful_time), | ||
778 | nsecs_to_secs(node_record->processes[i].mpi_time)); | ||
779 | } | ||
780 | |||
781 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (node_record->nelems > 0) { |
782 | 1 | fprintf(out_file, | |
783 | " |------------|----------------------|----------------------|\n" | ||
784 | " | %-10s | %18e s | %18e s |\n" | ||
785 | " |------------|----------------------|----------------------|\n" | ||
786 | " | %-10s | %18e s | %18e s |\n" | ||
787 | " |------------|----------------------|----------------------|\n", | ||
788 | "Node Avg", | ||
789 | nsecs_to_secs(node_record->avg_useful_time), | ||
790 | nsecs_to_secs(node_record->avg_mpi_time), | ||
791 | "Node Max", | ||
792 | nsecs_to_secs(node_record->max_useful_time), | ||
793 | nsecs_to_secs(node_record->max_mpi_time)); | ||
794 | } | ||
795 | } | ||
796 | 1 | } | |
797 | |||
798 | 20 | static void node_finalize(void) { | |
799 | |||
800 | /* Free every record data */ | ||
801 | 20 | for (GSList *node = node_records; | |
802 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 20 times.
|
25 | node != NULL; |
803 | 5 | node = node->next) { | |
804 | |||
805 | 5 | node_record_t *record = node->data; | |
806 | 5 | free(record); | |
807 | } | ||
808 | |||
809 | /* Free list */ | ||
810 | 20 | g_slist_free(node_records); | |
811 | 20 | node_records = NULL; | |
812 | 20 | } | |
813 | |||
814 | |||
815 | /*********************************************************************************/ | ||
816 | /* Process */ | ||
817 | /*********************************************************************************/ | ||
818 | |||
819 | typedef struct region_record_t { | ||
820 | char name[DLB_MONITOR_NAME_MAX]; | ||
821 | int num_mpi_ranks; | ||
822 | process_record_t process_records[]; | ||
823 | } region_record_t; | ||
824 | |||
825 | static GSList *region_records = NULL; | ||
826 | |||
827 | 7 | void talp_output_record_process(const char *region_name, | |
828 | const process_record_t *process_record, int num_mpi_ranks) { | ||
829 | |||
830 | 7 | region_record_t *region_record = NULL; | |
831 | |||
832 | /* Find region or allocate new one */ | ||
833 | 7 | for (GSList *node = region_records; | |
834 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
|
8 | node != NULL; |
835 | 1 | node = node->next) { | |
836 | |||
837 | 1 | region_record_t *record = node->data; | |
838 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (strcmp(record->name, region_name) == 0) { |
839 | ✗ | region_record = record; | |
840 | ✗ | break; | |
841 | } | ||
842 | } | ||
843 | |||
844 | /* Allocate if not found */ | ||
845 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | if (region_record == NULL) { |
846 | /* Allocate and initialize new region */ | ||
847 | 7 | size_t region_record_size = sizeof(region_record_t) + | |
848 | 7 | sizeof(process_record_t) * num_mpi_ranks; | |
849 | 7 | region_record = malloc(region_record_size); | |
850 | 7 | *region_record = (const region_record_t) { | |
851 | .num_mpi_ranks = num_mpi_ranks, | ||
852 | }; | ||
853 | 7 | snprintf(region_record->name, DLB_MONITOR_NAME_MAX, "%s", | |
854 | region_name); | ||
855 | |||
856 | /* Insert to list */ | ||
857 | 7 | region_records = g_slist_prepend(region_records, region_record); | |
858 | } | ||
859 | |||
860 | /* Copy process_record */ | ||
861 | 7 | int rank = process_record->rank; | |
862 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | ensure(rank < num_mpi_ranks, "Wrong rank number in %s", __func__); |
863 | 7 | memcpy(®ion_record->process_records[rank], process_record, sizeof(process_record_t)); | |
864 | 7 | } | |
865 | |||
866 | 14 | static void process_print(void) { | |
867 | |||
868 | 14 | for (GSList *node = region_records; | |
869 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 14 times.
|
17 | node != NULL; |
870 | 3 | node = node->next) { | |
871 | |||
872 | 3 | region_record_t *region_record = node->data; | |
873 | |||
874 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | for (int i = 0; i < region_record->num_mpi_ranks; ++i) { |
875 | |||
876 | 3 | process_record_t *process_record = ®ion_record->process_records[i]; | |
877 | |||
878 | 3 | info("################# Monitoring Region Summary ##################"); | |
879 | 3 | info("### Name: %s", | |
880 | 3 | region_record->name); | |
881 | 3 | info("### Process: %d (%s)", | |
882 | 3 | process_record->pid, process_record->hostname); | |
883 | 3 | info("### Rank: %d", | |
884 | process_record->rank); | ||
885 | 3 | info("### CpuSet: %s", | |
886 | 3 | process_record->cpuset); | |
887 | 3 | info("### Elapsed time: %"PRId64" ns", | |
888 | process_record->monitor.elapsed_time); | ||
889 | 3 | info("### Useful time: %"PRId64" ns", | |
890 | process_record->monitor.useful_time); | ||
891 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | if (process_record->monitor.mpi_time > 0) { |
892 | 1 | info("### Not useful MPI: %"PRId64" ns", | |
893 | process_record->monitor.mpi_time); | ||
894 | } | ||
895 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (process_record->monitor.omp_load_imbalance_time > 0 |
896 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | || process_record->monitor.omp_scheduling_time > 0 |
897 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | || process_record->monitor.omp_serialization_time > 0) { |
898 | ✗ | info("### Not useful OMP Load Imbalance: %"PRId64" ns", | |
899 | process_record->monitor.omp_load_imbalance_time); | ||
900 | ✗ | info("### Not useful OMP Scheduling: %"PRId64" ns", | |
901 | process_record->monitor.omp_scheduling_time); | ||
902 | ✗ | info("### Not useful OMP Serialization: %"PRId64" ns", | |
903 | process_record->monitor.omp_serialization_time); | ||
904 | } | ||
905 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
|
3 | if (process_record->monitor.instructions > 0 |
906 | ✗ | && process_record->monitor.cycles > 0) { | |
907 | ✗ | info("### IPC : %.2f", | |
908 | ✗ | (float)process_record->monitor.instructions | |
909 | ✗ | / process_record->monitor.cycles); | |
910 | } | ||
911 | } | ||
912 | } | ||
913 | 14 | } | |
914 | |||
915 | 1 | static void process_to_json(FILE *out_file) { | |
916 | |||
917 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (region_records == NULL) return; |
918 | |||
919 | /* If there are pop_metrics or node_metrics, append to the existing dictionary */ | ||
920 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (pop_metrics_records != NULL |
921 | ✗ | || node_records != NULL) { | |
922 | 1 | fprintf(out_file,",\n"); | |
923 | } | ||
924 | |||
925 | 1 | fprintf(out_file, | |
926 | " \"Process\": {\n"); | ||
927 | |||
928 | 1 | for (GSList *node = region_records; | |
929 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
930 | 1 | node = node->next) { | |
931 | |||
932 | 1 | region_record_t *region_record = node->data; | |
933 | |||
934 | 1 | fprintf(out_file, | |
935 | " \"%s\": [\n", | ||
936 | 1 | region_record->name); | |
937 | |||
938 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int i = 0; i < region_record->num_mpi_ranks; ++i) { |
939 | |||
940 | 1 | process_record_t *process_record = ®ion_record->process_records[i]; | |
941 | |||
942 | 1 | fprintf(out_file, | |
943 | " {\n" | ||
944 | " \"rank\": %d,\n" | ||
945 | " \"pid\": %d,\n" | ||
946 | " \"nodeId\": %d,\n" | ||
947 | " \"hostname\": \"%s\",\n" | ||
948 | " \"cpuset\": %s,\n" | ||
949 | " \"numCpus\": %d,\n" | ||
950 | " \"avgCpus\": %.1f,\n" | ||
951 | " \"cycles\": %"PRId64",\n" | ||
952 | " \"instructions\": %"PRId64",\n" | ||
953 | " \"numMeasurements\": %d,\n" | ||
954 | " \"numResets\": %d,\n" | ||
955 | " \"numMpiCalls\": %"PRId64",\n" | ||
956 | " \"numOmpParallels\": %"PRId64",\n" | ||
957 | " \"numOmpTasks\": %"PRId64",\n" | ||
958 | " \"elapsedTime\": %"PRId64",\n" | ||
959 | " \"usefulTime\": %"PRId64",\n" | ||
960 | " \"mpiTime\": %"PRId64",\n" | ||
961 | " \"ompLoadImbalanceTime\": %"PRId64",\n" | ||
962 | " \"ompSchedulingTime\": %"PRId64",\n" | ||
963 | " \"ompSerializationTime\": %"PRId64"\n" | ||
964 | " }%s\n", | ||
965 | process_record->rank, | ||
966 | process_record->pid, | ||
967 | process_record->node_id, | ||
968 | 1 | process_record->hostname, | |
969 | 1 | process_record->cpuset_quoted, | |
970 | process_record->monitor.num_cpus, | ||
971 | 1 | process_record->monitor.avg_cpus, | |
972 | process_record->monitor.cycles, | ||
973 | process_record->monitor.instructions, | ||
974 | process_record->monitor.num_measurements, | ||
975 | process_record->monitor.num_resets, | ||
976 | process_record->monitor.num_mpi_calls, | ||
977 | process_record->monitor.num_omp_parallels, | ||
978 | process_record->monitor.num_omp_tasks, | ||
979 | process_record->monitor.elapsed_time, | ||
980 | process_record->monitor.useful_time, | ||
981 | process_record->monitor.mpi_time, | ||
982 | process_record->monitor.omp_load_imbalance_time, | ||
983 | process_record->monitor.omp_scheduling_time, | ||
984 | process_record->monitor.omp_serialization_time, | ||
985 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | i + 1 < region_record->num_mpi_ranks ? "," : ""); |
986 | } | ||
987 | 1 | fprintf(out_file, | |
988 | " ]%s\n", | ||
989 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | node->next != NULL ? "," : ""); |
990 | } | ||
991 | 1 | fprintf(out_file, | |
992 | " }"); /* no eol */ | ||
993 | } | ||
994 | |||
995 | 1 | static void process_to_xml(FILE *out_file) { | |
996 | |||
997 | 1 | for (GSList *node = region_records; | |
998 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
999 | 1 | node = node->next) { | |
1000 | |||
1001 | 1 | region_record_t *region_record = node->data; | |
1002 | |||
1003 | 1 | fprintf(out_file, | |
1004 | " <Process>\n" | ||
1005 | " <name>%s</name>\n", | ||
1006 | 1 | region_record->name); | |
1007 | |||
1008 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int i = 0; i < region_record->num_mpi_ranks; ++i) { |
1009 | |||
1010 | 1 | process_record_t *process_record = ®ion_record->process_records[i]; | |
1011 | |||
1012 | 1 | fprintf(out_file, | |
1013 | " <process>\n" | ||
1014 | " <rank>%d</rank>\n" | ||
1015 | " <pid>%d</pid>\n" | ||
1016 | " <nodeId>%d</nodeId>\n" | ||
1017 | " <hostname>%s</hostname>\n" | ||
1018 | " <cpuset>%s</cpuset>\n" | ||
1019 | " <numCpus>%d</numCpus>\n" | ||
1020 | " <avgCpus>%.1f</avgCpus>\n" | ||
1021 | " <cycles>%"PRId64"</cycles>\n" | ||
1022 | " <instructions>%"PRId64"</instructions>\n" | ||
1023 | " <numMeasurements>%d</numMeasurements>\n" | ||
1024 | " <numResets>%d</numResets>\n" | ||
1025 | " <numMpiCalls>%"PRId64"</numMpiCalls>\n" | ||
1026 | " <numOmpParallels>%"PRId64"</numOmpParallels>\n" | ||
1027 | " <numOmpTasks>%"PRId64"</numOmpTasks>\n" | ||
1028 | " <elapsedTime>%"PRId64"</elapsedTime>\n" | ||
1029 | " <usefulTime>%"PRId64"</usefulTime>\n" | ||
1030 | " <mpiTime>%"PRId64"</mpiTime>\n" | ||
1031 | " <ompLoadImbalanceTime>%"PRId64"</ompLoadImbalanceTime>\n" | ||
1032 | " <ompSchedulingTime>%"PRId64"</ompSchedulingTime>\n" | ||
1033 | " <ompSerializationTime>%"PRId64"</ompSerializationTime>\n" | ||
1034 | " </process>\n", | ||
1035 | process_record->rank, | ||
1036 | process_record->pid, | ||
1037 | process_record->node_id, | ||
1038 | 1 | process_record->hostname, | |
1039 | 1 | process_record->cpuset_quoted, | |
1040 | process_record->monitor.num_cpus, | ||
1041 | 1 | process_record->monitor.avg_cpus, | |
1042 | process_record->monitor.cycles, | ||
1043 | process_record->monitor.instructions, | ||
1044 | process_record->monitor.num_measurements, | ||
1045 | process_record->monitor.num_resets, | ||
1046 | process_record->monitor.num_mpi_calls, | ||
1047 | process_record->monitor.num_omp_parallels, | ||
1048 | process_record->monitor.num_omp_tasks, | ||
1049 | process_record->monitor.elapsed_time, | ||
1050 | process_record->monitor.useful_time, | ||
1051 | process_record->monitor.mpi_time, | ||
1052 | process_record->monitor.omp_load_imbalance_time, | ||
1053 | process_record->monitor.omp_scheduling_time, | ||
1054 | process_record->monitor.omp_serialization_time); | ||
1055 | } | ||
1056 | 1 | fprintf(out_file, | |
1057 | " </Process>\n"); | ||
1058 | } | ||
1059 | 1 | } | |
1060 | |||
1061 | 3 | static void process_to_csv(FILE *out_file, bool append) { | |
1062 | |||
1063 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (region_records == NULL) return; |
1064 | |||
1065 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (!append) { |
1066 | /* Print header */ | ||
1067 | 1 | fprintf(out_file, | |
1068 | "Region," | ||
1069 | "Rank," | ||
1070 | "PID," | ||
1071 | "NodeId," | ||
1072 | "Hostname," | ||
1073 | "CpuSet," | ||
1074 | "NumCpus," | ||
1075 | "AvgCpus," | ||
1076 | "Cycles," | ||
1077 | "Instructions," | ||
1078 | "NumMeasurements," | ||
1079 | "NumResets," | ||
1080 | "NumMpiCalls," | ||
1081 | "NumOmpParallels," | ||
1082 | "NumOmpTasks," | ||
1083 | "ElapsedTime," | ||
1084 | "UsefulTime," | ||
1085 | "MPITime," | ||
1086 | "OMPLoadImbalance," | ||
1087 | "OMPSchedulingTime," | ||
1088 | "OMPSerializationTime\n"); | ||
1089 | } | ||
1090 | |||
1091 | 1 | for (GSList *node = region_records; | |
1092 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
1093 | 1 | node = node->next) { | |
1094 | |||
1095 | 1 | region_record_t *region_record = node->data; | |
1096 | |||
1097 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int i = 0; i < region_record->num_mpi_ranks; ++i) { |
1098 | |||
1099 | 1 | process_record_t *process_record = ®ion_record->process_records[i]; | |
1100 | |||
1101 | 1 | fprintf(out_file, | |
1102 | "%s," /* Region */ | ||
1103 | "%d," /* Rank */ | ||
1104 | "%d," /* PID */ | ||
1105 | "%d," /* NodeId */ | ||
1106 | "%s," /* Hostname */ | ||
1107 | "%s," /* CpuSet */ | ||
1108 | "%d" /* NumCpus */ | ||
1109 | "%.1f," /* AvgCpus */ | ||
1110 | "%"PRId64"," /* Cycles */ | ||
1111 | "%"PRId64"," /* Instructions */ | ||
1112 | "%d," /* NumMeasurements */ | ||
1113 | "%d," /* NumResets */ | ||
1114 | "%"PRId64"," /* NumMpiCalls */ | ||
1115 | "%"PRId64"," /* NumOmpParallels */ | ||
1116 | "%"PRId64"," /* NumOmpTasks */ | ||
1117 | "%"PRId64"," /* ElapsedTime */ | ||
1118 | "%"PRId64"," /* UsefulTime */ | ||
1119 | "%"PRId64"," /* MPITime */ | ||
1120 | "%"PRId64"," /* OMPLoadImbalance */ | ||
1121 | "%"PRId64"," /* OMPSchedulingTime */ | ||
1122 | "%"PRId64",", /* OMPSerializationTime */ | ||
1123 | 1 | region_record->name, | |
1124 | process_record->rank, | ||
1125 | process_record->pid, | ||
1126 | process_record->node_id, | ||
1127 | 1 | process_record->hostname, | |
1128 | 1 | process_record->cpuset_quoted, | |
1129 | process_record->monitor.num_cpus, | ||
1130 | 1 | process_record->monitor.avg_cpus, | |
1131 | process_record->monitor.cycles, | ||
1132 | process_record->monitor.instructions, | ||
1133 | process_record->monitor.num_measurements, | ||
1134 | process_record->monitor.num_resets, | ||
1135 | process_record->monitor.num_mpi_calls, | ||
1136 | process_record->monitor.num_omp_parallels, | ||
1137 | process_record->monitor.num_omp_tasks, | ||
1138 | process_record->monitor.elapsed_time, | ||
1139 | process_record->monitor.useful_time, | ||
1140 | process_record->monitor.mpi_time, | ||
1141 | process_record->monitor.omp_load_imbalance_time, | ||
1142 | process_record->monitor.omp_scheduling_time, | ||
1143 | process_record->monitor.omp_serialization_time); | ||
1144 | } | ||
1145 | } | ||
1146 | } | ||
1147 | |||
1148 | 1 | static void process_to_txt(FILE *out_file) { | |
1149 | |||
1150 | 1 | for (GSList *node = region_records; | |
1151 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
1152 | 1 | node = node->next) { | |
1153 | |||
1154 | 1 | region_record_t *region_record = node->data; | |
1155 | |||
1156 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int i = 0; i < region_record->num_mpi_ranks; ++i) { |
1157 | |||
1158 | 1 | process_record_t *process_record = ®ion_record->process_records[i]; | |
1159 | |||
1160 | 2 | float ipc = process_record->monitor.cycles > 0 | |
1161 | ✗ | ? (float)process_record->monitor.instructions | |
1162 | ✗ | / process_record->monitor.cycles | |
1163 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | : 0.0f; |
1164 | |||
1165 | 1 | fprintf(out_file, | |
1166 | "################# Monitoring Region Summary ##################\n" | ||
1167 | "### Name: %s\n" | ||
1168 | "### Process: %d (%s)\n" | ||
1169 | "### Rank: %d\n" | ||
1170 | "### CpuSet: %s\n" | ||
1171 | "### Elapsed time: %"PRId64" ns\n" | ||
1172 | "### Useful time: %"PRId64" ns\n" | ||
1173 | "### Not useful MPI: %"PRId64" ns\n" | ||
1174 | "### Not useful OMP Load Imbalance: %"PRId64" ns\n" | ||
1175 | "### Not useful OMP Scheduling: %"PRId64" ns\n" | ||
1176 | "### Not useful OMP Serialization: %"PRId64" ns\n" | ||
1177 | "### IPC: %.2f\n", | ||
1178 | 1 | region_record->name, | |
1179 | 1 | process_record->pid, process_record->hostname, | |
1180 | process_record->rank, | ||
1181 | 1 | process_record->cpuset, | |
1182 | process_record->monitor.elapsed_time, | ||
1183 | process_record->monitor.useful_time, | ||
1184 | process_record->monitor.mpi_time, | ||
1185 | process_record->monitor.omp_load_imbalance_time, | ||
1186 | process_record->monitor.omp_scheduling_time, | ||
1187 | process_record->monitor.omp_serialization_time, | ||
1188 | ipc); | ||
1189 | } | ||
1190 | } | ||
1191 | 1 | } | |
1192 | |||
1193 | 20 | static void process_finalize(void) { | |
1194 | |||
1195 | /* Free every record data */ | ||
1196 | 20 | for (GSList *node = region_records; | |
1197 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 20 times.
|
27 | node != NULL; |
1198 | 7 | node = node->next) { | |
1199 | |||
1200 | 7 | region_record_t *record = node->data; | |
1201 | 7 | free(record); | |
1202 | } | ||
1203 | |||
1204 | /* Free list */ | ||
1205 | 20 | g_slist_free(region_records); | |
1206 | 20 | region_records = NULL; | |
1207 | 20 | } | |
1208 | |||
1209 | |||
1210 | /*********************************************************************************/ | ||
1211 | /* TALP Common */ | ||
1212 | /*********************************************************************************/ | ||
/* Data common to the whole report, independent of any region */
struct TALPCommonRecord {
    /* ISO 8601 string */
    char *time_of_creation;
    /* Major.Minor DLB version used */
    char *dlb_major_version;
    /* GIT description output */
    char *dlb_git_description;
};
typedef struct TALPCommonRecord talp_common_record_t;

/* Single file-scope instance of the common data */
static talp_common_record_t common_record;
1219 | |||
1220 | 20 | static void talp_output_record_common(void) { | |
1221 | /* Initialize structure */ | ||
1222 | 20 | time_t now = time(NULL); | |
1223 | 20 | common_record = (const talp_common_record_t) { | |
1224 | 20 | .time_of_creation = get_iso_8601_string(localtime(&now)), | |
1225 | .dlb_major_version = PACKAGE_VERSION, | ||
1226 | .dlb_git_description = DLB_GIT_DESCRIPTION, | ||
1227 | }; | ||
1228 | 20 | } | |
1229 | |||
1230 | 1 | static void common_to_json(FILE *out_file) { | |
1231 | 1 | fprintf(out_file, | |
1232 | " \"dlbVersion\": \"%s\",\n" | ||
1233 | " \"dlbGitVersion\": \"%s\",\n" | ||
1234 | " \"timestamp\": \"%s\",\n", | ||
1235 | common_record.dlb_major_version, | ||
1236 | common_record.dlb_git_description, | ||
1237 | common_record.time_of_creation); | ||
1238 | 1 | } | |
1239 | |||
1240 | 1 | static void common_to_xml(FILE *out_file) { | |
1241 | |||
1242 | 1 | fprintf(out_file, | |
1243 | " <dlbVersion>%s</dlbVersion>\n" | ||
1244 | " <dlbGitVersion>%s</dlbGitVersion>\n" | ||
1245 | " <timestamp>%s</timestamp>\n", | ||
1246 | common_record.dlb_major_version, | ||
1247 | common_record.dlb_git_description, | ||
1248 | common_record.time_of_creation); | ||
1249 | 1 | } | |
1250 | |||
1251 | 1 | static void common_to_txt(FILE *out_file) { | |
1252 | |||
1253 | 1 | fprintf(out_file, | |
1254 | "################ TALP Common Data ################\n" | ||
1255 | "### DLB Version: %s\n" | ||
1256 | "### DLB Git Version: %s\n" | ||
1257 | "### Timestamp: %s\n", | ||
1258 | common_record.dlb_major_version, | ||
1259 | common_record.dlb_git_description, | ||
1260 | common_record.time_of_creation); | ||
1261 | 1 | } | |
1262 | |||
1263 | 20 | static void common_finalize(void) { | |
1264 | 20 | free(common_record.time_of_creation); | |
1265 | 20 | } | |
1266 | |||
1267 | |||
1268 | |||
1269 | |||
1270 | /*********************************************************************************/ | ||
1271 | /* TALP Resources */ | ||
1272 | /*********************************************************************************/ | ||
/* Amount of resources used by the application, as reported to
 * talp_output_record_resources() */
struct TALPResourcesRecord {
    unsigned int num_cpus;          /* number of CPUs */
    unsigned int num_nodes;         /* number of nodes */
    unsigned int num_mpi_ranks;     /* number of MPI processes */
};
typedef struct TALPResourcesRecord talp_resources_record_t;

/* Single file-scope instance of the resources data */
static talp_resources_record_t resources_record;
1279 | |||
1280 | 3 | void talp_output_record_resources(int num_cpus, int num_nodes, int num_mpi_ranks) { | |
1281 | |||
1282 | 3 | resources_record = (const talp_resources_record_t) { | |
1283 | 3 | .num_cpus = (unsigned int) num_cpus, | |
1284 | 3 | .num_nodes = (unsigned int) num_nodes, | |
1285 | 3 | .num_mpi_ranks = (unsigned int) num_mpi_ranks | |
1286 | }; | ||
1287 | 3 | } | |
1288 | |||
1289 | 1 | static void resources_to_json(FILE *out_file) { | |
1290 | 1 | fprintf(out_file, | |
1291 | " \"resources\": {\n" | ||
1292 | " \"numCpus\": %u,\n" | ||
1293 | " \"numNodes\": %u,\n" | ||
1294 | " \"numMpiRanks\": %u\n" | ||
1295 | " },\n", | ||
1296 | resources_record.num_cpus, | ||
1297 | resources_record.num_nodes, | ||
1298 | resources_record.num_mpi_ranks); | ||
1299 | 1 | } | |
1300 | |||
1301 | 1 | static void resources_to_xml(FILE *out_file) { | |
1302 | |||
1303 | 1 | fprintf(out_file, | |
1304 | " <resources>\n" | ||
1305 | " <numCpus>%u</numCpus>\n" | ||
1306 | " <numNodes>%u</numNodes>\n" | ||
1307 | " <numMpiRanks>%u</numMpiRanks>\n" | ||
1308 | " </resources>", | ||
1309 | resources_record.num_cpus, | ||
1310 | resources_record.num_nodes, | ||
1311 | resources_record.num_mpi_ranks); | ||
1312 | 1 | } | |
1313 | |||
1314 | 1 | static void resources_to_txt(FILE *out_file) { | |
1315 | |||
1316 | 1 | fprintf(out_file, | |
1317 | "################# TALP Resources #################\n" | ||
1318 | "### Number of CPUs: %u\n" | ||
1319 | "### Number of Nodes: %u\n" | ||
1320 | "### Number of MPI processes: %u\n", | ||
1321 | resources_record.num_cpus, | ||
1322 | resources_record.num_nodes, | ||
1323 | resources_record.num_mpi_ranks); | ||
1324 | 1 | } | |
1325 | |||
1326 | |||
1327 | /*********************************************************************************/ | ||
1328 | /* Helper functions */ | ||
1329 | /*********************************************************************************/ | ||
1330 | |||
/* Open the top-level JSON dictionary */
static void json_header(FILE *out_file) {
    fputs("{\n", out_file);
}
1334 | |||
/* Close the top-level JSON dictionary; the leading end of line terminates
 * the last member, which is written without one */
static void json_footer(FILE *out_file) {
    fputs("\n}\n", out_file);
}
1338 | |||
/* Write the XML declaration and open the <root> element */
static void xml_header(FILE *out_file) {
    fputs("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n", out_file);
    fputs("<root>\n", out_file);
}
1343 | |||
/* Close the <root> element */
static void xml_footer(FILE *out_file) {
    fputs("</root>\n", out_file);
}
1347 | |||
1348 | |||
1349 | /*********************************************************************************/ | ||
1350 | /* Finalize */ | ||
1351 | /*********************************************************************************/ | ||
1352 | |||
/* Return true if the coefficient lies within the closed range [0.0, 1.0].
 * A NaN compares false against both bounds and is therefore rejected. */
static bool check_coefficient(float coeffiecient) {
    /* Written as a negated ">=" so that NaN fails the lower bound too */
    if (!(coeffiecient >= 0.0f)) {
        return false;
    }
    return coeffiecient <= 1.0f;
}
1356 | |||
1357 | ✗ | static void warn_negative_counters(void) { | |
1358 | static bool warned_once = false; | ||
1359 | ✗ | if (!warned_once) { | |
1360 | ✗ | warning("Some obtained PAPI counters contain negative values. Check your" | |
1361 | " installation or report the error to %s", PACKAGE_BUGREPORT); | ||
1362 | ✗ | warned_once = true; | |
1363 | } | ||
1364 | } | ||
1365 | |||
1366 | ✗ | static void warn_wrong_coefficient(void) { | |
1367 | static bool warned_once = false; | ||
1368 | ✗ | if (!warned_once) { | |
1369 | ✗ | warning("Some computed POP metric coefficient is not within the allowed" | |
1370 | " range [0.0, 1.0]. If you think this is an unexpected value," | ||
1371 | " please report the error to %s", PACKAGE_BUGREPORT); | ||
1372 | ✗ | warned_once = true; | |
1373 | } | ||
1374 | } | ||
1375 | |||
1376 | 20 | static void sanitize_records(void) { | |
1377 | |||
1378 | /* pop_metrics_records: | ||
1379 | * - instructions and cycles need to be >= 0 | ||
1380 | * - computed efficiencyes need to be [0.0, 1.0] | ||
1381 | */ | ||
1382 | 20 | for (GSList *node = pop_metrics_records; | |
1383 |
2/2✓ Branch 0 taken 20 times.
✓ Branch 1 taken 20 times.
|
40 | node != NULL; |
1384 | 20 | node = node->next) { | |
1385 | |||
1386 | 20 | dlb_pop_metrics_t *record = node->data; | |
1387 | |||
1388 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (record->cycles < 0) { |
1389 | ✗ | record->cycles = 0.0; | |
1390 | ✗ | warn_negative_counters(); | |
1391 | } | ||
1392 | |||
1393 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
|
20 | if (record->instructions < 0) { |
1394 | ✗ | record->instructions = 0.0; | |
1395 | ✗ | warn_negative_counters(); | |
1396 | } | ||
1397 | |||
1398 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | if (!check_coefficient(record->parallel_efficiency) |
1399 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->mpi_parallel_efficiency) |
1400 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->mpi_communication_efficiency) |
1401 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->mpi_load_balance) |
1402 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->mpi_load_balance_in) |
1403 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->mpi_load_balance_out) |
1404 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->omp_parallel_efficiency) |
1405 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->omp_load_balance) |
1406 |
1/2✓ Branch 1 taken 20 times.
✗ Branch 2 not taken.
|
20 | || !check_coefficient(record->omp_scheduling_efficiency) |
1407 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 20 times.
|
20 | || !check_coefficient(record->omp_serialization_efficiency)) { |
1408 | ✗ | warn_wrong_coefficient(); | |
1409 | } | ||
1410 | } | ||
1411 | |||
1412 | /* node_records: nothing to sanitize for now */ | ||
1413 | |||
1414 | /* region_records: */ | ||
1415 | 20 | for (GSList *node = region_records; | |
1416 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 20 times.
|
27 | node != NULL; |
1417 | 7 | node = node->next) { | |
1418 | |||
1419 | 7 | region_record_t *region_record = node->data; | |
1420 | |||
1421 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 7 times.
|
14 | for (int i = 0; i < region_record->num_mpi_ranks; ++i) { |
1422 | |||
1423 | 7 | dlb_monitor_t *monitor = ®ion_record->process_records[i].monitor; | |
1424 | |||
1425 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | if (monitor->cycles < 0) { |
1426 | ✗ | monitor->cycles = 0.0; | |
1427 | ✗ | warn_negative_counters(); | |
1428 | } | ||
1429 | |||
1430 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | if (monitor->instructions < 0) { |
1431 | ✗ | monitor->instructions = 0.0; | |
1432 | ✗ | warn_negative_counters(); | |
1433 | } | ||
1434 | } | ||
1435 | } | ||
1436 | 20 | } | |
1437 | |||
1438 | 20 | void talp_output_finalize(const char *output_file) { | |
1439 | |||
1440 | /* For efficiency, all records are prepended to their respective lists and | ||
1441 | * reversed here */ | ||
1442 | 20 | pop_metrics_records = g_slist_reverse(pop_metrics_records); | |
1443 | 20 | node_records = g_slist_reverse(node_records); | |
1444 | 20 | region_records = g_slist_reverse(region_records); | |
1445 | |||
1446 | /* Sanitize erroneous values */ | ||
1447 | 20 | sanitize_records(); | |
1448 | |||
1449 | 20 | talp_output_record_common(); | |
1450 | |||
1451 | /* If the process has changed the locale, temporarily push the C locale to | ||
1452 | * print floats with the expected notation (a comma as a decimal separator | ||
1453 | * will break CSV and JSON files). The object associated with the locale | ||
1454 | * can be safely freed after it has been set. */ | ||
1455 | 20 | locale_t new_locale = newlocale(LC_ALL, "C", 0); | |
1456 | 20 | uselocale(new_locale); | |
1457 | 20 | freelocale(new_locale); | |
1458 | |||
1459 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 6 times.
|
20 | if (output_file == NULL) { |
1460 | /* No output file, just print all records */ | ||
1461 | 14 | pop_metrics_print(); | |
1462 | 14 | node_print(); | |
1463 | 14 | process_print(); | |
1464 | } else { | ||
1465 | /* Do not open file if process has no data */ | ||
1466 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
|
6 | if (pop_metrics_records == NULL |
1467 | ✗ | && node_records == NULL | |
1468 | ✗ | && region_records == NULL) return; | |
1469 | |||
1470 | /* Check file extension */ | ||
1471 | typedef enum Extension { | ||
1472 | EXT_JSON, | ||
1473 | EXT_XML, | ||
1474 | EXT_CSV, | ||
1475 | EXT_TXT, | ||
1476 | } extension_t; | ||
1477 | 6 | extension_t extension = EXT_TXT; | |
1478 | 6 | const char *ext = strrchr(output_file, '.'); | |
1479 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (ext != NULL) { |
1480 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (strcmp(ext+1, "json") == 0) { |
1481 | 1 | extension = EXT_JSON; | |
1482 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
|
5 | } else if (strcmp(ext+1, "xml") == 0) { |
1483 | 1 | extension = EXT_XML; | |
1484 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | } else if (strcmp(ext+1, "csv") == 0) { |
1485 | 3 | extension = EXT_CSV; | |
1486 | } | ||
1487 | } | ||
1488 | |||
1489 | /* Deprecation warning*/ | ||
1490 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if(extension == EXT_XML){ |
1491 | 1 | warning("Deprecated: The support for XML output is deprecated and" | |
1492 | " will be removed in the next release"); | ||
1493 | } | ||
1494 | |||
1495 | /* Specific case where output file needs to be split */ | ||
1496 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (extension == EXT_CSV |
1497 | 3 | && !!(pop_metrics_records != NULL) | |
1498 | 3 | + !!(node_records != NULL) | |
1499 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
4 | + !!(region_records != NULL) > 1) { |
1500 | |||
1501 | /* Length without extension */ | ||
1502 | 1 | int filename_useful_len = ext - output_file; | |
1503 | |||
1504 | /* POP */ | ||
1505 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (pop_metrics_records != NULL) { |
1506 | 1 | const char *pop_ext = "-pop.csv"; | |
1507 | 1 | size_t pop_file_len = filename_useful_len + strlen(pop_ext) + 1; | |
1508 | 1 | char *pop_filename = malloc(sizeof(char)*pop_file_len); | |
1509 | 1 | sprintf(pop_filename, "%.*s%s", filename_useful_len, output_file, pop_ext); | |
1510 | FILE *pop_file; | ||
1511 | bool append_to_csv; | ||
1512 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | if (access(pop_filename, F_OK) == 0) { |
1513 | ✗ | pop_file = fopen(pop_filename, "a"); | |
1514 | ✗ | append_to_csv = true; | |
1515 | } else { | ||
1516 | 1 | pop_file = fopen(pop_filename, "w"); | |
1517 | 1 | append_to_csv = false; | |
1518 | } | ||
1519 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (pop_file == NULL) { |
1520 | ✗ | warning("Cannot open file %s: %s", pop_filename, strerror(errno)); | |
1521 | } else { | ||
1522 | 1 | pop_metrics_to_csv(pop_file, append_to_csv); | |
1523 | 1 | fclose(pop_file); | |
1524 | } | ||
1525 | } | ||
1526 | |||
1527 | /* Node */ | ||
1528 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (node_records != NULL) { |
1529 | 1 | const char *node_ext = "-node.csv"; | |
1530 | 1 | size_t node_file_len = filename_useful_len + strlen(node_ext) + 1; | |
1531 | 1 | char *node_filename = malloc(sizeof(char)*node_file_len); | |
1532 | 1 | sprintf(node_filename, "%.*s%s", filename_useful_len, output_file, node_ext); | |
1533 | FILE *node_file; | ||
1534 | bool append_to_csv; | ||
1535 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | if (access(node_filename, F_OK) == 0) { |
1536 | ✗ | node_file = fopen(node_filename, "a"); | |
1537 | ✗ | append_to_csv = true; | |
1538 | } else { | ||
1539 | 1 | node_file = fopen(node_filename, "w"); | |
1540 | 1 | append_to_csv = false; | |
1541 | } | ||
1542 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (node_file == NULL) { |
1543 | ✗ | warning("Cannot open file %s: %s", node_filename, strerror(errno)); | |
1544 | } else { | ||
1545 | 1 | node_to_csv(node_file, append_to_csv); | |
1546 | 1 | fclose(node_file); | |
1547 | } | ||
1548 | } | ||
1549 | |||
1550 | /* Process */ | ||
1551 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (region_records != NULL) { |
1552 | 1 | const char *process_ext = "-process.csv"; | |
1553 | 1 | size_t process_file_len = filename_useful_len + strlen(process_ext) + 1; | |
1554 | 1 | char *process_filename = malloc(sizeof(char)*process_file_len); | |
1555 | 1 | sprintf(process_filename, "%.*s%s", filename_useful_len, output_file, process_ext); | |
1556 | FILE *process_file; | ||
1557 | bool append_to_csv; | ||
1558 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
|
1 | if (access(process_filename, F_OK) == 0) { |
1559 | ✗ | process_file = fopen(process_filename, "a"); | |
1560 | ✗ | append_to_csv = true; | |
1561 | } else { | ||
1562 | 1 | process_file = fopen(process_filename, "w"); | |
1563 | 1 | append_to_csv = false; | |
1564 | } | ||
1565 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (process_file == NULL) { |
1566 | ✗ | warning("Cannot open file %s: %s", process_filename, strerror(errno)); | |
1567 | } else { | ||
1568 | 1 | process_to_csv(process_file, append_to_csv); | |
1569 | 1 | fclose(process_file); | |
1570 | } | ||
1571 | } | ||
1572 | } | ||
1573 | |||
1574 | /* Write to file */ | ||
1575 | else { | ||
1576 | /* Open file */ | ||
1577 | FILE *out_file; | ||
1578 | bool append_to_csv; | ||
1579 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | if (extension == EXT_CSV |
1580 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
|
2 | && access(output_file, F_OK) == 0) { |
1581 | /* Specific case where new entries are appended to existing csv */ | ||
1582 | 1 | out_file = fopen(output_file, "a"); | |
1583 | 1 | append_to_csv = true; | |
1584 | } else { | ||
1585 | 4 | out_file = fopen(output_file, "w"); | |
1586 | 4 | append_to_csv = false; | |
1587 | } | ||
1588 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
|
5 | if (out_file == NULL) { |
1589 | ✗ | warning("Cannot open file %s: %s", output_file, strerror(errno)); | |
1590 | } else { | ||
1591 | /* Write records to file */ | ||
1592 |
4/5✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 1 times.
✗ Branch 4 not taken.
|
5 | switch(extension) { |
1593 | 1 | case EXT_JSON: | |
1594 | 1 | json_header(out_file); | |
1595 | 1 | common_to_json(out_file); | |
1596 | 1 | resources_to_json(out_file); | |
1597 | 1 | pop_metrics_to_json(out_file); | |
1598 | 1 | node_to_json(out_file); | |
1599 | 1 | process_to_json(out_file); | |
1600 | 1 | json_footer(out_file); | |
1601 | 1 | break; | |
1602 | 1 | case EXT_XML: | |
1603 | 1 | xml_header(out_file); | |
1604 | 1 | common_to_xml(out_file); | |
1605 | 1 | resources_to_xml(out_file); | |
1606 | 1 | pop_metrics_to_xml(out_file); | |
1607 | 1 | node_to_xml(out_file); | |
1608 | 1 | process_to_xml(out_file); | |
1609 | 1 | xml_footer(out_file); | |
1610 | 1 | break; | |
1611 | 2 | case EXT_CSV: | |
1612 | 2 | pop_metrics_to_csv(out_file, append_to_csv); | |
1613 | 2 | node_to_csv(out_file, append_to_csv); | |
1614 | 2 | process_to_csv(out_file, append_to_csv); | |
1615 | 2 | break; | |
1616 | 1 | case EXT_TXT: | |
1617 | 1 | common_to_txt(out_file); | |
1618 | 1 | resources_to_txt(out_file); | |
1619 | 1 | pop_metrics_to_txt(out_file); | |
1620 | 1 | node_to_txt(out_file); | |
1621 | 1 | process_to_txt(out_file); | |
1622 | 1 | break; | |
1623 | } | ||
1624 | /* Close file */ | ||
1625 | 5 | fclose(out_file); | |
1626 | } | ||
1627 | } | ||
1628 | } | ||
1629 | |||
1630 | // Restore locale | ||
1631 | 20 | uselocale(LC_GLOBAL_LOCALE); | |
1632 | |||
1633 | // De-allocate all records | ||
1634 | 20 | common_finalize(); | |
1635 | 20 | pop_metrics_finalize(); | |
1636 | 20 | node_finalize(); | |
1637 | 20 | process_finalize(); | |
1638 | } | ||
1639 |