| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2025 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #include "talp/talp_record.h" | ||
| 21 | |||
| 22 | #include "LB_comm/shmem_talp.h" | ||
| 23 | #include "LB_core/node_barrier.h" | ||
| 24 | #include "LB_core/spd.h" | ||
| 25 | #include "apis/dlb_talp.h" | ||
| 26 | #include "support/debug.h" | ||
| 27 | #include "support/mask_utils.h" | ||
| 28 | #include "support/options.h" | ||
| 29 | #include "talp/perf_metrics.h" | ||
| 30 | #include "talp/regions.h" | ||
| 31 | #include "talp/talp_output.h" | ||
| 32 | #include "talp/talp_types.h" | ||
| 33 | #ifdef MPI_LIB | ||
| 34 | #include "mpi/mpi_core.h" | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #include <stddef.h> | ||
| 38 | #include <stdio.h> | ||
| 39 | #include <unistd.h> | ||
| 40 | |||
| 41 | |||
| 42 | /*********************************************************************************/ | ||
| 43 | /* TALP Record in serial (non-MPI) mode */ | ||
| 44 | /*********************************************************************************/ | ||
| 45 | |||
| 46 | /* For any given monitor, record metrics considering only this (sub-)process */ | ||
| 47 | 1221 | void talp_record_monitor(const subprocess_descriptor_t *spd, | |
| 48 | const dlb_monitor_t *monitor) { | ||
| 49 | |||
| 50 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 1203 times.
|
1221 | if (spd->options.talp_summary != SUMMARY_NONE) { |
| 51 | 18 | talp_output_record_process_info(); | |
| 52 | } | ||
| 53 | |||
| 54 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1219 times.
|
1221 | if (spd->options.talp_summary & SUMMARY_PROCESS) { |
| 55 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | verbose(VB_TALP, "TALP process summary: recording region %s", monitor->name); |
| 56 | |||
| 57 | 2 | process_record_t process_record = { | |
| 58 | .rank = 0, | ||
| 59 | 2 | .pid = spd->id, | |
| 60 | .monitor = *monitor, | ||
| 61 | }; | ||
| 62 | |||
| 63 | /* Fill hostname and CPU mask strings in process_record */ | ||
| 64 | 2 | gethostname(process_record.hostname, HOST_NAME_MAX); | |
| 65 | 2 | snprintf(process_record.cpuset, TALP_OUTPUT_CPUSET_MAX, "%s", | |
| 66 | mu_to_str(&spd->process_mask)); | ||
| 67 | 2 | mu_get_quoted_mask(&spd->process_mask, process_record.cpuset_quoted, | |
| 68 | TALP_OUTPUT_CPUSET_MAX); | ||
| 69 | |||
| 70 | /* Add record */ | ||
| 71 | 2 | talp_output_record_process(monitor->name, &process_record, 1); | |
| 72 | } | ||
| 73 | |||
| 74 |
2/2✓ Branch 0 taken 18 times.
✓ Branch 1 taken 1203 times.
|
1221 | if (spd->options.talp_summary & SUMMARY_POP_METRICS) { |
| 75 |
2/2✓ Branch 0 taken 17 times.
✓ Branch 1 taken 1 times.
|
18 | if (monitor->elapsed_time > 0) { |
| 76 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 17 times.
|
17 | verbose(VB_TALP, "TALP summary: recording region %s", monitor->name); |
| 77 | |||
| 78 | pop_base_metrics_t base_metrics; | ||
| 79 | 17 | perf_metrics__local_monitor_into_base_metrics(&base_metrics, monitor); | |
| 80 | |||
| 81 | dlb_pop_metrics_t pop_metrics; | ||
| 82 | 17 | perf_metrics__base_to_pop_metrics(monitor->name, &base_metrics, &pop_metrics); | |
| 83 | 17 | talp_output_record_pop_metrics(&pop_metrics); | |
| 84 | |||
| 85 | 17 | talp_info_t *talp_info = spd->talp_info; | |
| 86 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 6 times.
|
17 | if(monitor == talp_info->monitor) { |
| 87 | 11 | talp_output_record_resources(monitor->num_cpus, | |
| 88 | /* num_nodes */ 1, /* num_ranks */ 0, base_metrics.num_gpus); | ||
| 89 | } | ||
| 90 | |||
| 91 | } else { | ||
| 92 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | verbose(VB_TALP, "TALP summary: recording empty region %s", monitor->name); |
| 93 | 1 | dlb_pop_metrics_t pop_metrics = {0}; | |
| 94 | 1 | snprintf(pop_metrics.name, DLB_MONITOR_NAME_MAX, "%s", monitor->name); | |
| 95 | 1 | talp_output_record_pop_metrics(&pop_metrics); | |
| 96 | } | ||
| 97 | } | ||
| 98 | 1221 | } | |
| 99 | |||
| 100 | |||
| 101 | /*********************************************************************************/ | ||
| 102 | /* TALP Record in MPI mode */ | ||
| 103 | /*********************************************************************************/ | ||
| 104 | |||
| 105 | #if MPI_LIB | ||
| 106 | |||
| 107 | /* Compute Node summary of all Global Monitors and record data */ | ||
| 108 | void talp_record_node_summary(const subprocess_descriptor_t *spd) { | ||
| 109 | node_record_t *node_summary = NULL; | ||
| 110 | size_t node_summary_size = 0; | ||
| 111 | |||
| 112 | /* Perform a barrier so that all processes in the node have arrived at the | ||
| 113 | * MPI_Finalize */ | ||
| 114 | node_barrier(spd, NULL); | ||
| 115 | |||
| 116 | /* Node process 0 reduces all global regions from all processes in the node */ | ||
| 117 | if (_process_id == 0) { | ||
| 118 | /* Obtain a list of regions associated with the Global Region Name, sorted by PID */ | ||
| 119 | int max_procs = mu_get_system_size(); | ||
| 120 | talp_region_list_t *region_list = malloc(max_procs * sizeof(talp_region_list_t)); | ||
| 121 | int nelems; | ||
| 122 | shmem_talp__get_regionlist(region_list, &nelems, max_procs, region_get_global_name()); | ||
| 123 | |||
| 124 | /* Allocate and initialize node summary structure */ | ||
| 125 | node_summary_size = sizeof(node_record_t) + sizeof(process_in_node_record_t) * nelems; | ||
| 126 | node_summary = malloc(node_summary_size); | ||
| 127 | *node_summary = (const node_record_t) { | ||
| 128 | .node_id = _node_id, | ||
| 129 | .nelems = nelems, | ||
| 130 | }; | ||
| 131 | |||
| 132 | /* Iterate the PID list and gather times of every process */ | ||
| 133 | for (int i = 0; i < nelems; ++i) { | ||
| 134 | int64_t mpi_time = region_list[i].mpi_time; | ||
| 135 | int64_t useful_time = region_list[i].useful_time; | ||
| 136 | |||
| 137 | /* Save times in local structure */ | ||
| 138 | node_summary->processes[i].pid = region_list[i].pid; | ||
| 139 | node_summary->processes[i].mpi_time = mpi_time; | ||
| 140 | node_summary->processes[i].useful_time = useful_time; | ||
| 141 | |||
| 142 | /* Accumulate total and max values */ | ||
| 143 | node_summary->avg_useful_time += useful_time; | ||
| 144 | node_summary->avg_mpi_time += mpi_time; | ||
| 145 | node_summary->max_useful_time = max_int64(useful_time, node_summary->max_useful_time); | ||
| 146 | node_summary->max_mpi_time = max_int64(mpi_time, node_summary->max_mpi_time); | ||
| 147 | } | ||
| 148 | free(region_list); | ||
| 149 | |||
| 150 | /* Compute average values */ | ||
| 151 | node_summary->avg_useful_time /= node_summary->nelems; | ||
| 152 | node_summary->avg_mpi_time /= node_summary->nelems; | ||
| 153 | } | ||
| 154 | |||
| 155 | /* Perform a final barrier so that all processes let the _process_id 0 to | ||
| 156 | * gather all the data */ | ||
| 157 | node_barrier(spd, NULL); | ||
| 158 | |||
| 159 | /* All main processes from each node send data to rank 0 */ | ||
| 160 | if (_process_id == 0) { | ||
| 161 | verbose(VB_TALP, "Node summary: gathering data"); | ||
| 162 | |||
| 163 | /* MPI type: int64_t */ | ||
| 164 | MPI_Datatype mpi_int64_type = get_mpi_int64_type(); | ||
| 165 | |||
| 166 | /* MPI type: pid_t */ | ||
| 167 | MPI_Datatype mpi_pid_type; | ||
| 168 | PMPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(pid_t), &mpi_pid_type); | ||
| 169 | |||
| 170 | /* MPI struct type: process_in_node_record_t */ | ||
| 171 | MPI_Datatype mpi_process_info_type; | ||
| 172 | { | ||
| 173 | int count = 3; | ||
| 174 | int blocklengths[] = {1, 1, 1}; | ||
| 175 | MPI_Aint displacements[] = { | ||
| 176 | offsetof(process_in_node_record_t, pid), | ||
| 177 | offsetof(process_in_node_record_t, mpi_time), | ||
| 178 | offsetof(process_in_node_record_t, useful_time)}; | ||
| 179 | MPI_Datatype types[] = {mpi_pid_type, mpi_int64_type, mpi_int64_type}; | ||
| 180 | MPI_Datatype tmp_type; | ||
| 181 | PMPI_Type_create_struct(count, blocklengths, displacements, types, &tmp_type); | ||
| 182 | PMPI_Type_create_resized(tmp_type, 0, sizeof(process_in_node_record_t), | ||
| 183 | &mpi_process_info_type); | ||
| 184 | PMPI_Type_commit(&mpi_process_info_type); | ||
| 185 | } | ||
| 186 | |||
| 187 | /* MPI struct type: node_record_t */ | ||
| 188 | MPI_Datatype mpi_node_record_type;; | ||
| 189 | { | ||
| 190 | int count = 7; | ||
| 191 | int blocklengths[] = {1, 1, 1, 1, 1, 1, node_summary->nelems}; | ||
| 192 | MPI_Aint displacements[] = { | ||
| 193 | offsetof(node_record_t, node_id), | ||
| 194 | offsetof(node_record_t, nelems), | ||
| 195 | offsetof(node_record_t, avg_useful_time), | ||
| 196 | offsetof(node_record_t, avg_mpi_time), | ||
| 197 | offsetof(node_record_t, max_useful_time), | ||
| 198 | offsetof(node_record_t, max_mpi_time), | ||
| 199 | offsetof(node_record_t, processes)}; | ||
| 200 | MPI_Datatype types[] = {MPI_INT, MPI_INT, mpi_int64_type, mpi_int64_type, | ||
| 201 | mpi_int64_type, mpi_int64_type, mpi_process_info_type}; | ||
| 202 | MPI_Datatype tmp_type; | ||
| 203 | PMPI_Type_create_struct(count, blocklengths, displacements, types, &tmp_type); | ||
| 204 | PMPI_Type_create_resized(tmp_type, 0, node_summary_size, &mpi_node_record_type); | ||
| 205 | PMPI_Type_commit(&mpi_node_record_type); | ||
| 206 | } | ||
| 207 | |||
| 208 | /* Gather data */ | ||
| 209 | void *recvbuf = NULL; | ||
| 210 | if (_mpi_rank == 0) { | ||
| 211 | recvbuf = malloc(_num_nodes * node_summary_size); | ||
| 212 | } | ||
| 213 | PMPI_Gather(node_summary, 1, mpi_node_record_type, | ||
| 214 | recvbuf, 1, mpi_node_record_type, | ||
| 215 | 0, getInterNodeComm()); | ||
| 216 | |||
| 217 | /* Free send buffer and MPI Datatypes */ | ||
| 218 | free(node_summary); | ||
| 219 | PMPI_Type_free(&mpi_process_info_type); | ||
| 220 | PMPI_Type_free(&mpi_node_record_type); | ||
| 221 | |||
| 222 | /* Add records */ | ||
| 223 | if (_mpi_rank == 0) { | ||
| 224 | for (int node_id = 0; node_id < _num_nodes; ++node_id) { | ||
| 225 | verbose(VB_TALP, "Node summary: recording node %d", node_id); | ||
| 226 | node_record_t *node_record = (node_record_t*)( | ||
| 227 | (unsigned char *)recvbuf + node_summary_size * node_id); | ||
| 228 | ensure( node_id == node_record->node_id, "Node id error in %s", __func__ ); | ||
| 229 | talp_output_record_node(node_record); | ||
| 230 | } | ||
| 231 | free(recvbuf); | ||
| 232 | } | ||
| 233 | } | ||
| 234 | } | ||
| 235 | |||
/* Gather PROCESS data of a monitor among all ranks and record it in rank 0.
 *
 * Collective over getWorldComm() (PMPI_Gather): every MPI rank must call it
 * with the same region. Each rank fills a process_record_t (rank, pid, node,
 * hostname, CPU masks, and a full copy of the dlb_monitor_t) and sends it to
 * rank 0, which records one entry per rank.
 *
 * NOTE(review): the MPI derived datatypes below must mirror the C struct
 * layouts field-for-field; any change to dlb_monitor_t or process_record_t
 * requires updating the displacements/types arrays here in lockstep. */
void talp_record_process_summary(const subprocess_descriptor_t *spd,
        const dlb_monitor_t *monitor) {

    /* Internal monitors will not be recorded */
    if (((monitor_data_t*)monitor->_data)->flags.internal) {
        return;
    }

    if (_mpi_rank == 0) {
        verbose(VB_TALP, "Process summary: gathering region %s", monitor->name);
    }

    process_record_t process_record_send = {
        .rank = _mpi_rank,
        .pid = spd->id,
        .node_id = _node_id,
        .monitor = *monitor,
    };

    /* Invalidate pointers of the copied monitor */
    process_record_send.monitor.name = NULL;
    process_record_send.monitor._data = NULL;

    /* Fill hostname and CPU mask strings in process_record_send */
    /* NOTE(review): gethostname may leave the buffer unterminated on
     * truncation (POSIX-unspecified) — consider forcing a trailing '\0' */
    gethostname(process_record_send.hostname, HOST_NAME_MAX);
    snprintf(process_record_send.cpuset, TALP_OUTPUT_CPUSET_MAX, "%s",
            mu_to_str(&spd->process_mask));
    mu_get_quoted_mask(&spd->process_mask, process_record_send.cpuset_quoted,
            TALP_OUTPUT_CPUSET_MAX);

    /* MPI type: int64_t */
    MPI_Datatype mpi_int64_type = get_mpi_int64_type();

    /* MPI type: pid_t */
    MPI_Datatype mpi_pid_type;
    PMPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(pid_t), &mpi_pid_type);

    /* Note: obviously, it doesn't make sense to send addresses via MPI, but we
     * are sending the whole dlb_monitor_t, so... Addresses are discarded
     * either way. */

    /* MPI type: void* (sent as a same-sized integer; value is ignored) */
    MPI_Datatype address_type;
    PMPI_Type_match_size(MPI_TYPECLASS_INTEGER, sizeof(void*), &address_type);

    /* MPI struct type: dlb_monitor_t */
    MPI_Datatype mpi_dlb_monitor_type;
    {
        int blocklengths[] = {
            1, 1, 1,                /* Name + Resources: num_cpus, avg_cpus */
            1, 1,                   /* Hardware counters: cycles, instructions */
            1, 1, 1, 1, 1, 1,       /* Statistics: num_* */
            1, 1,                   /* Monitor Start and Stop times */
            1, 1, 1, 1, 1, 1, 1,    /* Host Times */
            1, 1, 1,                /* Device Times */
            1};                     /* _data */

        /* count derived from the array so the static_ensure checks below
         * catch any mismatch among the three parallel arrays */
        enum {count = sizeof(blocklengths) / sizeof(blocklengths[0])};

        MPI_Aint displacements[] = {
            offsetof(dlb_monitor_t, name),
            /* Resources */
            offsetof(dlb_monitor_t, num_cpus),
            offsetof(dlb_monitor_t, avg_cpus),
            /* Hardware counters */
            offsetof(dlb_monitor_t, cycles),
            offsetof(dlb_monitor_t, instructions),
            /* Statistics */
            offsetof(dlb_monitor_t, num_measurements),
            offsetof(dlb_monitor_t, num_resets),
            offsetof(dlb_monitor_t, num_mpi_calls),
            offsetof(dlb_monitor_t, num_omp_parallels),
            offsetof(dlb_monitor_t, num_omp_tasks),
            offsetof(dlb_monitor_t, num_gpu_runtime_calls),
            /* Monitor Start and Stop times */
            offsetof(dlb_monitor_t, start_time),
            offsetof(dlb_monitor_t, stop_time),
            /* Host Times */
            offsetof(dlb_monitor_t, elapsed_time),
            offsetof(dlb_monitor_t, useful_time),
            offsetof(dlb_monitor_t, mpi_time),
            offsetof(dlb_monitor_t, omp_load_imbalance_time),
            offsetof(dlb_monitor_t, omp_scheduling_time),
            offsetof(dlb_monitor_t, omp_serialization_time),
            offsetof(dlb_monitor_t, gpu_runtime_time),
            /* Device Times */
            offsetof(dlb_monitor_t, gpu_useful_time),
            offsetof(dlb_monitor_t, gpu_communication_time),
            offsetof(dlb_monitor_t, gpu_inactive_time),
            /* _data */
            offsetof(dlb_monitor_t, _data)};

        MPI_Datatype types[] = {
            address_type, MPI_INT, MPI_FLOAT,   /* Name + Resources: num_cpus, avg_cpus */
            mpi_int64_type, mpi_int64_type,     /* Hardware counters: cycles, instructions */
            MPI_INT, MPI_INT,
            mpi_int64_type, mpi_int64_type,
            mpi_int64_type, mpi_int64_type,     /* Statistics: num_* */
            mpi_int64_type, mpi_int64_type,     /* Monitor Start and Stop times */
            mpi_int64_type, mpi_int64_type,
            mpi_int64_type, mpi_int64_type,
            mpi_int64_type, mpi_int64_type,
            mpi_int64_type,                     /* Host Times */
            mpi_int64_type, mpi_int64_type,
            mpi_int64_type,                     /* Device Times */
            address_type};                      /* _data */

        MPI_Datatype tmp_type;
        PMPI_Type_create_struct(count, blocklengths, displacements, types, &tmp_type);
        /* Resize to the C struct size so arrays of monitors stay aligned */
        PMPI_Type_create_resized(tmp_type, 0, sizeof(dlb_monitor_t), &mpi_dlb_monitor_type);
        PMPI_Type_commit(&mpi_dlb_monitor_type);

        static_ensure(sizeof(blocklengths)/sizeof(blocklengths[0]) == count,
                "blocklengths size mismatch");
        static_ensure(sizeof(displacements)/sizeof(displacements[0]) == count,
                "displacements size mismatch");
        static_ensure(sizeof(types)/sizeof(types[0]) == count,
                "types size mismatch");
    }

    /* MPI struct type: process_record_t */
    MPI_Datatype mpi_process_record_type;
    {
        int count = 7;
        int blocklengths[] = {1, 1, 1, HOST_NAME_MAX,
            TALP_OUTPUT_CPUSET_MAX, TALP_OUTPUT_CPUSET_MAX, 1};
        MPI_Aint displacements[] = {
            offsetof(process_record_t, rank),
            offsetof(process_record_t, pid),
            offsetof(process_record_t, node_id),
            offsetof(process_record_t, hostname),
            offsetof(process_record_t, cpuset),
            offsetof(process_record_t, cpuset_quoted),
            offsetof(process_record_t, monitor)};
        MPI_Datatype types[] = {MPI_INT, mpi_pid_type, MPI_INT, MPI_CHAR, MPI_CHAR,
            MPI_CHAR, mpi_dlb_monitor_type};
        MPI_Datatype tmp_type;
        PMPI_Type_create_struct(count, blocklengths, displacements, types, &tmp_type);
        PMPI_Type_create_resized(tmp_type, 0, sizeof(process_record_t),
                &mpi_process_record_type);
        PMPI_Type_commit(&mpi_process_record_type);
    }

    /* Gather data: one process_record_t per rank into rank 0 */
    process_record_t *recvbuf = NULL;
    if (_mpi_rank == 0) {
        recvbuf = malloc(_mpi_size * sizeof(process_record_t));
    }
    PMPI_Gather(&process_record_send, 1, mpi_process_record_type,
            recvbuf, 1, mpi_process_record_type,
            0, getWorldComm());

    /* Add records */
    if (_mpi_rank == 0) {
        for (int rank = 0; rank < _mpi_size; ++rank) {
            verbose(VB_TALP, "Process summary: recording region %s on rank %d",
                    monitor->name, rank);
            talp_output_record_process(monitor->name, &recvbuf[rank], _mpi_size);
        }
        free(recvbuf);
    }

    /* Free MPI types */
    PMPI_Type_free(&mpi_dlb_monitor_type);
    PMPI_Type_free(&mpi_process_record_type);
}
| 403 | |||
| 404 | /* Gather POP METRICS data of a monitor among all ranks and record it in rank 0 */ | ||
| 405 | void talp_record_pop_summary(const subprocess_descriptor_t *spd, | ||
| 406 | const dlb_monitor_t *monitor) { | ||
| 407 | |||
| 408 | /* Internal monitors will not be recorded */ | ||
| 409 | if (((monitor_data_t*)monitor->_data)->flags.internal) { | ||
| 410 | return; | ||
| 411 | } | ||
| 412 | |||
| 413 | if (_mpi_rank == 0) { | ||
| 414 | verbose(VB_TALP, "TALP summary: gathering region %s", monitor->name); | ||
| 415 | } | ||
| 416 | |||
| 417 | talp_info_t *talp_info = spd->talp_info; | ||
| 418 | |||
| 419 | /* Reduce monitor among all MPI ranks into MPI rank 0 */ | ||
| 420 | pop_base_metrics_t base_metrics; | ||
| 421 | perf_metrics__reduce_monitor_into_base_metrics(&base_metrics, monitor, false); | ||
| 422 | |||
| 423 | if (_mpi_rank == 0) { | ||
| 424 | if (base_metrics.elapsed_time > 0) { | ||
| 425 | |||
| 426 | /* Only the global region records the resources */ | ||
| 427 | if (monitor == talp_info->monitor) { | ||
| 428 | talp_output_record_resources(base_metrics.num_cpus, | ||
| 429 | base_metrics.num_nodes, base_metrics.num_mpi_ranks, | ||
| 430 | base_metrics.num_gpus); | ||
| 431 | } | ||
| 432 | |||
| 433 | /* Construct pop_metrics out of base metrics */ | ||
| 434 | dlb_pop_metrics_t pop_metrics; | ||
| 435 | perf_metrics__base_to_pop_metrics(monitor->name, &base_metrics, &pop_metrics); | ||
| 436 | |||
| 437 | /* Record */ | ||
| 438 | verbose(VB_TALP, "TALP summary: recording region %s", monitor->name); | ||
| 439 | talp_output_record_pop_metrics(&pop_metrics); | ||
| 440 | |||
| 441 | } else { | ||
| 442 | /* Record empty */ | ||
| 443 | verbose(VB_TALP, "TALP summary: recording empty region %s", monitor->name); | ||
| 444 | dlb_pop_metrics_t pop_metrics = {0}; | ||
| 445 | snprintf(pop_metrics.name, DLB_MONITOR_NAME_MAX, "%s", monitor->name); | ||
| 446 | talp_output_record_pop_metrics(&pop_metrics); | ||
| 447 | } | ||
| 448 | } | ||
| 449 | } | ||
| 450 | |||
| 451 | #endif /* MPI_LIB */ | ||
| 452 |