GCC Code Coverage Report


Directory: src/
File: src/talp/talp_mpi.c
Date: 2026-06-05 08:54:23
Exec Total Coverage
Lines: 42 42 100.0%
Functions: 4 4 100.0%
Branches: 23 36 63.9%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2026 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #include "talp/talp_mpi.h"
21
22 #include "LB_comm/shmem_talp.h"
23 #include "LB_core/node_barrier.h"
24 #include "LB_core/spd.h"
25 #include "apis/dlb_talp.h"
26 #include "support/atomic.h"
27 #include "support/debug.h"
28 #include "support/mask_utils.h"
29 #include "talp/regions.h"
30 #include "talp/sample.h"
31 #include "talp/talp.h"
32 #include "talp/talp_record.h"
33 #include "talp/talp_types.h"
34 #ifdef MPI_LIB
35 #include "mpi/mpi_core.h"
36 #endif
37
38 #include <stdio.h>
39 #include <string.h>
40
41 extern __thread bool thread_is_observer;
42
43
44 #ifdef MPI_LIB
45 /* Communicate among all MPI processes so that everyone has the same monitoring regions */
46 static void talp_register_common_mpi_regions(const subprocess_descriptor_t *spd) {
47 /* Note: there's a potential race condition if this function is called
48 * (which happens on talp_mpi_finalize or talp_finalize) while another
49 * thread creates a monitoring region. The solution would be to lock the
50 * entire routine and call a specialized registering function that does not
51 * lock, or use a recursive lock. The situation is strange enough to not
52 * support it */
53
54 talp_info_t *talp_info = spd->talp_info;
55
56 /* Warn about open regions */
57 for (GSList *node = talp_info->open_regions;
58 node != NULL;
59 node = node->next) {
60 const dlb_monitor_t *monitor = node->data;
61 warning("Region %s is still open during MPI_Finalize."
62 " Collected data may be incomplete.",
63 monitor->name);
64 }
65
66 /* Gather recvcounts for each process
67 * (Each process may have different number of monitors) */
68 int nregions = g_tree_nnodes(talp_info->regions);
69 int chars_to_send = nregions * DLB_MONITOR_NAME_MAX;
70 int *recvcounts = malloc(_mpi_size * sizeof(int));
71 PMPI_Allgather(&chars_to_send, 1, MPI_INT,
72 recvcounts, 1, MPI_INT, getWorldComm());
73
74 /* Compute total characters to gather via MPI */
75 int i;
76 int total_chars = 0;
77 for (i=0; i<_mpi_size; ++i) {
78 total_chars += recvcounts[i];
79 }
80
81 if (total_chars > 0) {
82 /* Prepare sendbuffer */
83 char *sendbuffer = malloc(nregions * DLB_MONITOR_NAME_MAX * sizeof(char));
84 char *sendptr = sendbuffer;
85 for (GTreeNode *node = g_tree_node_first(talp_info->regions);
86 node != NULL;
87 node = g_tree_node_next(node)) {
88 const dlb_monitor_t *monitor = g_tree_node_value(node);
89 strcpy(sendptr, monitor->name);
90 sendptr += DLB_MONITOR_NAME_MAX;
91 }
92
93 /* Prepare recvbuffer */
94 char *recvbuffer = malloc(total_chars * sizeof(char));
95
96 /* Compute displacements */
97 int *displs = malloc(_mpi_size * sizeof(int));
98 int next_disp = 0;
99 for (i=0; i<_mpi_size; ++i) {
100 displs[i] = next_disp;
101 next_disp += recvcounts[i];
102 }
103
104 /* Gather all regions */
105 PMPI_Allgatherv(sendbuffer, nregions * DLB_MONITOR_NAME_MAX, MPI_CHAR,
106 recvbuffer, recvcounts, displs, MPI_CHAR, getWorldComm());
107
108 /* Register all regions. Existing ones will be skipped. */
109 for (i=0; i<total_chars; i+=DLB_MONITOR_NAME_MAX) {
110 region_register(spd, &recvbuffer[i]);
111 }
112
113 free(sendbuffer);
114 free(recvbuffer);
115 free(displs);
116 }
117
118 free(recvcounts);
119 }
120 #endif
121
122
123 /*********************************************************************************/
124 /* TALP MPI functions */
125 /*********************************************************************************/
126
127 /* Start global monitoring region (if not already started) */
128 9 void talp_mpi_init(const subprocess_descriptor_t *spd) {
129
130
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 ensure(!thread_is_observer, "An observer thread cannot call talp_mpi_init");
131
132 9 talp_info_t *talp_info = spd->talp_info;
133
134
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (talp_info) {
135 9 talp_info->flags.have_mpi = true;
136
137 /* Start global region (no-op if already started) */
138 9 region_start(spd, talp_info->monitor);
139
140 /* Add MPI_Init statistic and set useful state */
141 9 talp_sample_t *sample = talp_sample_get(talp_info);
142 9 DLB_ATOMIC_ADD_RLX(&sample->stats.num_mpi_calls, 1);
143 9 talp_sample_set_state(talp_info, TALP_STATE_USEFUL);
144 }
145 9 }
146
147 /* Stop global monitoring region and gather APP data if needed */
148 6 void talp_mpi_finalize(const subprocess_descriptor_t *spd) {
149
150
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 ensure(!thread_is_observer, "An observer thread cannot call talp_mpi_finalize");
151
152 6 talp_info_t *talp_info = spd->talp_info;
153
154
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
6 if (talp_info == NULL || !talp_info->flags.have_mpi) return;
155
156 #ifdef MPI_LIB
157 /* We also need to measure the waiting time of an MPI_Finalize.
158 * For this, we call an MPI_Barrier and the appropriate TALP functions.
159 * The num_mpi_calls variable is also incremented inside those. */
160 sync_call_flags_t flags = { .is_blocking = true, .is_collective = true };
161 talp_into_sync_call(spd, flags);
162 PMPI_Barrier(getWorldComm());
163 talp_out_of_sync_call(spd, flags);
164 #else
165 /* Add MPI_Finalize to the number of MPI calls.
166 * Even though talp_mpi_finalize should never be called if no MPI_LIB,
167 * we keep this case for testing purposes. */
168 6 talp_sample_t *sample = talp_sample_get(talp_info);
169 6 DLB_ATOMIC_ADD_RLX(&sample->stats.num_mpi_calls, 1);
170 #endif
171
172 /* Stop global region */
173 6 region_stop(spd, talp_info->monitor);
174
175 6 monitor_data_t *monitor_data = talp_info->monitor->_data;
176
177 /* Update shared memory values */
178
3/4
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
6 if (talp_info->flags.have_shmem || talp_info->flags.have_minimal_shmem) {
179 // TODO: is it needed? isn't it updated when stopped?
180 1 shmem_talp__set_times(monitor_data->node_shared_id,
181 1 talp_info->monitor->mpi_time,
182 1 talp_info->monitor->useful_time);
183 }
184
185 /* If TALP partial output is enabled, metrics are not merged here.
186 * Output is written per process in talp_finalize() */
187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (spd->options.talp_partial_output) return;
188
189 #ifdef MPI_LIB
190 /* If performing any kind of TALP summary, check that the number of processes
191 * registered in the shared memory matches with the number of MPI processes in the node.
192 * This check is needed to avoid deadlocks on finalize. */
193 if (spd->options.talp_summary) {
194 verbose(VB_TALP, "Gathering TALP metrics");
195 /* FIXME: use Named Barrier */
196 /* if (shmem_barrier__get_num_participants(spd->options.barrier_id) == _mpis_per_node) { */
197
198 /* Gather data among processes in the node if node summary is enabled */
199 if (spd->options.talp_summary & SUMMARY_NODE) {
200 talp_record_node_summary(spd);
201 }
202
203 /* Gather data among MPIs if any of these summaries is enabled */
204 if (spd->options.talp_summary
205 & (SUMMARY_POP_METRICS | SUMMARY_PROCESS)) {
206 /* Ensure everyone has the same monitoring regions */
207 talp_register_common_mpi_regions(spd);
208
209 /* Finally, reduce data */
210 for (GTreeNode *node = g_tree_node_first(talp_info->regions);
211 node != NULL;
212 node = g_tree_node_next(node)) {
213 const dlb_monitor_t *monitor = g_tree_node_value(node);
214 if (spd->options.talp_summary & SUMMARY_POP_METRICS) {
215 talp_record_pop_summary(spd, monitor);
216 }
217 if (spd->options.talp_summary & SUMMARY_PROCESS) {
218 talp_record_process_summary(spd, monitor);
219 }
220 }
221 }
222
223 /* Synchronize all processes in node before continuing with DLB finalization */
224 node_barrier(spd, NULL);
225 /* } else { */
226 /* warning("The number of MPI processes and processes registered in DLB differ." */
227 /* " TALP will not print any summary."); */
228 /* } */
229 }
230 #endif
231 }
232
233 8 void talp_into_sync_call(const subprocess_descriptor_t *spd, sync_call_flags_t flags) {
234
235 /* Observer threads may call MPI functions, but TALP must ignore them */
236
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (unlikely(thread_is_observer)) return;
237
238 7 talp_info_t *talp_info = spd->talp_info;
239
240
2/4
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
7 if (talp_info == NULL || !talp_info->flags.have_mpi) return;
241
242 /* Update sample */
243 7 talp_sample_update(talp_info);
244
245 /* Into Sync call -> not_useful_mpi */
246 7 talp_sample_set_state(talp_info, TALP_STATE_NOT_USEFUL_MPI);
247 }
248
249 8 void talp_out_of_sync_call(const subprocess_descriptor_t *spd, sync_call_flags_t flags) {
250
251 /* Observer threads may call MPI functions, but TALP must ignore them */
252
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
8 if (unlikely(thread_is_observer)) return;
253
254 7 talp_info_t *talp_info = spd->talp_info;
255
256
2/4
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 7 times.
7 if (talp_info == NULL || !talp_info->flags.have_mpi) return;
257
258 /* Update sample */
259 7 talp_sample_update(talp_info);
260
261 /* Add statistic */
262 7 talp_sample_t *sample = talp_sample_get(talp_info);
263 7 DLB_ATOMIC_ADD_RLX(&sample->stats.num_mpi_calls, 1);
264
265 /* Out of Sync call -> useful */
266 7 talp_sample_set_state(talp_info, TALP_STATE_USEFUL);
267
268 /* Only when needed, update all regions */
269
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 3 times.
7 if (talp_info->flags.external_profiler
270
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 && talp_sample_is_main()
271
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
4 && flags.is_blocking
272
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 && flags.is_collective) {
273 1 talp_aggregate_samples_to_regions(talp_info);
274 }
275 }
276