GCC Code Coverage Report


Directory: src/
File: src/LB_core/node_barrier.c
Date: 2024-11-22 17:07:10
Exec Total Coverage
Lines: 159 166 95.8%
Functions: 7 7 100.0%
Branches: 87 112 77.7%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "LB_core/node_barrier.h"
25
26 #include "apis/dlb_errors.h"
27 #include "apis/dlb_types.h"
28 #include "LB_core/spd.h"
29 #include "LB_comm/shmem_barrier.h"
30 #include "support/debug.h"
31 #include "support/tracing.h"
32
33 #include <pthread.h>
34 #include <stdlib.h>
35 #include <string.h>
36
37 /* Per process, private Barrier data */
38 typedef struct barrier_info {
39 char default_barrier_name[BARRIER_NAME_MAX]; /* only to keep compatibility with --barrier-id */
40 barrier_t *default_barrier;
41 barrier_t **barrier_list;
42 int max_barriers;
43 } barrier_info_t;
44
45 static const char *default_barrier_name = "default";
46 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
47
48 /* Parse, for the specific barrier, whether it should do LeWI based on:
49 * - if barrier_name == default_barrier_name:
50 * if lewi_barrier and !lewi_barrier_select;
51 * or "default" in lewi_barrier_select
52 * - else:
53 * if barrier_flags has LEWI;
54 * or if barrier_flags has SELECTIVE and name in lewi_barrier_select
55 */
56 112 static bool parse_lewi_barrier(const char *barrier_name, bool lewi_barrier,
57 const char *lewi_barrier_select, int api_flags) {
58
2/2
✓ Branch 0 taken 82 times.
✓ Branch 1 taken 30 times.
112 if (strncmp(barrier_name, default_barrier_name, BARRIER_NAME_MAX) == 0) {
59
2/2
✓ Branch 0 taken 70 times.
✓ Branch 1 taken 12 times.
82 if (strlen(lewi_barrier_select) == 0) {
60 /* Default barrier: --lewi-barrier-select not set, --lewi-barrier dictates */
61 70 return lewi_barrier;
62 }
63 } else {
64
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 25 times.
30 if (api_flags == DLB_BARRIER_LEWI_ON) {
65 /* Named barrier: LeWI is forced by API */
66 5 return true;
67
2/2
✓ Branch 0 taken 19 times.
✓ Branch 1 taken 6 times.
25 } else if (api_flags == DLB_BARRIER_LEWI_OFF) {
68 /* Named barrier: LeWI is disallowed by API */
69 19 return false;
70 }
71 }
72
73 /* Find barrier_name in --lewi-barrier-select */
74 18 size_t len = strlen(lewi_barrier_select);
75
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 2 times.
18 if (len > 0) {
76 16 bool found_in_select = false;
77 16 char *barrier_select_copy = malloc(sizeof(char)*(len+1));
78 16 strcpy(barrier_select_copy, lewi_barrier_select);
79 char *saveptr;
80 16 char *token = strtok_r(barrier_select_copy, ",", &saveptr);
81
2/2
✓ Branch 0 taken 24 times.
✓ Branch 1 taken 8 times.
32 while (token) {
82
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 16 times.
24 if (strcmp(token, barrier_name) == 0) {
83 8 found_in_select = true;
84 8 break;
85 }
86 /* next token */
87 16 token = strtok_r(NULL, ",", &saveptr);
88 }
89 16 free(barrier_select_copy);
90
91 16 return found_in_select;
92 }
93
94 2 return false;
95 }
96
97 81 void node_barrier_init(subprocess_descriptor_t *spd) {
98
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 81 times.
81 if (spd->barrier_info != NULL) {
99 fatal("Cannot initialize Node Barrier, barrier_info not NULL\n"
100 "Please, report bug at " PACKAGE_BUGREPORT);
101 }
102
103 /* Even though default_barrier_name may change, no harm to use it here
104 * because we parse the user's options. */
105 81 bool lewi_barrier = parse_lewi_barrier(default_barrier_name,
106 81 spd->options.lewi_barrier, spd->options.lewi_barrier_select, 0);
107
108 barrier_info_t *barrier_info;
109 81 pthread_mutex_lock(&mutex);
110 {
111 /* Initialize barrier_info */
112 81 barrier_info = malloc(sizeof(barrier_info_t));
113 81 *barrier_info = (const barrier_info_t){};
114 81 spd->barrier_info = barrier_info;
115
116 /* --barrier-id may be deprecated in the future, but for now we just modify
117 * the default barrier name so that processes with different barrier id's
118 * don't synchronize with each other. */
119
1/2
✓ Branch 0 taken 81 times.
✗ Branch 1 not taken.
81 if (spd->options.barrier_id == 0) {
120 81 sprintf(barrier_info->default_barrier_name, "%s", default_barrier_name);
121 } else {
122 snprintf(barrier_info->default_barrier_name, BARRIER_NAME_MAX,
123 "default (id: %d)", spd->options.barrier_id);
124 }
125
126 /* Initialize default barrier */
127 162 barrier_info->default_barrier = shmem_barrier__register(
128 81 barrier_info->default_barrier_name, lewi_barrier);
129
130 /* Initialize barrier_list */
131 81 barrier_info->max_barriers = shmem_barrier__get_max_barriers();
132 81 barrier_info->barrier_list = calloc(barrier_info->max_barriers, sizeof(void*));
133 }
134 81 pthread_mutex_unlock(&mutex);
135
136
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 81 times.
81 if (barrier_info->default_barrier == NULL) {
137 warning("DLB system barrier could nout be initialized");
138 }
139 81 }
140
141 81 void node_barrier_finalize(subprocess_descriptor_t *spd) {
142 81 pthread_mutex_lock(&mutex);
143 {
144
1/2
✓ Branch 0 taken 81 times.
✗ Branch 1 not taken.
81 if (spd->barrier_info != NULL) {
145 /* Detach all, no need to check for non NULL values */
146 81 barrier_info_t *barrier_info = spd->barrier_info;
147 81 shmem_barrier__detach(barrier_info->default_barrier);
148 int i;
149
2/2
✓ Branch 0 taken 1036 times.
✓ Branch 1 taken 81 times.
1117 for (i=0; i<barrier_info->max_barriers; ++i) {
150 1036 shmem_barrier__detach(barrier_info->barrier_list[i]);
151 }
152 81 free(barrier_info->barrier_list);
153 81 *barrier_info = (const barrier_info_t){};
154 81 free(spd->barrier_info);
155 81 spd->barrier_info = NULL;
156 }
157 }
158 81 pthread_mutex_unlock(&mutex);
159 81 }
160
161 38 barrier_t* node_barrier_register(subprocess_descriptor_t *spd,
162 const char *barrier_name, int flags) {
163
164 /* This function does not allow registering the default barrier */
165
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 38 times.
38 if (barrier_name == NULL) return NULL;
166
167 38 barrier_t *barrier = NULL;
168
1/2
✓ Branch 0 taken 38 times.
✗ Branch 1 not taken.
38 if (spd->options.barrier) {
169 /* The register function cannot know whether the calling process is a new
170 * participant or just a query for the pointer. If we have at least one
171 * registered named barrier, we need to check the shared memory first. */
172 38 barrier_info_t *barrier_info = spd->barrier_info;
173
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 21 times.
38 if (barrier_info->barrier_list[0] != NULL) {
174 17 barrier = shmem_barrier__find(barrier_name);
175
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 8 times.
17 if (barrier != NULL) {
176 /* Barrier is found in shmem, check if it's registered within the spd. */
177 int i;
178 9 int max_barriers = barrier_info->max_barriers;
179
2/2
✓ Branch 0 taken 37 times.
✓ Branch 1 taken 1 times.
38 for (i=0; i<max_barriers; ++i) {
180
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 29 times.
37 if (barrier_info->barrier_list[i] == barrier) {
181 /* Barrier already registered in spd */
182 8 return barrier;
183 }
184 }
185 /* Barrier is not registered within this spd */
186 1 barrier = NULL;
187 }
188 }
189
190 /* Register if not found */
191
1/2
✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
30 if (barrier == NULL) {
192 30 bool lewi_barrier = parse_lewi_barrier(barrier_name,
193 30 spd->options.lewi_barrier,
194 30 spd->options.lewi_barrier_select, flags);
195 30 barrier = shmem_barrier__register(barrier_name, lewi_barrier);
196
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
30 if (barrier == NULL) return NULL;
197 }
198
199 /* Update the barrier list, if needed */
200 int i;
201 30 int max_barriers = barrier_info->max_barriers;
202 30 pthread_mutex_lock(&mutex);
203 {
204
1/2
✓ Branch 0 taken 54 times.
✗ Branch 1 not taken.
54 for (i=0; i<max_barriers; ++i) {
205
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 24 times.
54 if (barrier_info->barrier_list[i] == NULL) {
206 30 barrier_info->barrier_list[i] = barrier;
207 30 break;
208 }
209 }
210 }
211 30 pthread_mutex_unlock(&mutex);
212
213
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
30 ensure(i < max_barriers, "Cannot register Node Barrier, no space left in"
214 " barrier_list.\nPlease, report bug at " PACKAGE_BUGREPORT);
215 }
216
217 30 return barrier;
218 }
219
220 49 int node_barrier(const subprocess_descriptor_t *spd, barrier_t *barrier) {
221 int error;
222
2/2
✓ Branch 0 taken 47 times.
✓ Branch 1 taken 2 times.
49 if (spd->options.barrier) {
223 /* Check whether barrier is valid */
224 47 barrier_info_t *barrier_info = spd->barrier_info;
225
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 30 times.
47 if (barrier == NULL) {
226 /* If barrier is not provided we only need to check the reserved
227 * position in barrier_list */
228
2/2
✓ Branch 0 taken 16 times.
✓ Branch 1 taken 1 times.
17 if (barrier_info->default_barrier != NULL) {
229 16 barrier = barrier_info->default_barrier;
230 }
231
1/2
✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
30 } else if (unlikely(barrier == barrier_info->default_barrier)) {
232 /* barrier provided is the default barrier, nothing to do.
233 * (default_barrier pointer is never exposed, keep this one just in case) */
234 } else {
235 /* Otherwise, we need to check whether the provided barrier has
236 * not been detached */
237 30 int i = 0;
238 30 int max_barriers = barrier_info->max_barriers;
239 30 pthread_mutex_lock(&mutex);
240 {
241 30 while (i<max_barriers
242
2/2
✓ Branch 0 taken 50 times.
✓ Branch 1 taken 3 times.
53 && barrier_info->barrier_list[i] != NULL
243
3/4
✓ Branch 0 taken 53 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 23 times.
✓ Branch 3 taken 27 times.
103 && barrier_info->barrier_list[i] != barrier) {
244 23 ++i;
245 }
246
247
3/4
✓ Branch 0 taken 30 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 27 times.
30 if (i == max_barriers || barrier_info->barrier_list[i] == NULL) {
248 /* Not found in barrier_list */
249 3 barrier = NULL;
250 }
251 }
252 30 pthread_mutex_unlock(&mutex);
253 }
254
255 /* If barrier was found, perform the actual barrier */
256
2/2
✓ Branch 0 taken 43 times.
✓ Branch 1 taken 4 times.
47 if (barrier != NULL) {
257 instrument_event(RUNTIME_EVENT, EVENT_BARRIER, EVENT_BEGIN);
258 43 shmem_barrier__barrier(barrier);
259 instrument_event(RUNTIME_EVENT, EVENT_BARRIER, EVENT_END);
260 43 error = DLB_SUCCESS;
261 } else {
262 /* barrier not found in barrier_info, possibly a detached barrier */
263 4 error = DLB_NOUPDT;
264 }
265 } else {
266 2 error = DLB_ERR_NOCOMP;
267 }
268
269 49 return error;
270 }
271
272 11 int node_barrier_attach(subprocess_descriptor_t *spd, barrier_t *barrier) {
273 int error;
274
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 2 times.
11 if (spd->options.barrier) {
275 9 barrier_info_t *barrier_info = spd->barrier_info;
276
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 if (barrier == NULL) {
277
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (barrier_info->default_barrier == NULL) {
278 /* Register default barrier again */
279 1 bool lewi_barrier = parse_lewi_barrier(default_barrier_name,
280 1 spd->options.lewi_barrier,
281 1 spd->options.lewi_barrier_select, 0);
282 2 barrier_info->default_barrier = shmem_barrier__register(
283 1 barrier_info->default_barrier_name,
284 lewi_barrier);
285 // return number of participants
286
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 error = barrier_info->default_barrier ? 1 : DLB_ERR_NOMEM;
287 } else {
288 /* Default barrier already attached */
289 2 error = DLB_ERR_PERM;
290 }
291
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 } else if (unlikely(barrier == barrier_info->default_barrier)) {
292 /* barrier provided is the default barrier, already attached.
293 * (default_barrier pointer is never exposed, keep this one just in case) */
294 error = DLB_ERR_PERM;
295 } else {
296 6 int i = 0;
297 6 int max_barriers = shmem_barrier__get_max_barriers();
298 6 pthread_mutex_lock(&mutex);
299 {
300 /* Find first NULL place or barrier in barrier_list */
301 6 while (i<max_barriers
302
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 3 times.
8 && barrier_info->barrier_list[i] != NULL
303
3/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 2 times.
✓ Branch 3 taken 3 times.
13 && barrier_info->barrier_list[i] != barrier) {
304 2 ++i;
305 }
306
307
3/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 3 times.
6 if (i < max_barriers && barrier_info->barrier_list[i] == barrier) {
308 /* Already in the barrier_list */
309 3 error = DLB_ERR_PERM;
310 } else {
311 /* Attach */
312 3 error = shmem_barrier__attach(barrier);
313
314 /* Add barrier to the barrier_list */
315
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (error >= 0) {
316 3 barrier_info->barrier_list[i] = barrier;
317 }
318 }
319 }
320 6 pthread_mutex_unlock(&mutex);
321 }
322 } else {
323 /* no --barrier */
324 2 error = DLB_ERR_NOCOMP;
325 }
326
327 11 return error;
328 }
329
330 17 int node_barrier_detach(subprocess_descriptor_t *spd, barrier_t *barrier) {
331 int error;
332
2/2
✓ Branch 0 taken 15 times.
✓ Branch 1 taken 2 times.
17 if (spd->options.barrier) {
333 15 barrier_info_t *barrier_info = spd->barrier_info;
334
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 9 times.
15 if (barrier == NULL) {
335
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (barrier_info->default_barrier != NULL) {
336 /* Detach default barrier */
337 3 error = shmem_barrier__detach(barrier_info->default_barrier);
338
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (error >= 0) {
339 3 barrier_info->default_barrier = NULL;
340 }
341 } else {
342 /* Default barrier already detached */
343 3 error = DLB_ERR_PERM;
344 }
345
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 } else if (unlikely(barrier == barrier_info->default_barrier)) {
346 /* Detach default barrier.
347 * (default_barrier pointer is never exposed, keep this one just in case) */
348 error = shmem_barrier__detach(barrier_info->default_barrier);
349 if (error >= 0) {
350 barrier_info->default_barrier = NULL;
351 }
352 } else {
353 9 int i = 0;
354 9 int max_barriers = shmem_barrier__get_max_barriers();
355 9 pthread_mutex_lock(&mutex);
356 {
357 /* Find first NULL place or barrier in barrier_list */
358 9 while (i<max_barriers
359
2/2
✓ Branch 0 taken 11 times.
✓ Branch 1 taken 1 times.
12 && barrier_info->barrier_list[i] != NULL
360
3/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 8 times.
23 && barrier_info->barrier_list[i] != barrier) {
361 3 ++i;
362 }
363
364
3/4
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 8 times.
9 if (i == max_barriers || barrier_info->barrier_list[i] == NULL) {
365 /* Not found in barrier_list */
366 1 error = DLB_ERR_PERM;
367 } else {
368 /* Detach */
369 8 error = shmem_barrier__detach(barrier);
370
371 /* Remove barrier from the barrier_list */
372
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (error >= 0) {
373 8 memmove(&barrier_info->barrier_list[i],
374 8 &barrier_info->barrier_list[i+1],
375 8 sizeof(barrier_info->barrier_list[0]) * (max_barriers-1-i));
376 8 barrier_info->barrier_list[max_barriers-1] = NULL;
377 }
378 }
379 }
380 9 pthread_mutex_unlock(&mutex);
381 }
382 } else {
383 /* no --barrier */
384 2 error = DLB_ERR_NOCOMP;
385 }
386
387 17 return error;
388 }
389