| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2022 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #ifdef HAVE_CONFIG_H | ||
| 21 | #include <config.h> | ||
| 22 | #endif | ||
| 23 | |||
| 24 | #include "LB_comm/shmem_barrier.h" | ||
| 25 | |||
| 26 | #include "LB_core/DLB_kernel.h" | ||
| 27 | #include "LB_comm/shmem.h" | ||
| 28 | #include "apis/dlb_errors.h" | ||
| 29 | #include "support/atomic.h" | ||
| 30 | #include "support/debug.h" | ||
| 31 | #include "support/mask_utils.h" | ||
| 32 | #include "support/mytime.h" | ||
| 33 | #include "talp/talp.h" | ||
| 34 | |||
| 35 | #include <errno.h> | ||
| 36 | #include <pthread.h> | ||
| 37 | #include <stdio.h> | ||
| 38 | #include <string.h> | ||
| 39 | #include <stdlib.h> | ||
| 40 | |||
/* Per-barrier status bits stored in shared memory */
typedef struct barrier_flags {
    bool initialized:1;     /* set once the slot holds a registered barrier */
    bool lewi:1;            /* whether waiting participants simulate a blocking
                               call (into_sync_call / out_of_sync_call) */
} barrier_flags_t;
| 45 | |||
| 46 | typedef struct barrier_t { | ||
| 47 | char name[BARRIER_NAME_MAX]; | ||
| 48 | barrier_flags_t flags; | ||
| 49 | unsigned int participants; | ||
| 50 | atomic_uint ntimes; | ||
| 51 | atomic_uint count; | ||
| 52 | pthread_barrier_t barrier; | ||
| 53 | pthread_rwlock_t rwlock; | ||
| 54 | } barrier_t; | ||
| 55 | |||
| 56 | typedef struct { | ||
| 57 | bool initialized; | ||
| 58 | int max_barriers; // capacity | ||
| 59 | int num_barriers; // size, although detached may be counted | ||
| 60 | barrier_t barriers[]; | ||
| 61 | } shdata_t; | ||
| 62 | |||
| 63 | enum { SHMEM_BARRIER_VERSION = 7 }; | ||
| 64 | enum { SHMEM_TIMEOUT_SECONDS = 1 }; | ||
| 65 | |||
| 66 | static int max_barriers = 0; | ||
| 67 | static shmem_handler_t *shm_handler = NULL; | ||
| 68 | static shdata_t *shdata = NULL; | ||
| 69 | static const char *shmem_name = "barrier"; | ||
| 70 | |||
| 71 | 1 | static void cleanup_shmem(void *shdata_ptr, int pid) { | |
| 72 | |||
| 73 | 1 | bool shmem_empty = true; | |
| 74 | 1 | shdata_t *shared_data = shdata_ptr; | |
| 75 | 1 | int num_barriers = shared_data->num_barriers; | |
| 76 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int i = 0; i < num_barriers; i++) { |
| 77 | 1 | barrier_t *barrier = &shared_data->barriers[i]; | |
| 78 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (--barrier->participants == 0) { |
| 79 | 1 | *barrier = (const barrier_t){}; | |
| 80 | } else { | ||
| 81 | ✗ | shmem_empty = false; | |
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | /* If there are no registered barriers, make sure shmem is reset */ | ||
| 86 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (shmem_empty) { |
| 87 | 1 | memset(shared_data, 0, shmem_barrier__size()); | |
| 88 | } | ||
| 89 | 1 | } | |
| 90 | |||
| 91 | 63 | static void open_shmem(const char *shmem_key, int shmem_size_multiplier) { | |
| 92 | |||
| 93 | 63 | max_barriers = mu_get_system_size() * shmem_size_multiplier; | |
| 94 | 126 | shm_handler = shmem_init((void**)&shdata, | |
| 95 | 63 | &(const shmem_props_t) { | |
| 96 | 63 | .size = shmem_barrier__size(), | |
| 97 | .name = shmem_name, | ||
| 98 | .key = shmem_key, | ||
| 99 | .version = SHMEM_BARRIER_VERSION, | ||
| 100 | .cleanup_fn = cleanup_shmem, | ||
| 101 | }); | ||
| 102 | 63 | } | |
| 103 | |||
| 104 | 98 | int shmem_barrier__init(const char *shmem_key, int shmem_size_multiplier) { | |
| 105 | |||
| 106 |
2/2✓ Branch 0 taken 36 times.
✓ Branch 1 taken 62 times.
|
98 | if (shm_handler != NULL) return DLB_ERR_NOMEM; |
| 107 | |||
| 108 | 62 | open_shmem(shmem_key, shmem_size_multiplier); | |
| 109 | |||
| 110 | 62 | int error = DLB_SUCCESS; | |
| 111 | 62 | shmem_lock(shm_handler); | |
| 112 | { | ||
| 113 | // Initialize some values if this is the 1st process attached to the shmem | ||
| 114 |
1/2✓ Branch 0 taken 62 times.
✗ Branch 1 not taken.
|
62 | if (!shdata->initialized) { |
| 115 | 62 | shdata->initialized = true; | |
| 116 | 62 | shdata->num_barriers = 0; | |
| 117 | 62 | shdata->max_barriers = max_barriers; | |
| 118 | } else { | ||
| 119 | ✗ | if (shdata->max_barriers != max_barriers) { | |
| 120 | ✗ | error = DLB_ERR_INIT; | |
| 121 | } | ||
| 122 | } | ||
| 123 | } | ||
| 124 | 62 | shmem_unlock(shm_handler); | |
| 125 | |||
| 126 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 62 times.
|
62 | if (error == DLB_ERR_INIT) { |
| 127 | ✗ | warning("Cannot attach to Barrier shmem because existing size differ." | |
| 128 | " Existing shmem size: %d, expected: %d." | ||
| 129 | " Check for DLB_ARGS consistency among processes or clean up shared memory.", | ||
| 130 | ✗ | shdata->max_barriers, max_barriers); | |
| 131 | } | ||
| 132 | |||
| 133 |
1/2✓ Branch 0 taken 62 times.
✗ Branch 1 not taken.
|
62 | if (error == DLB_SUCCESS) { |
| 134 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 62 times.
|
62 | verbose(VB_BARRIER, "Barrier Module initialized."); |
| 135 | } | ||
| 136 | |||
| 137 | 62 | return error; | |
| 138 | } | ||
| 139 | |||
| 140 | 1 | void shmem_barrier_ext__init(const char *shmem_key, int shmem_size_multiplier) { | |
| 141 | |||
| 142 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (shm_handler != NULL) return; |
| 143 | |||
| 144 | 1 | open_shmem(shmem_key, shmem_size_multiplier); | |
| 145 | } | ||
| 146 | |||
| 147 | 99 | void shmem_barrier__finalize(const char *shmem_key, int shmem_size_multiplier) { | |
| 148 |
2/2✓ Branch 0 taken 37 times.
✓ Branch 1 taken 62 times.
|
99 | if (shm_handler == NULL) { |
| 149 | /* barrier_finalize may be called to finalize existing process | ||
| 150 | * even if the file descriptor is not opened. (DLB_PreInit + forc-exec case) */ | ||
| 151 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 37 times.
|
37 | if (shmem_exists(shmem_name, shmem_key)) { |
| 152 | ✗ | shmem_barrier_ext__init(shmem_key, shmem_size_multiplier); | |
| 153 | } else { | ||
| 154 | 37 | return; | |
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 62 times.
|
62 | verbose(VB_BARRIER, "Finalizing Barrier Module"); |
| 159 | |||
| 160 | 62 | shmem_finalize(shm_handler, NULL /* do not check if empty */); | |
| 161 | 62 | shm_handler = NULL; | |
| 162 | } | ||
| 163 | |||
| 164 | 1 | int shmem_barrier_ext__finalize(void) { | |
| 165 | // Protect double finalization | ||
| 166 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (shm_handler == NULL) { |
| 167 | ✗ | return DLB_ERR_NOSHMEM; | |
| 168 | } | ||
| 169 | |||
| 170 | // Shared memory destruction | ||
| 171 | 1 | shmem_finalize(shm_handler, NULL /* do not check if empty */); | |
| 172 | 1 | shm_handler = NULL; | |
| 173 | 1 | shdata = NULL; | |
| 174 | |||
| 175 | 1 | return DLB_SUCCESS; | |
| 176 | } | ||
| 177 | |||
| 178 | 118 | int shmem_barrier__get_max_barriers(void) { | |
| 179 | 118 | return max_barriers; | |
| 180 | } | ||
| 181 | |||
| 182 | /* Given a barrier_name, find whether the barrier is registered in the shared | ||
| 183 | * memory. Note that this function is not thread-safe */ | ||
| 184 | 31 | barrier_t* shmem_barrier__find(const char *barrier_name) { | |
| 185 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 30 times.
|
31 | if (shm_handler == NULL) return NULL; |
| 186 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 27 times.
|
30 | if (barrier_name == NULL) return NULL; |
| 187 | |||
| 188 | 27 | int num_barriers = shdata->num_barriers; | |
| 189 |
2/2✓ Branch 0 taken 82 times.
✓ Branch 1 taken 17 times.
|
99 | for (int i = 0; i < num_barriers; ++i) { |
| 190 |
2/2✓ Branch 0 taken 72 times.
✓ Branch 1 taken 10 times.
|
82 | if (shdata->barriers[i].participants > 0 |
| 191 |
1/2✓ Branch 0 taken 72 times.
✗ Branch 1 not taken.
|
72 | && shdata->barriers[i].flags.initialized |
| 192 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 62 times.
|
72 | && strncmp(shdata->barriers[i].name, barrier_name, |
| 193 | BARRIER_NAME_MAX-1) == 0) { | ||
| 194 | 10 | return &shdata->barriers[i]; | |
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | 17 | return NULL; | |
| 199 | } | ||
| 200 | |||
| 201 | /* Register and attach process to barrier. | ||
| 202 | * barrier_name: barrier name | ||
| 203 | * lewi: whether this barrier does lewi | ||
| 204 | */ | ||
| 205 | 152 | barrier_t* shmem_barrier__register(const char *barrier_name, bool lewi) { | |
| 206 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 152 times.
|
152 | if (shm_handler == NULL) return NULL; |
| 207 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 151 times.
|
152 | if (barrier_name == NULL) return NULL; |
| 208 | |||
| 209 | /* Obtain the shared memory lock to find the appropriate place for the new | ||
| 210 | * barrier. If the barrier is not created, the first process must | ||
| 211 | * initialize it before releasing the lock. If the barrier was already | ||
| 212 | * created, new processes may attach after acquiring both the shmem and the | ||
| 213 | * barrier specific lock. */ | ||
| 214 | 151 | int participants = -1; | |
| 215 | 151 | barrier_t *barrier = NULL; | |
| 216 | 151 | shmem_lock(shm_handler); | |
| 217 | { | ||
| 218 | /* Find a new spot, or an already registered barrier */ | ||
| 219 | 151 | barrier_t *empty_spot = NULL; | |
| 220 | 151 | int num_barriers = shdata->num_barriers; | |
| 221 |
2/2✓ Branch 0 taken 193 times.
✓ Branch 1 taken 104 times.
|
297 | for (int i = 0; i < num_barriers; ++i) { |
| 222 |
1/2✓ Branch 0 taken 193 times.
✗ Branch 1 not taken.
|
193 | if (empty_spot == NULL |
| 223 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 179 times.
|
193 | && shdata->barriers[i].participants == 0 |
| 224 |
1/2✓ Branch 0 taken 14 times.
✗ Branch 1 not taken.
|
14 | && !shdata->barriers[i].flags.initialized) { |
| 225 | 14 | empty_spot = &shdata->barriers[i]; | |
| 226 | } | ||
| 227 |
1/2✓ Branch 0 taken 179 times.
✗ Branch 1 not taken.
|
179 | else if (shdata->barriers[i].participants > 0 |
| 228 |
1/2✓ Branch 0 taken 179 times.
✗ Branch 1 not taken.
|
179 | && shdata->barriers[i].flags.initialized |
| 229 |
2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 132 times.
|
179 | && strncmp(shdata->barriers[i].name, barrier_name, |
| 230 | BARRIER_NAME_MAX-1) == 0) { | ||
| 231 | 47 | barrier = &shdata->barriers[i]; | |
| 232 | 47 | break; | |
| 233 | } | ||
| 234 | } | ||
| 235 | |||
| 236 | /* if barrier is not found and no empty spot, get a new position */ | ||
| 237 |
4/4✓ Branch 0 taken 149 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 135 times.
✓ Branch 3 taken 14 times.
|
151 | if (num_barriers < max_barriers && empty_spot == NULL) { |
| 238 | 135 | empty_spot = &shdata->barriers[num_barriers]; | |
| 239 | 135 | ++shdata->num_barriers; | |
| 240 | } | ||
| 241 | |||
| 242 | /* New barrier, lock first and initialize required fields */ | ||
| 243 |
4/4✓ Branch 0 taken 104 times.
✓ Branch 1 taken 47 times.
✓ Branch 2 taken 102 times.
✓ Branch 3 taken 2 times.
|
253 | if (barrier == NULL && empty_spot != NULL) { |
| 244 | 102 | barrier = empty_spot; | |
| 245 | 102 | *barrier = (const barrier_t){}; | |
| 246 | |||
| 247 | pthread_rwlockattr_t rwlockattr; | ||
| 248 | 102 | pthread_rwlockattr_init(&rwlockattr); | |
| 249 | 102 | pthread_rwlockattr_setpshared(&rwlockattr, PTHREAD_PROCESS_SHARED); | |
| 250 | 102 | pthread_rwlock_init(&barrier->rwlock, &rwlockattr); | |
| 251 | 102 | pthread_rwlockattr_destroy(&rwlockattr); | |
| 252 | |||
| 253 | /* Initialize only once the lock is acquired */ | ||
| 254 | 102 | pthread_rwlock_wrlock(&barrier->rwlock); | |
| 255 | { | ||
| 256 | 102 | barrier->flags = (const barrier_flags_t) { | |
| 257 | .initialized = true, | ||
| 258 | .lewi = lewi, | ||
| 259 | }; | ||
| 260 | 102 | barrier->participants = 1; | |
| 261 | 102 | participants = 1; | |
| 262 | 102 | snprintf(barrier->name, BARRIER_NAME_MAX, "%s", barrier_name); | |
| 263 | pthread_barrierattr_t barrierattr; | ||
| 264 | 102 | pthread_barrierattr_init(&barrierattr); | |
| 265 | 102 | pthread_barrierattr_setpshared(&barrierattr, PTHREAD_PROCESS_SHARED); | |
| 266 | 102 | pthread_barrier_init(&barrier->barrier, &barrierattr, barrier->participants); | |
| 267 | 102 | pthread_barrierattr_destroy(&barrierattr); | |
| 268 | } | ||
| 269 | 102 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 270 | |||
| 271 |
2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 2 times.
|
49 | } else if (barrier != NULL){ |
| 272 | /* Barrier was created by another participant, attach. | ||
| 273 | * (timedlock is used to avoid potential deadlocks) */ | ||
| 274 | struct timespec timeout; | ||
| 275 | 47 | get_time_real(&timeout); | |
| 276 | 47 | timeout.tv_sec += SHMEM_TIMEOUT_SECONDS; | |
| 277 | 47 | int timedwrlock_error = pthread_rwlock_timedwrlock(&barrier->rwlock, &timeout); | |
| 278 |
1/2✓ Branch 0 taken 47 times.
✗ Branch 1 not taken.
|
47 | if (likely(timedwrlock_error == 0)) { |
| 279 | /* Update participants */ | ||
| 280 | 47 | participants = ++barrier->participants; | |
| 281 | |||
| 282 | /* Create new barrier with the number of participants updated */ | ||
| 283 | 47 | pthread_barrier_destroy(&barrier->barrier); | |
| 284 | pthread_barrierattr_t attr; | ||
| 285 | 47 | pthread_barrierattr_init(&attr); | |
| 286 | 47 | pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
| 287 | 47 | pthread_barrier_init(&barrier->barrier, &attr, barrier->participants); | |
| 288 | 47 | pthread_barrierattr_destroy(&attr); | |
| 289 | |||
| 290 | 47 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 291 | |||
| 292 | ✗ | } else if (timedwrlock_error == ETIMEDOUT || timedwrlock_error == EDEADLK) { | |
| 293 | ✗ | shmem_unlock(shm_handler); | |
| 294 | ✗ | fatal("Timed out while creating shmem_barrier.\n" | |
| 295 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 296 | ✗ | } else if (timedwrlock_error == EINVAL) { | |
| 297 | ✗ | shmem_unlock(shm_handler); | |
| 298 | ✗ | fatal("Error acquiring timedwrlock while creating shmem_barrier.\n" | |
| 299 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 300 | } | ||
| 301 | } | ||
| 302 | } | ||
| 303 | 151 | shmem_unlock(shm_handler); | |
| 304 | |||
| 305 |
2/2✓ Branch 0 taken 149 times.
✓ Branch 1 taken 2 times.
|
151 | if (barrier != NULL) { |
| 306 | /* Attach if needed and print number of participants */ | ||
| 307 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 149 times.
|
149 | verbose(VB_BARRIER, "Attached to barrier %s. Participants: %d", |
| 308 | barrier->name, participants); | ||
| 309 | } | ||
| 310 | |||
| 311 | 151 | return barrier; | |
| 312 | } | ||
| 313 | |||
| 314 | /* The attach function should not have any races with the global shared memory. | ||
| 315 | * At most, 'barrier' points to an unitialized barrier and error is returned. | ||
| 316 | * */ | ||
| 317 | 11 | int shmem_barrier__attach(barrier_t *barrier) { | |
| 318 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
|
11 | if (barrier == NULL) return DLB_ERR_UNKNOWN; |
| 319 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 11 times.
|
11 | if (shm_handler == NULL) return DLB_ERR_NOSHMEM; |
| 320 | |||
| 321 | #ifdef DEBUG_VERSION | ||
| 322 | int count; | ||
| 323 | #endif | ||
| 324 | int participants; | ||
| 325 | 11 | pthread_rwlock_wrlock(&barrier->rwlock); | |
| 326 | { | ||
| 327 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 1 times.
|
11 | if (!barrier->flags.initialized |
| 328 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | || barrier->participants == 0) { |
| 329 | 1 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 330 | 1 | return DLB_ERR_PERM; | |
| 331 | } | ||
| 332 | |||
| 333 | /* Update participants */ | ||
| 334 | 10 | participants = ++barrier->participants; | |
| 335 | |||
| 336 | /* Create new barrier with the number of participants updated */ | ||
| 337 | 10 | pthread_barrier_destroy(&barrier->barrier); | |
| 338 | pthread_barrierattr_t attr; | ||
| 339 | 10 | pthread_barrierattr_init(&attr); | |
| 340 | 10 | pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
| 341 | 10 | pthread_barrier_init(&barrier->barrier, &attr, barrier->participants); | |
| 342 | 10 | pthread_barrierattr_destroy(&attr); | |
| 343 | |||
| 344 | #ifdef DEBUG_VERSION | ||
| 345 | 10 | count = barrier->count; | |
| 346 | #endif | ||
| 347 | } | ||
| 348 | 10 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 349 | |||
| 350 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | ensure(count == 0, "Barrier Shared memory inconsistency while attaching " |
| 351 | "barrier (count = %d).\n" "Please, report at " PACKAGE_BUGREPORT, | ||
| 352 | count); | ||
| 353 | |||
| 354 | 10 | return participants; | |
| 355 | } | ||
| 356 | |||
| 357 | /* The detach function may remove the barrier if 'participants' reaches 0 and | ||
| 358 | * compete with a barrier creation. This function needs to acquire both locks. */ | ||
| 359 | 1341 | int shmem_barrier__detach(barrier_t *barrier) { | |
| 360 |
2/2✓ Branch 0 taken 1182 times.
✓ Branch 1 taken 159 times.
|
1341 | if (barrier == NULL) return DLB_ERR_UNKNOWN; |
| 361 |
2/2✓ Branch 0 taken 45 times.
✓ Branch 1 taken 114 times.
|
159 | if (shm_handler == NULL) return DLB_ERR_NOSHMEM; |
| 362 | |||
| 363 | #ifdef DEBUG_VERSION | ||
| 364 | 114 | int count = -1; | |
| 365 | #endif | ||
| 366 | 114 | int participants = -1; | |
| 367 | |||
| 368 | 114 | shmem_lock(shm_handler); | |
| 369 | { | ||
| 370 | struct timespec timeout; | ||
| 371 | 114 | get_time_real(&timeout); | |
| 372 | 114 | timeout.tv_sec += SHMEM_TIMEOUT_SECONDS; | |
| 373 | 114 | int timedwrlock_error = pthread_rwlock_timedwrlock(&barrier->rwlock, &timeout); | |
| 374 |
1/2✓ Branch 0 taken 114 times.
✗ Branch 1 not taken.
|
114 | if (likely(timedwrlock_error == 0)) { |
| 375 | |||
| 376 |
2/2✓ Branch 0 taken 113 times.
✓ Branch 1 taken 1 times.
|
114 | if (!barrier->flags.initialized |
| 377 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 113 times.
|
113 | || barrier->participants == 0) { |
| 378 | 1 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 379 | 1 | shmem_unlock(shm_handler); | |
| 380 | 1 | return DLB_ERR_PERM; | |
| 381 | } | ||
| 382 | |||
| 383 | #ifdef DEBUG_VERSION | ||
| 384 | 113 | count = barrier->count; | |
| 385 | #endif | ||
| 386 | /* Both locks are acquired and barrier is valid, detach: */ | ||
| 387 | |||
| 388 | /* Update participants */ | ||
| 389 | 113 | participants = --barrier->participants; | |
| 390 | |||
| 391 |
2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 71 times.
|
113 | if (participants > 0) { |
| 392 | /* Create new barrier with the number of participants updated */ | ||
| 393 | 42 | pthread_barrier_destroy(&barrier->barrier); | |
| 394 | pthread_barrierattr_t attr; | ||
| 395 | 42 | pthread_barrierattr_init(&attr); | |
| 396 | 42 | pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
| 397 | 42 | pthread_barrier_init(&barrier->barrier, &attr, barrier->participants); | |
| 398 | 42 | pthread_barrierattr_destroy(&attr); | |
| 399 | |||
| 400 | 42 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 401 | } else { | ||
| 402 | /* If this is the last participant, uninitialize barrier */ | ||
| 403 | 71 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 404 | 71 | pthread_barrier_destroy(&barrier->barrier); | |
| 405 | 71 | pthread_rwlock_destroy(&barrier->rwlock); | |
| 406 | 71 | *barrier = (const barrier_t){}; | |
| 407 | |||
| 408 | /* Try to compact barrier list */ | ||
| 409 |
1/2✓ Branch 0 taken 94 times.
✗ Branch 1 not taken.
|
94 | for (int i = shdata->num_barriers - 1; i >= 0; --i) { |
| 410 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 71 times.
|
94 | if (shdata->barriers[i].flags.initialized) { |
| 411 | 23 | --shdata->num_barriers; | |
| 412 | } else { | ||
| 413 | 71 | break; | |
| 414 | } | ||
| 415 | } | ||
| 416 | } | ||
| 417 | ✗ | } else if (timedwrlock_error == ETIMEDOUT || timedwrlock_error == EDEADLK) { | |
| 418 | ✗ | shmem_unlock(shm_handler); | |
| 419 | ✗ | fatal("Timed out while detaching barrier.\n" | |
| 420 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 421 | ✗ | } else if (timedwrlock_error == EINVAL) { | |
| 422 | ✗ | shmem_unlock(shm_handler); | |
| 423 | ✗ | fatal("Error acquiring timedwrlock while detaching barrier.\n" | |
| 424 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 425 | } | ||
| 426 | } | ||
| 427 | 113 | shmem_unlock(shm_handler); | |
| 428 | |||
| 429 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 113 times.
|
113 | ensure(count == 0, "Barrier Shared memory inconsistency while attaching " |
| 430 | "barrier (count = %d).\n" "Please, report at " PACKAGE_BUGREPORT, | ||
| 431 | count); | ||
| 432 | |||
| 433 | 113 | return participants; | |
| 434 | } | ||
| 435 | |||
| 436 | 49 | void shmem_barrier__barrier(barrier_t *barrier) { | |
| 437 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 49 times.
|
49 | if (unlikely(shm_handler == NULL)) return; |
| 438 | |||
| 439 | 49 | pthread_rwlock_rdlock(&barrier->rwlock); | |
| 440 | { | ||
| 441 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 48 times.
|
49 | if (unlikely(!barrier->flags.initialized)) { |
| 442 | 1 | warning("Trying to use a non initialized barrier"); | |
| 443 | 1 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 444 | 1 | return; | |
| 445 | } | ||
| 446 | |||
| 447 | 48 | unsigned int participant_number = DLB_ATOMIC_ADD_FETCH(&barrier->count, 1); | |
| 448 | 48 | bool last_in = participant_number == barrier->participants; | |
| 449 | |||
| 450 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | verbose(VB_BARRIER, "Entering barrier %s%s", barrier->name, last_in ? " (last)" : ""); |
| 451 | |||
| 452 |
2/2✓ Branch 0 taken 33 times.
✓ Branch 1 taken 15 times.
|
48 | if (last_in) { |
| 453 | // Barrier | ||
| 454 | 33 | pthread_barrier_wait(&barrier->barrier); | |
| 455 | |||
| 456 | // Increase ntimes counter | ||
| 457 | 33 | DLB_ATOMIC_ADD_RLX(&barrier->ntimes, 1); | |
| 458 | } else { | ||
| 459 | // Only if this process is not the last one, act as a blocking call | ||
| 460 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 8 times.
|
15 | if (barrier->flags.lewi) { |
| 461 | 7 | sync_call_flags_t mpi_flags = (const sync_call_flags_t) { | |
| 462 | .is_dlb_barrier = true, | ||
| 463 | .is_blocking = true, | ||
| 464 | .is_collective = true, | ||
| 465 | .do_lewi = true, | ||
| 466 | }; | ||
| 467 | 7 | into_sync_call(mpi_flags); | |
| 468 | } | ||
| 469 | |||
| 470 | // Barrier | ||
| 471 | 15 | pthread_barrier_wait(&barrier->barrier); | |
| 472 | |||
| 473 | // Recover resources for those processes that simulated a blocking call | ||
| 474 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 8 times.
|
15 | if (barrier->flags.lewi) { |
| 475 | 7 | sync_call_flags_t mpi_flags = (const sync_call_flags_t) { | |
| 476 | .is_dlb_barrier = true, | ||
| 477 | .is_blocking = true, | ||
| 478 | .is_collective = true, | ||
| 479 | .do_lewi = true, | ||
| 480 | }; | ||
| 481 | 7 | out_of_sync_call(mpi_flags); | |
| 482 | } | ||
| 483 | } | ||
| 484 | |||
| 485 | 48 | unsigned int participants_left = DLB_ATOMIC_SUB_FETCH(&barrier->count, 1); | |
| 486 | 48 | bool last_out = participants_left == 0; | |
| 487 | |||
| 488 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 48 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
48 | verbose(VB_BARRIER, "Leaving barrier %s%s", barrier->name, last_out ? " (last)" : ""); |
| 489 | |||
| 490 | /* WARNING: There may be a race condition with the 'count' value in | ||
| 491 | * consecutive barriers (A -> B), if one process increases the | ||
| 492 | * 'count' value after entering B, while another process still | ||
| 493 | * hasn't decreased it in B. Anyway, this is completely harmless | ||
| 494 | * since it only affects the verbose message. | ||
| 495 | */ | ||
| 496 | } | ||
| 497 | 48 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 498 | } | ||
| 499 | |||
| 500 | 8 | void shmem_barrier__print_info(const char *shmem_key, int shmem_size_multiplier) { | |
| 501 | |||
| 502 | /* If the shmem is not opened, obtain a temporary fd */ | ||
| 503 | 8 | bool temporary_shmem = shm_handler == NULL; | |
| 504 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
|
8 | if (temporary_shmem) { |
| 505 | 1 | shmem_barrier_ext__init(shmem_key, shmem_size_multiplier); | |
| 506 | } | ||
| 507 | |||
| 508 | /* Make a full copy of the shared memory */ | ||
| 509 | 8 | shdata_t *shdata_copy = malloc(shmem_barrier__size()); | |
| 510 | 8 | shmem_lock(shm_handler); | |
| 511 | { | ||
| 512 | 8 | memcpy(shdata_copy, shdata, shmem_barrier__size()); | |
| 513 | } | ||
| 514 | 8 | shmem_unlock(shm_handler); | |
| 515 | |||
| 516 | /* Close shmem if needed */ | ||
| 517 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 7 times.
|
8 | if (temporary_shmem) { |
| 518 | 1 | shmem_barrier_ext__finalize(); | |
| 519 | } | ||
| 520 | |||
| 521 | /* Initialize buffer */ | ||
| 522 | print_buffer_t buffer; | ||
| 523 | 8 | printbuffer_init(&buffer); | |
| 524 | |||
| 525 | /* Set up line buffer */ | ||
| 526 | enum { MAX_LINE_LEN = 128 }; | ||
| 527 | char line[MAX_LINE_LEN]; | ||
| 528 | |||
| 529 | 8 | int num_barriers = shdata_copy->num_barriers; | |
| 530 |
2/2✓ Branch 0 taken 11 times.
✓ Branch 1 taken 8 times.
|
19 | for (int i = 0; i < num_barriers; ++i) { |
| 531 | 11 | barrier_t *barrier = &shdata_copy->barriers[i]; | |
| 532 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 1 times.
|
11 | if (barrier->flags.initialized) { |
| 533 | |||
| 534 | /* Append line to buffer */ | ||
| 535 | 10 | snprintf(line, MAX_LINE_LEN, | |
| 536 | " | %14s | %12u | %12u | %12u |", | ||
| 537 | 10 | barrier->name, barrier->participants, barrier->count, barrier->ntimes); | |
| 538 | 10 | printbuffer_append(&buffer, line); | |
| 539 | } | ||
| 540 | } | ||
| 541 | |||
| 542 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 1 times.
|
8 | if (buffer.addr[0] != '\0' ) { |
| 543 | 7 | info0("=== Barriers ===\n" | |
| 544 | " | Barrier Name | Participants | Num. blocked | Times compl. |\n" | ||
| 545 | "%s", buffer.addr); | ||
| 546 | } | ||
| 547 | 8 | printbuffer_destroy(&buffer); | |
| 548 | 8 | free(shdata_copy); | |
| 549 | 8 | } | |
| 550 | |||
| 551 | 8 | bool shmem_barrier__exists(void) { | |
| 552 | 8 | return shm_handler != NULL; | |
| 553 | } | ||
| 554 | |||
| 555 | 1 | int shmem_barrier__version(void) { | |
| 556 | 1 | return SHMEM_BARRIER_VERSION; | |
| 557 | } | ||
| 558 | |||
| 559 | 81 | size_t shmem_barrier__size(void) { | |
| 560 | // max_barriers contains a value once shmem is initialized, | ||
| 561 | // otherwise return default size | ||
| 562 | 81 | return sizeof(shdata_t) + sizeof(barrier_t) * ( | |
| 563 |
2/2✓ Branch 0 taken 80 times.
✓ Branch 1 taken 1 times.
|
81 | max_barriers > 0 ? max_barriers : mu_get_system_size()); |
| 564 | } | ||
| 565 |