| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2022 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #ifdef HAVE_CONFIG_H | ||
| 21 | #include <config.h> | ||
| 22 | #endif | ||
| 23 | |||
| 24 | #include "LB_comm/shmem_barrier.h" | ||
| 25 | |||
| 26 | #include "LB_core/DLB_kernel.h" | ||
| 27 | #include "LB_comm/shmem.h" | ||
| 28 | #include "apis/dlb_errors.h" | ||
| 29 | #include "support/atomic.h" | ||
| 30 | #include "support/debug.h" | ||
| 31 | #include "support/mask_utils.h" | ||
| 32 | #include "support/mytime.h" | ||
| 33 | #include "talp/talp.h" | ||
| 34 | |||
| 35 | #include <errno.h> | ||
| 36 | #include <pthread.h> | ||
| 37 | #include <stdio.h> | ||
| 38 | #include <string.h> | ||
| 39 | #include <stdlib.h> | ||
| 40 | |||
| 41 | typedef struct barrier_flags { | ||
| 42 | bool initialized:1; | ||
| 43 | bool lewi:1; | ||
| 44 | } barrier_flags_t; | ||
| 45 | |||
| 46 | typedef struct barrier_t { | ||
| 47 | char name[BARRIER_NAME_MAX]; | ||
| 48 | barrier_flags_t flags; | ||
| 49 | unsigned int participants; | ||
| 50 | atomic_uint ntimes; | ||
| 51 | atomic_uint count; | ||
| 52 | pthread_barrier_t barrier; | ||
| 53 | pthread_rwlock_t rwlock; | ||
| 54 | } barrier_t; | ||
| 55 | |||
| 56 | typedef struct { | ||
| 57 | bool initialized; | ||
| 58 | int max_barriers; // capacity | ||
| 59 | int num_barriers; // size, although detached may be counted | ||
| 60 | barrier_t barriers[]; | ||
| 61 | } shdata_t; | ||
| 62 | |||
| 63 | enum { SHMEM_BARRIER_VERSION = 7 }; | ||
| 64 | enum { SHMEM_TIMEOUT_SECONDS = 1 }; | ||
| 65 | |||
| 66 | static int max_barriers = 0; | ||
| 67 | static shmem_handler_t *shm_handler = NULL; | ||
| 68 | static shdata_t *shdata = NULL; | ||
| 69 | static const char *shmem_name = "barrier"; | ||
| 70 | |||
| 71 | 1 | static void cleanup_shmem(void *shdata_ptr, int pid) { | |
| 72 | |||
| 73 | 1 | bool shmem_empty = true; | |
| 74 | 1 | shdata_t *shared_data = shdata_ptr; | |
| 75 | 1 | int num_barriers = shared_data->num_barriers; | |
| 76 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | for (int i = 0; i < num_barriers; i++) { |
| 77 | 1 | barrier_t *barrier = &shared_data->barriers[i]; | |
| 78 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (--barrier->participants == 0) { |
| 79 | 1 | *barrier = (const barrier_t){}; | |
| 80 | } else { | ||
| 81 | ✗ | shmem_empty = false; | |
| 82 | } | ||
| 83 | } | ||
| 84 | |||
| 85 | /* If there are no registered barriers, make sure shmem is reset */ | ||
| 86 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (shmem_empty) { |
| 87 | 1 | memset(shared_data, 0, shmem_barrier__size()); | |
| 88 | } | ||
| 89 | 1 | } | |
| 90 | |||
| 91 | 62 | static void open_shmem(const char *shmem_key, int shmem_size_multiplier) { | |
| 92 | |||
| 93 | 62 | max_barriers = mu_get_system_size() * shmem_size_multiplier; | |
| 94 | 124 | shm_handler = shmem_init((void**)&shdata, | |
| 95 | 62 | &(const shmem_props_t) { | |
| 96 | 62 | .size = shmem_barrier__size(), | |
| 97 | .name = shmem_name, | ||
| 98 | .key = shmem_key, | ||
| 99 | .version = SHMEM_BARRIER_VERSION, | ||
| 100 | .cleanup_fn = cleanup_shmem, | ||
| 101 | }); | ||
| 102 | 62 | } | |
| 103 | |||
| 104 | 91 | int shmem_barrier__init(const char *shmem_key, int shmem_size_multiplier) { | |
| 105 | |||
| 106 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 61 times.
|
91 | if (shm_handler != NULL) return DLB_ERR_NOMEM; |
| 107 | |||
| 108 | 61 | open_shmem(shmem_key, shmem_size_multiplier); | |
| 109 | |||
| 110 | 61 | int error = DLB_SUCCESS; | |
| 111 | 61 | shmem_lock(shm_handler); | |
| 112 | { | ||
| 113 | // Initialize some values if this is the 1st process attached to the shmem | ||
| 114 |
1/2✓ Branch 0 taken 61 times.
✗ Branch 1 not taken.
|
61 | if (!shdata->initialized) { |
| 115 | 61 | shdata->initialized = true; | |
| 116 | 61 | shdata->num_barriers = 0; | |
| 117 | 61 | shdata->max_barriers = max_barriers; | |
| 118 | } else { | ||
| 119 | ✗ | if (shdata->max_barriers != max_barriers) { | |
| 120 | ✗ | error = DLB_ERR_INIT; | |
| 121 | } | ||
| 122 | } | ||
| 123 | } | ||
| 124 | 61 | shmem_unlock(shm_handler); | |
| 125 | |||
| 126 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 61 times.
|
61 | if (error == DLB_ERR_INIT) { |
| 127 | ✗ | warning("Cannot attach to Barrier shmem because existing size differ." | |
| 128 | " Existing shmem size: %d, expected: %d." | ||
| 129 | " Check for DLB_ARGS consistency among processes or clean up shared memory.", | ||
| 130 | ✗ | shdata->max_barriers, max_barriers); | |
| 131 | } | ||
| 132 | |||
| 133 |
1/2✓ Branch 0 taken 61 times.
✗ Branch 1 not taken.
|
61 | if (error == DLB_SUCCESS) { |
| 134 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 61 times.
|
61 | verbose(VB_BARRIER, "Barrier Module initialized."); |
| 135 | } | ||
| 136 | |||
| 137 | 61 | return error; | |
| 138 | } | ||
| 139 | |||
| 140 | 1 | void shmem_barrier_ext__init(const char *shmem_key, int shmem_size_multiplier) { | |
| 141 | |||
| 142 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (shm_handler != NULL) return; |
| 143 | |||
| 144 | 1 | open_shmem(shmem_key, shmem_size_multiplier); | |
| 145 | } | ||
| 146 | |||
| 147 | 92 | void shmem_barrier__finalize(const char *shmem_key, int shmem_size_multiplier) { | |
| 148 |
2/2✓ Branch 0 taken 31 times.
✓ Branch 1 taken 61 times.
|
92 | if (shm_handler == NULL) { |
| 149 | /* barrier_finalize may be called to finalize an existing process | ||
| 150 | * even if the file descriptor is not opened. (DLB_PreInit + fork-exec case) */ | ||
| 151 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 31 times.
|
31 | if (shmem_exists(shmem_name, shmem_key)) { |
| 152 | ✗ | shmem_barrier_ext__init(shmem_key, shmem_size_multiplier); | |
| 153 | } else { | ||
| 154 | 31 | return; | |
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 61 times.
|
61 | verbose(VB_BARRIER, "Finalizing Barrier Module"); |
| 159 | |||
| 160 | 61 | shmem_finalize(shm_handler, NULL /* do not check if empty */); | |
| 161 | 61 | shm_handler = NULL; | |
| 162 | } | ||
| 163 | |||
| 164 | 1 | int shmem_barrier_ext__finalize(void) { | |
| 165 | // Protect double finalization | ||
| 166 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (shm_handler == NULL) { |
| 167 | ✗ | return DLB_ERR_NOSHMEM; | |
| 168 | } | ||
| 169 | |||
| 170 | // Shared memory destruction | ||
| 171 | 1 | shmem_finalize(shm_handler, NULL /* do not check if empty */); | |
| 172 | 1 | shm_handler = NULL; | |
| 173 | 1 | shdata = NULL; | |
| 174 | |||
| 175 | 1 | return DLB_SUCCESS; | |
| 176 | } | ||
| 177 | |||
| 178 | 106 | int shmem_barrier__get_max_barriers(void) { | |
| 179 | 106 | return max_barriers; | |
| 180 | } | ||
| 181 | |||
| 182 | // Basically for testing purposes, num_barriers is not decremented if a barrier | ||
| 183 | // in the middle is detached, so we count the number of initialized barriers. | ||
| 184 | 6 | int shmem_barrier__get_num_barriers(void) { | |
| 185 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
|
6 | if (shdata == NULL) return -1; |
| 186 | |||
| 187 | 6 | int real_num_barriers = 0; | |
| 188 | 6 | shmem_lock(shm_handler); | |
| 189 | { | ||
| 190 | 6 | int num_barriers = shdata->num_barriers; | |
| 191 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 6 times.
|
16 | for (int i = 0; i < num_barriers; ++i) { |
| 192 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (shdata->barriers[i].flags.initialized) { |
| 193 | 10 | ++real_num_barriers; | |
| 194 | } | ||
| 195 | } | ||
| 196 | } | ||
| 197 | 6 | shmem_unlock(shm_handler); | |
| 198 | |||
| 199 | 6 | return real_num_barriers; | |
| 200 | } | ||
| 201 | |||
| 202 | /* Given a barrier_name, find whether the barrier is registered in the shared | ||
| 203 | * memory. Note that this function is not thread-safe */ | ||
| 204 | 10 | barrier_t* shmem_barrier__find(const char *barrier_name) { | |
| 205 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 9 times.
|
10 | if (shm_handler == NULL) return NULL; |
| 206 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
|
9 | if (barrier_name == NULL) return NULL; |
| 207 | |||
| 208 | 6 | int num_barriers = shdata->num_barriers; | |
| 209 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 3 times.
|
8 | for (int i = 0; i < num_barriers; ++i) { |
| 210 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (shdata->barriers[i].participants > 0 |
| 211 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | && shdata->barriers[i].flags.initialized |
| 212 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
|
5 | && strncmp(shdata->barriers[i].name, barrier_name, |
| 213 | BARRIER_NAME_MAX-1) == 0) { | ||
| 214 | 3 | return &shdata->barriers[i]; | |
| 215 | } | ||
| 216 | } | ||
| 217 | |||
| 218 | 3 | return NULL; | |
| 219 | } | ||
| 220 | |||
| 221 | /* Register and attach process to barrier. | ||
| 222 | * barrier_name: barrier name | ||
| 223 | * lewi: whether this barrier does lewi | ||
| 224 | */ | ||
| 225 | 120 | barrier_t* shmem_barrier__register(const char *barrier_name, bool lewi) { | |
| 226 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 120 times.
|
120 | if (shm_handler == NULL) return NULL; |
| 227 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 119 times.
|
120 | if (barrier_name == NULL) return NULL; |
| 228 | |||
| 229 | /* Obtain the shared memory lock to find the appropriate place for the new | ||
| 230 | * barrier. If the barrier is not created, the first process must | ||
| 231 | * initialize it before releasing the lock. If the barrier was already | ||
| 232 | * created, new processes may attach after acquiring both the shmem and the | ||
| 233 | * barrier specific lock. */ | ||
| 234 | 119 | int participants = -1; | |
| 235 | 119 | barrier_t *barrier = NULL; | |
| 236 | 119 | shmem_lock(shm_handler); | |
| 237 | { | ||
| 238 | /* Find a new spot, or an already registered barrier */ | ||
| 239 | 119 | barrier_t *empty_spot = NULL; | |
| 240 | 119 | int num_barriers = shdata->num_barriers; | |
| 241 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 80 times.
|
146 | for (int i = 0; i < num_barriers; ++i) { |
| 242 |
1/2✓ Branch 0 taken 66 times.
✗ Branch 1 not taken.
|
66 | if (empty_spot == NULL |
| 243 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 66 times.
|
66 | && shdata->barriers[i].participants == 0 |
| 244 | ✗ | && !shdata->barriers[i].flags.initialized) { | |
| 245 | ✗ | empty_spot = &shdata->barriers[i]; | |
| 246 | } | ||
| 247 |
1/2✓ Branch 0 taken 66 times.
✗ Branch 1 not taken.
|
66 | else if (shdata->barriers[i].participants > 0 |
| 248 |
1/2✓ Branch 0 taken 66 times.
✗ Branch 1 not taken.
|
66 | && shdata->barriers[i].flags.initialized |
| 249 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 27 times.
|
66 | && strncmp(shdata->barriers[i].name, barrier_name, |
| 250 | BARRIER_NAME_MAX-1) == 0) { | ||
| 251 | 39 | barrier = &shdata->barriers[i]; | |
| 252 | 39 | break; | |
| 253 | } | ||
| 254 | } | ||
| 255 | |||
| 256 | /* If neither barrier nor empty_spot is found, try to get a new position from the end */ | ||
| 257 |
5/6✓ Branch 0 taken 80 times.
✓ Branch 1 taken 39 times.
✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 78 times.
✓ Branch 5 taken 2 times.
|
119 | if (barrier == NULL && empty_spot == NULL && num_barriers < max_barriers) { |
| 258 | 78 | empty_spot = &shdata->barriers[num_barriers]; | |
| 259 | 78 | ++shdata->num_barriers; | |
| 260 | } | ||
| 261 | |||
| 262 | /* New barrier, lock first and initialize required fields */ | ||
| 263 |
4/4✓ Branch 0 taken 80 times.
✓ Branch 1 taken 39 times.
✓ Branch 2 taken 78 times.
✓ Branch 3 taken 2 times.
|
197 | if (barrier == NULL && empty_spot != NULL) { |
| 264 | 78 | barrier = empty_spot; | |
| 265 | 78 | *barrier = (const barrier_t){}; | |
| 266 | |||
| 267 | pthread_rwlockattr_t rwlockattr; | ||
| 268 | 78 | pthread_rwlockattr_init(&rwlockattr); | |
| 269 | 78 | pthread_rwlockattr_setpshared(&rwlockattr, PTHREAD_PROCESS_SHARED); | |
| 270 | 78 | pthread_rwlock_init(&barrier->rwlock, &rwlockattr); | |
| 271 | 78 | pthread_rwlockattr_destroy(&rwlockattr); | |
| 272 | |||
| 273 | /* Initialize only once the lock is acquired */ | ||
| 274 | 78 | pthread_rwlock_wrlock(&barrier->rwlock); | |
| 275 | { | ||
| 276 | 78 | barrier->flags = (const barrier_flags_t) { | |
| 277 | .initialized = true, | ||
| 278 | .lewi = lewi, | ||
| 279 | }; | ||
| 280 | 78 | barrier->participants = 1; | |
| 281 | 78 | participants = 1; | |
| 282 | 78 | snprintf(barrier->name, BARRIER_NAME_MAX, "%s", barrier_name); | |
| 283 | pthread_barrierattr_t barrierattr; | ||
| 284 | 78 | pthread_barrierattr_init(&barrierattr); | |
| 285 | 78 | pthread_barrierattr_setpshared(&barrierattr, PTHREAD_PROCESS_SHARED); | |
| 286 | 78 | pthread_barrier_init(&barrier->barrier, &barrierattr, barrier->participants); | |
| 287 | 78 | pthread_barrierattr_destroy(&barrierattr); | |
| 288 | } | ||
| 289 | 78 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 290 | |||
| 291 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 2 times.
|
41 | } else if (barrier != NULL){ |
| 292 | /* Barrier was created by another participant, attach. | ||
| 293 | * (timedlock is used to avoid potential deadlocks) */ | ||
| 294 | struct timespec timeout; | ||
| 295 | 39 | get_time_real(&timeout); | |
| 296 | 39 | timeout.tv_sec += SHMEM_TIMEOUT_SECONDS; | |
| 297 | 39 | int timedwrlock_error = pthread_rwlock_timedwrlock(&barrier->rwlock, &timeout); | |
| 298 |
1/2✓ Branch 0 taken 39 times.
✗ Branch 1 not taken.
|
39 | if (likely(timedwrlock_error == 0)) { |
| 299 | /* Update participants */ | ||
| 300 | 39 | participants = ++barrier->participants; | |
| 301 | |||
| 302 | /* Create new barrier with the number of participants updated */ | ||
| 303 | 39 | pthread_barrier_destroy(&barrier->barrier); | |
| 304 | pthread_barrierattr_t attr; | ||
| 305 | 39 | pthread_barrierattr_init(&attr); | |
| 306 | 39 | pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
| 307 | 39 | pthread_barrier_init(&barrier->barrier, &attr, barrier->participants); | |
| 308 | 39 | pthread_barrierattr_destroy(&attr); | |
| 309 | |||
| 310 | 39 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 311 | |||
| 312 | ✗ | } else if (timedwrlock_error == ETIMEDOUT || timedwrlock_error == EDEADLK) { | |
| 313 | ✗ | shmem_unlock(shm_handler); | |
| 314 | ✗ | fatal("Timed out while creating shmem_barrier.\n" | |
| 315 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 316 | ✗ | } else if (timedwrlock_error == EINVAL) { | |
| 317 | ✗ | shmem_unlock(shm_handler); | |
| 318 | ✗ | fatal("Error acquiring timedwrlock while creating shmem_barrier.\n" | |
| 319 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 320 | } | ||
| 321 | } | ||
| 322 | } | ||
| 323 | 119 | shmem_unlock(shm_handler); | |
| 324 | |||
| 325 |
2/2✓ Branch 0 taken 117 times.
✓ Branch 1 taken 2 times.
|
119 | if (barrier != NULL) { |
| 326 | /* Attach if needed and print number of participants */ | ||
| 327 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 117 times.
|
117 | verbose(VB_BARRIER, "Attached to barrier %s. Participants: %d", |
| 328 | barrier->name, participants); | ||
| 329 | } | ||
| 330 | |||
| 331 | 119 | return barrier; | |
| 332 | } | ||
| 333 | |||
| 334 | /* The attach function should not have any races with the global shared memory. | ||
| 335 | * At most, 'barrier' points to an uninitialized barrier and an error is returned. | ||
| 336 | * */ | ||
| 337 | 8 | int shmem_barrier__attach(barrier_t *barrier) { | |
| 338 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (barrier == NULL) return DLB_ERR_UNKNOWN; |
| 339 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | if (shm_handler == NULL) return DLB_ERR_NOSHMEM; |
| 340 | |||
| 341 | #ifdef DEBUG_VERSION | ||
| 342 | int count; | ||
| 343 | #endif | ||
| 344 | int participants; | ||
| 345 | 8 | pthread_rwlock_wrlock(&barrier->rwlock); | |
| 346 | { | ||
| 347 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 1 times.
|
8 | if (!barrier->flags.initialized |
| 348 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | || barrier->participants == 0) { |
| 349 | 1 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 350 | 1 | return DLB_ERR_PERM; | |
| 351 | } | ||
| 352 | |||
| 353 | /* Update participants */ | ||
| 354 | 7 | participants = ++barrier->participants; | |
| 355 | |||
| 356 | /* Create new barrier with the number of participants updated */ | ||
| 357 | 7 | pthread_barrier_destroy(&barrier->barrier); | |
| 358 | pthread_barrierattr_t attr; | ||
| 359 | 7 | pthread_barrierattr_init(&attr); | |
| 360 | 7 | pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
| 361 | 7 | pthread_barrier_init(&barrier->barrier, &attr, barrier->participants); | |
| 362 | 7 | pthread_barrierattr_destroy(&attr); | |
| 363 | |||
| 364 | #ifdef DEBUG_VERSION | ||
| 365 | 7 | count = barrier->count; | |
| 366 | #endif | ||
| 367 | } | ||
| 368 | 7 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 369 | |||
| 370 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
|
7 | ensure(count == 0, "Barrier Shared memory inconsistency while attaching " |
| 371 | "barrier (count = %d).\n" "Please, report at " PACKAGE_BUGREPORT, | ||
| 372 | count); | ||
| 373 | |||
| 374 | 7 | return participants; | |
| 375 | } | ||
| 376 | |||
| 377 | /* The detach function may remove the barrier if 'participants' reaches 0 and | ||
| 378 | * compete with a barrier creation. This function needs to acquire both locks. */ | ||
| 379 | 939 | int shmem_barrier__detach(barrier_t *barrier) { | |
| 380 |
2/2✓ Branch 0 taken 815 times.
✓ Branch 1 taken 124 times.
|
939 | if (barrier == NULL) return DLB_ERR_UNKNOWN; |
| 381 |
2/2✓ Branch 0 taken 39 times.
✓ Branch 1 taken 85 times.
|
124 | if (shm_handler == NULL) return DLB_ERR_NOSHMEM; |
| 382 | |||
| 383 | #ifdef DEBUG_VERSION | ||
| 384 | 85 | int count = -1; | |
| 385 | #endif | ||
| 386 | 85 | int participants = -1; | |
| 387 | |||
| 388 | 85 | shmem_lock(shm_handler); | |
| 389 | { | ||
| 390 | struct timespec timeout; | ||
| 391 | 85 | get_time_real(&timeout); | |
| 392 | 85 | timeout.tv_sec += SHMEM_TIMEOUT_SECONDS; | |
| 393 | 85 | int timedwrlock_error = pthread_rwlock_timedwrlock(&barrier->rwlock, &timeout); | |
| 394 |
1/2✓ Branch 0 taken 85 times.
✗ Branch 1 not taken.
|
85 | if (likely(timedwrlock_error == 0)) { |
| 395 | |||
| 396 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 1 times.
|
85 | if (!barrier->flags.initialized |
| 397 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 84 times.
|
84 | || barrier->participants == 0) { |
| 398 | 1 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 399 | 1 | shmem_unlock(shm_handler); | |
| 400 | 1 | return DLB_ERR_PERM; | |
| 401 | } | ||
| 402 | |||
| 403 | #ifdef DEBUG_VERSION | ||
| 404 | 84 | count = barrier->count; | |
| 405 | #endif | ||
| 406 | /* Both locks are acquired and barrier is valid, detach: */ | ||
| 407 | |||
| 408 | /* Update participants */ | ||
| 409 | 84 | participants = --barrier->participants; | |
| 410 | |||
| 411 |
2/2✓ Branch 0 taken 37 times.
✓ Branch 1 taken 47 times.
|
84 | if (participants > 0) { |
| 412 | /* Create new barrier with the number of participants updated */ | ||
| 413 | 37 | pthread_barrier_destroy(&barrier->barrier); | |
| 414 | pthread_barrierattr_t attr; | ||
| 415 | 37 | pthread_barrierattr_init(&attr); | |
| 416 | 37 | pthread_barrierattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); | |
| 417 | 37 | pthread_barrier_init(&barrier->barrier, &attr, barrier->participants); | |
| 418 | 37 | pthread_barrierattr_destroy(&attr); | |
| 419 | |||
| 420 | 37 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 421 | } else { | ||
| 422 | /* If this is the last participant, uninitialize barrier */ | ||
| 423 | 47 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 424 | 47 | pthread_barrier_destroy(&barrier->barrier); | |
| 425 | 47 | pthread_rwlock_destroy(&barrier->rwlock); | |
| 426 | 47 | *barrier = (const barrier_t){}; | |
| 427 | |||
| 428 | /* Try to compact barrier list */ | ||
| 429 |
2/2✓ Branch 0 taken 53 times.
✓ Branch 1 taken 41 times.
|
94 | for (int i = shdata->num_barriers - 1; i >= 0; --i) { |
| 430 |
2/2✓ Branch 0 taken 47 times.
✓ Branch 1 taken 6 times.
|
53 | if (!shdata->barriers[i].flags.initialized) { |
| 431 | 47 | --shdata->num_barriers; | |
| 432 | } else { | ||
| 433 | 6 | break; | |
| 434 | } | ||
| 435 | } | ||
| 436 | } | ||
| 437 | ✗ | } else if (timedwrlock_error == ETIMEDOUT || timedwrlock_error == EDEADLK) { | |
| 438 | ✗ | shmem_unlock(shm_handler); | |
| 439 | ✗ | fatal("Timed out while detaching barrier.\n" | |
| 440 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 441 | ✗ | } else if (timedwrlock_error == EINVAL) { | |
| 442 | ✗ | shmem_unlock(shm_handler); | |
| 443 | ✗ | fatal("Error acquiring timedwrlock while detaching barrier.\n" | |
| 444 | "Please, report at " PACKAGE_BUGREPORT); | ||
| 445 | } | ||
| 446 | } | ||
| 447 | 84 | shmem_unlock(shm_handler); | |
| 448 | |||
| 449 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 84 times.
|
84 | ensure(count == 0, "Barrier Shared memory inconsistency while attaching " |
| 450 | "barrier (count = %d).\n" "Please, report at " PACKAGE_BUGREPORT, | ||
| 451 | count); | ||
| 452 | |||
| 453 | 84 | return participants; | |
| 454 | } | ||
| 455 | |||
| 456 | 42 | void shmem_barrier__barrier(barrier_t *barrier) { | |
| 457 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 42 times.
|
42 | if (unlikely(shm_handler == NULL)) return; |
| 458 | |||
| 459 | 42 | pthread_rwlock_rdlock(&barrier->rwlock); | |
| 460 | { | ||
| 461 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 41 times.
|
42 | if (unlikely(!barrier->flags.initialized)) { |
| 462 | 1 | warning("Trying to use a non initialized barrier"); | |
| 463 | 1 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 464 | 1 | return; | |
| 465 | } | ||
| 466 | |||
| 467 | 41 | unsigned int participant_number = DLB_ATOMIC_ADD_FETCH(&barrier->count, 1); | |
| 468 | 41 | bool last_in = participant_number == barrier->participants; | |
| 469 | |||
| 470 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 41 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
41 | verbose(VB_BARRIER, "Entering barrier %s%s", barrier->name, last_in ? " (last)" : ""); |
| 471 | |||
| 472 |
2/2✓ Branch 0 taken 26 times.
✓ Branch 1 taken 15 times.
|
41 | if (last_in) { |
| 473 | // Barrier | ||
| 474 | 26 | pthread_barrier_wait(&barrier->barrier); | |
| 475 | |||
| 476 | // Increase ntimes counter | ||
| 477 | 26 | DLB_ATOMIC_ADD_RLX(&barrier->ntimes, 1); | |
| 478 | } else { | ||
| 479 | // Only if this process is not the last one, act as a blocking call | ||
| 480 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 8 times.
|
15 | if (barrier->flags.lewi) { |
| 481 | 7 | sync_call_flags_t mpi_flags = (const sync_call_flags_t) { | |
| 482 | .is_dlb_barrier = true, | ||
| 483 | .is_blocking = true, | ||
| 484 | .is_collective = true, | ||
| 485 | .do_lewi = true, | ||
| 486 | }; | ||
| 487 | 7 | into_sync_call(mpi_flags); | |
| 488 | } | ||
| 489 | |||
| 490 | // Barrier | ||
| 491 | 15 | pthread_barrier_wait(&barrier->barrier); | |
| 492 | |||
| 493 | // Recover resources for those processes that simulated a blocking call | ||
| 494 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 8 times.
|
15 | if (barrier->flags.lewi) { |
| 495 | 7 | sync_call_flags_t mpi_flags = (const sync_call_flags_t) { | |
| 496 | .is_dlb_barrier = true, | ||
| 497 | .is_blocking = true, | ||
| 498 | .is_collective = true, | ||
| 499 | .do_lewi = true, | ||
| 500 | }; | ||
| 501 | 7 | out_of_sync_call(mpi_flags); | |
| 502 | } | ||
| 503 | } | ||
| 504 | |||
| 505 | 41 | unsigned int participants_left = DLB_ATOMIC_SUB_FETCH(&barrier->count, 1); | |
| 506 | 41 | bool last_out = participants_left == 0; | |
| 507 | |||
| 508 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 41 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
41 | verbose(VB_BARRIER, "Leaving barrier %s%s", barrier->name, last_out ? " (last)" : ""); |
| 509 | |||
| 510 | /* WARNING: There may be a race condition with the 'count' value in | ||
| 511 | * consecutive barriers (A -> B), if one process increases the | ||
| 512 | * 'count' value after entering B, while another process still | ||
| 513 | * hasn't decreased it in B. Anyway, this is completely harmless | ||
| 514 | * since it only affects the verbose message. | ||
| 515 | */ | ||
| 516 | } | ||
| 517 | 41 | pthread_rwlock_unlock(&barrier->rwlock); | |
| 518 | } | ||
| 519 | |||
| 520 | 6 | void shmem_barrier__print_info(const char *shmem_key, int shmem_size_multiplier) { | |
| 521 | |||
| 522 | /* If the shmem is not opened, obtain a temporary fd */ | ||
| 523 | 6 | bool temporary_shmem = shm_handler == NULL; | |
| 524 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (temporary_shmem) { |
| 525 | 1 | shmem_barrier_ext__init(shmem_key, shmem_size_multiplier); | |
| 526 | } | ||
| 527 | |||
| 528 | /* Make a full copy of the shared memory */ | ||
| 529 | 6 | shdata_t *shdata_copy = malloc(shmem_barrier__size()); | |
| 530 | 6 | shmem_lock(shm_handler); | |
| 531 | { | ||
| 532 | 6 | memcpy(shdata_copy, shdata, shmem_barrier__size()); | |
| 533 | } | ||
| 534 | 6 | shmem_unlock(shm_handler); | |
| 535 | |||
| 536 | /* Close shmem if needed */ | ||
| 537 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (temporary_shmem) { |
| 538 | 1 | shmem_barrier_ext__finalize(); | |
| 539 | } | ||
| 540 | |||
| 541 | /* Initialize buffer */ | ||
| 542 | print_buffer_t buffer; | ||
| 543 | 6 | printbuffer_init(&buffer); | |
| 544 | |||
| 545 | /* Set up line buffer */ | ||
| 546 | enum { MAX_LINE_LEN = 128 }; | ||
| 547 | char line[MAX_LINE_LEN]; | ||
| 548 | |||
| 549 | 6 | int num_barriers = shdata_copy->num_barriers; | |
| 550 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | for (int i = 0; i < num_barriers; ++i) { |
| 551 | 6 | barrier_t *barrier = &shdata_copy->barriers[i]; | |
| 552 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (barrier->flags.initialized) { |
| 553 | |||
| 554 | /* Append line to buffer */ | ||
| 555 | 6 | snprintf(line, MAX_LINE_LEN, | |
| 556 | " | %14s | %12u | %12u | %12u |", | ||
| 557 | 6 | barrier->name, barrier->participants, barrier->count, barrier->ntimes); | |
| 558 | 6 | printbuffer_append(&buffer, line); | |
| 559 | } | ||
| 560 | } | ||
| 561 | |||
| 562 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 1 times.
|
6 | if (buffer.addr[0] != '\0' ) { |
| 563 | 5 | info0("=== Barriers ===\n" | |
| 564 | " | Barrier Name | Participants | Num. blocked | Times compl. |\n" | ||
| 565 | "%s", buffer.addr); | ||
| 566 | } | ||
| 567 | 6 | printbuffer_destroy(&buffer); | |
| 568 | 6 | free(shdata_copy); | |
| 569 | 6 | } | |
| 570 | |||
| 571 | 8 | bool shmem_barrier__exists(void) { | |
| 572 | 8 | return shm_handler != NULL; | |
| 573 | } | ||
| 574 | |||
| 575 | 1 | int shmem_barrier__version(void) { | |
| 576 | 1 | return SHMEM_BARRIER_VERSION; | |
| 577 | } | ||
| 578 | |||
| 579 | 76 | size_t shmem_barrier__size(void) { | |
| 580 | // max_barriers contains a value once shmem is initialized, | ||
| 581 | // otherwise return default size | ||
| 582 | 76 | return sizeof(shdata_t) + sizeof(barrier_t) * ( | |
| 583 |
2/2✓ Branch 0 taken 75 times.
✓ Branch 1 taken 1 times.
|
76 | max_barriers > 0 ? max_barriers : mu_get_system_size()); |
| 584 | } | ||
| 585 |