GCC Code Coverage Report


Directory: src/
File: src/LB_comm/shmem.c
Date: 2024-11-22 17:07:10
Exec Total Coverage
Lines: 105 148 70.9%
Functions: 12 17 70.6%
Branches: 61 100 61.0%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2021 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "LB_comm/shmem.h"
25
26 #include "support/debug.h"
27
28 #include <unistd.h>
29 #include <sys/mman.h>
30 #include <sys/wait.h>
31 #include <sys/types.h>
32 #include <sys/stat.h> /* For mode constants */
33 #include <fcntl.h> /* For O_* constants */
34 #include <signal.h>
35 #include <dirent.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <pthread.h>
41
42 #ifndef _POSIX_THREAD_PROCESS_SHARED
43 #error This system does not support process shared mutexes
44 #endif
45
46 #include "LB_comm/shmem.h"
47 #include "support/atomic.h"
48 #include "support/debug.h"
49 #include "support/options.h"
50 #include "support/mytime.h"
51 #include "support/mask_utils.h"
52
53 #define SHMEM_TIMEOUT_SECONDS 1
54
55 288 static bool shmem_consistency_check_pids(pid_t *pidlist, pid_t pid,
56 void (*cleanup_fn)(void*,int), void *shdata) {
57 288 bool registered = false;
58 int i;
59
2/2
✓ Branch 1 taken 3204 times.
✓ Branch 2 taken 288 times.
3492 for(i=0; i<mu_get_system_size(); ++i) {
60
2/2
✓ Branch 0 taken 3183 times.
✓ Branch 1 taken 21 times.
3204 if (pidlist[i] == 0) {
61
2/2
✓ Branch 0 taken 288 times.
✓ Branch 1 taken 2895 times.
3183 if (!registered) {
62 288 pidlist[i] = pid;
63 288 registered = true;
64 }
65 } else {
66
2/2
✓ Branch 1 taken 5 times.
✓ Branch 2 taken 16 times.
21 if (kill(pidlist[i], 0) == -1) {
67 /* Process pidlist[i] is registered and does not exist */
68
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 if (cleanup_fn) {
69
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 verbose(VB_SHMEM,
70 "Process %d is registered in DLB but does not exist, probably"
71 " due to a bad termination of such process.\n"
72 "DLB is cleaning up the shared memory. If it fails,"
73 " please run 'dlb_shm --delete' and try again.", pidlist[i]);
74 5 cleanup_fn(shdata, pidlist[i]);
75 5 pidlist[i] = 0;
76 } else {
77 verbose(VB_SHMEM, "Process %d attached to shmem not found, "
78 "you may want to run \"dlb_shm -d\"", pidlist[i]);
79 }
80 }
81 }
82 }
83 288 return registered;
84 }
85
86 286 static bool shmem_consistency_remove_pid(pid_t *pidlist, pid_t pid) {
87 286 bool last_one = true;
88 int i;
89
2/2
✓ Branch 1 taken 3188 times.
✓ Branch 2 taken 286 times.
3474 for(i=0; i<mu_get_system_size(); ++i) {
90
2/2
✓ Branch 0 taken 288 times.
✓ Branch 1 taken 2900 times.
3188 if (pidlist[i] == pid) {
91 288 pidlist[i] = 0;
92
2/2
✓ Branch 0 taken 13 times.
✓ Branch 1 taken 2887 times.
2900 } else if (pidlist[i] != 0) {
93 13 last_one = false;
94 }
95 }
96 286 return last_one;
97 }
98
99 576 static void shmem_consistency_check_version(unsigned int creator_version,
100 unsigned int process_version) {
101
2/2
✓ Branch 0 taken 545 times.
✓ Branch 1 taken 31 times.
576 if (creator_version != SHMEM_VERSION_IGNORE) {
102
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 545 times.
545 fatal_cond(creator_version != process_version,
103 "The existing DLB shared memory version differs from the expected one.\n"
104 "This may have been caused by a DLB version upgrade in between runs.\n"
105 "Please, run 'dlb_shm --delete' and try again.\n"
106 "Contact us at " PACKAGE_BUGREPORT " if the issue persists.");
107 }
108 576 }
109
110 288 static void get_shmem_filename(char *filename, const char *shmem_module,
111 const char *shmem_key, int shmem_color) {
112
3/4
✓ Branch 0 taken 288 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 279 times.
✓ Branch 3 taken 9 times.
288 if (shmem_key && shmem_key[0] != '\0') {
113
2/2
✓ Branch 0 taken 276 times.
✓ Branch 1 taken 3 times.
279 if (shmem_color <= 0) {
114 276 snprintf(filename, SHM_NAME_LENGTH, "/DLB_%s_%s",
115 shmem_module, shmem_key);
116 } else {
117 3 snprintf(filename, SHM_NAME_LENGTH, "/DLB_%s_%d_%s",
118 shmem_module, shmem_color, shmem_key);
119 }
120 } else {
121
1/2
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
9 if (shmem_color <= 0) {
122 9 snprintf(filename, SHM_NAME_LENGTH, "/DLB_%s_%d",
123 shmem_module, getuid());
124 } else {
125 snprintf(filename, SHM_NAME_LENGTH, "/DLB_%s_%d_%d",
126 shmem_module, shmem_color, getuid());
127 }
128 }
129 288 }
130
131
132 288 shmem_handler_t* shmem_init(void **shdata, const shmem_props_t *shmem_props) {
133 288 pid_t pid = getpid();
134 288 const char *shmem_module = shmem_props->name;
135
2/2
✓ Branch 0 taken 17 times.
✓ Branch 1 taken 271 times.
288 verbose(VB_SHMEM, "Shared Memory Init: pid(%d), module(%s)", pid, shmem_module);
136
137 /* Allocate new Shared Memory handler */
138 288 shmem_handler_t *handler = malloc(sizeof(shmem_handler_t));
139
140 /* Calculate total shmem size:
141 * shmem = shsync + shdata
142 * shsync and shdata are both variable in size
143 */
144 288 size_t shsync_size = shmem_shsync__size();
145 288 size_t shdata_size = shmem_props->size;
146 288 handler->shm_size = shsync_size + shdata_size;
147
148 /* Get /dev/shm/ file names to create */
149 288 const char *shmem_key = shmem_props->key;
150 288 int shmem_color = shmem_props->color;
151 288 get_shmem_filename(handler->shm_filename, shmem_module, shmem_key, shmem_color);
152
153 /* Obtain a file descriptor for the shmem */
154 288 int fd = shm_open(handler->shm_filename, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
155
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 288 times.
288 if (fd == -1) {
156 fatal("shm_open error: %s", strerror(errno));
157 }
158
159 /* Truncate the regular file to a precise size */
160
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 288 times.
288 if (ftruncate(fd, handler->shm_size) == -1) {
161 fatal("ftruncate error: %s", strerror(errno));
162 }
163
164 /* Map shared memory object */
165 288 handler->shm_addr = mmap(NULL, handler->shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
166
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 288 times.
288 if (handler->shm_addr == MAP_FAILED) {
167 fatal("mmap error: %s", strerror(errno));
168 }
169
170 /* Set the address for both structs */
171 288 handler->shsync = (shmem_sync_t*) handler->shm_addr;
172 288 *shdata = handler->shm_addr + shsync_size;
173
174
2/2
✓ Branch 0 taken 252 times.
✓ Branch 1 taken 36 times.
288 if (__sync_bool_compare_and_swap(&handler->shsync->initializing, 0, 1)) {
175 /* Shared Memory creator */
176
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 238 times.
252 verbose(VB_SHMEM, "Initializing Shared Memory (%s)", shmem_module);
177
178 /* Init pthread mutex */
179 pthread_mutexattr_t attr;
180
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 252 times.
252 if (pthread_mutexattr_init(&attr) != 0) {
181 fatal("pthread_mutexattr_init error: %s", strerror(errno));
182 }
183
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 252 times.
252 if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED) != 0) {
184 fatal("pthread_mutexattr_setpshared error: %s", strerror(errno));
185 }
186
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 252 times.
252 if (pthread_mutex_init(&handler->shsync->shmem_mutex, &attr) != 0) {
187 fatal("pthread_mutex_init error: %s", strerror(errno));
188 }
189
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 252 times.
252 if (pthread_mutexattr_destroy(&attr) != 0) {
190 fatal("pthread_mutexattr_destroy error: %s", strerror(errno));
191 }
192
193 /* Set Shared Memory version */
194 252 handler->shsync->shmem_version = shmem_props->version;
195 252 handler->shsync->shsync_version = SHMEM_SYNC_VERSION;
196
197 252 handler->shsync->initialized = 1;
198 } else {
199 /* Shared Memory already created */
200
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 while(!handler->shsync->initialized) __sync_synchronize();
201
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 36 times.
36 verbose(VB_SHMEM, "Attached to Shared Memory (%s)", shmem_module);
202 }
203
204 /* Check consistency */
205
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 274 times.
288 verbose(VB_SHMEM, "Checking shared memory consistency (%s)", shmem_module);
206 struct timespec timeout;
207 288 get_time_real(&timeout);
208 288 timeout.tv_sec += SHMEM_TIMEOUT_SECONDS;
209 288 int error = pthread_mutex_timedlock(&handler->shsync->shmem_mutex, &timeout);
210
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 288 times.
288 if (error == ETIMEDOUT) {
211 fatal("DLB cannot obtain the lock for the shared memory.\n"
212 "This may have been caused by a previous process crashing"
213 " while acquiring the DLB shared memory lock.\n"
214 "Please, run 'dlb_shm --delete' and try again.\n"
215 "Contact us at " PACKAGE_BUGREPORT " if the issue persists.");
216
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 288 times.
288 } else if (error != 0) {
217 fatal("pthread_mutex_timedlock error: %s", strerror(error));
218 }
219 288 shmem_consistency_check_version(handler->shsync->shsync_version, SHMEM_SYNC_VERSION);
220 288 shmem_consistency_check_version(handler->shsync->shmem_version, shmem_props->version);
221 288 shmem_consistency_check_pids(handler->shsync->pidlist, pid, shmem_props->cleanup_fn, *shdata);
222 288 pthread_mutex_unlock(&handler->shsync->shmem_mutex);
223
224 288 return handler;
225 }
226
227 286 void shmem_finalize(shmem_handler_t* handler, bool (*is_empty_fn)(void)) {
228 #ifdef IS_BGQ_MACHINE
229 // BG/Q have some problems deallocating shmem
230 // It will be cleaned after the job completion anyway
231 return;
232 #endif
233
234 286 shmem_lock(handler);
235
4/4
✓ Branch 0 taken 202 times.
✓ Branch 1 taken 84 times.
✓ Branch 3 taken 170 times.
✓ Branch 4 taken 32 times.
286 bool is_empty = is_empty_fn ? is_empty_fn() : true;
236 286 bool is_last_one = shmem_consistency_remove_pid(handler->shsync->pidlist, getpid());
237
4/4
✓ Branch 0 taken 254 times.
✓ Branch 1 taken 32 times.
✓ Branch 2 taken 253 times.
✓ Branch 3 taken 1 times.
286 bool delete_shmem = is_empty && is_last_one;
238 286 shmem_unlock(handler);
239
240 /* Here we should destroy the pthread mutex but another process may open
241 * the shared memory in this precise moment causing an invalid access to
242 * the mutex. */
243
244 /* All processes must unmap shmem */
245
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 286 times.
286 if (munmap(handler->shm_addr, handler->shm_size) != 0) {
246 fatal("munmap error: %s", strerror(errno));
247 }
248
249 /* Only the last process unlinks shmem */
250
2/2
✓ Branch 0 taken 253 times.
✓ Branch 1 taken 33 times.
286 if (delete_shmem) {
251
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 239 times.
253 verbose(VB_SHMEM, "Removing shared memory %s", handler->shm_filename);
252
1/4
✗ Branch 1 not taken.
✓ Branch 2 taken 253 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
253 if (shm_unlink(handler->shm_filename) != 0 && errno != ENOENT) {
253 fatal("shm_unlink error: %s", strerror(errno));
254 }
255 }
256
257 286 free(handler);
258 286 }
259
260 2870 void shmem_lock( shmem_handler_t* handler ) {
261 2870 pthread_mutex_lock(&handler->shsync->shmem_mutex);
262 2870 }
263
264 2870 void shmem_unlock( shmem_handler_t* handler ) {
265 2870 pthread_mutex_unlock(&handler->shsync->shmem_mutex);
266 2870 }
267
268 /* Shared memory states (BUSY(0-n) <- READY(0-n) -> MAINTENANCE(1)):
269 * - READY: the shared memory can be locked or moved to another state.
270 * - BUSY: the shared memory is being used, each process still needs to
271 * use atomic operations or locks to access the data.
272 * This state prevents going to maintenance mode.
273 * - MAINTENANCE: only one process can set the state to maintenance,
274 * processes trying to set BUSY state will have to wait
275 *
276 * This system is useful if all processes want to begin a group operation
277 * (like a barrier) entering the BUSY state, and we want to prevent a new
278 * process to join the group until the shared memory goes back to READY.
279 */
280
281 enum { SHMEM_TRYAQUIRE_USECS = 100 };
282
283 /* Busy wait until the shmem can be locked with the state MAINTENANCE */
284 void shmem_lock_maintenance( shmem_handler_t* handler ) {
285 volatile shmem_state_t *state = &handler->shsync->state;
286 while(1) {
287 pthread_mutex_lock(&handler->shsync->shmem_mutex);
288 switch(*state) {
289 case SHMEM_READY:
290 /* Lock successfully acquired: READY -> MAINTENANCE */
291 *state = SHMEM_MAINTENANCE;
292 return;
293 case SHMEM_BUSY:
294 /* Shmem cannot be put in maintenance while BUSY */
295 pthread_mutex_unlock(&handler->shsync->shmem_mutex);
296 usleep(SHMEM_TRYAQUIRE_USECS);
297 break;
298 case SHMEM_MAINTENANCE:
299 /* This should not happen */
300 pthread_mutex_unlock(&handler->shsync->shmem_mutex);
301 fatal("Shared memory lock inconsistency. Please report to " PACKAGE_BUGREPORT);
302 break;
303 }
304 }
305 }
306
307 /* Unlock a shmem previously locked in the MAINTENANCE state */
308 void shmem_unlock_maintenance( shmem_handler_t* handler ) {
309 /* Unlock MAINTENANCE -> READY */
310 int error = handler->shsync->state != SHMEM_MAINTENANCE;
311 handler->shsync->state = SHMEM_READY;
312 pthread_mutex_unlock(&handler->shsync->shmem_mutex);
313
314 /* This should not happen */
315 fatal_cond(error, "Shared memory lock inconsistency. Please report to " PACKAGE_BUGREPORT);
316 }
317
318 /* Busy wait until the shmem can be set READY -> BUSY */
319 void shmem_acquire_busy( shmem_handler_t* handler ) {
320 volatile shmem_state_t *state = &handler->shsync->state;
321 while ( unlikely(
322 *state != SHMEM_BUSY
323 && !__sync_bool_compare_and_swap(state, SHMEM_READY, SHMEM_BUSY)
324 )) {
325 usleep(SHMEM_TRYAQUIRE_USECS);
326 }
327 }
328
329 /* Set shmem state BUSY -> READY */
330 void shmem_release_busy( shmem_handler_t* handler ) {
331 fatal_cond(
332 !__sync_bool_compare_and_swap(&handler->shsync->state, SHMEM_BUSY, SHMEM_READY),
333 "Shared memory lock inconsistency. Please report to " PACKAGE_BUGREPORT);
334 }
335
336 1 char *get_shm_filename( shmem_handler_t* handler ) {
337 1 return handler->shm_filename;
338 }
339
340 51 bool shmem_exists(const char *shmem_module, const char *shmem_key) {
341 char shm_filename[SHM_NAME_LENGTH*2];
342
3/4
✓ Branch 0 taken 51 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 50 times.
✓ Branch 3 taken 1 times.
51 if (shmem_key && shmem_key[0] != '\0') {
343 50 snprintf(shm_filename, SHM_NAME_LENGTH*2, "/dev/shm/DLB_%s_%s", shmem_module, shmem_key);
344 } else {
345 1 snprintf(shm_filename, SHM_NAME_LENGTH*2, "/dev/shm/DLB_%s_%d", shmem_module, getuid());
346 }
347 51 return access(shm_filename, F_OK) != -1;
348 }
349
350 void shmem_destroy(const char *shmem_module, const char *shmem_key) {
351 char shm_filename[SHM_NAME_LENGTH*2];
352 if (shmem_key && shmem_key[0] != '\0') {
353 snprintf(shm_filename, SHM_NAME_LENGTH*2, "/dev/shm/DLB_%s_%s", shmem_module, shmem_key);
354 } else {
355 snprintf(shm_filename, SHM_NAME_LENGTH*2, "/dev/shm/DLB_%s_%d", shmem_module, getuid());
356 }
357 shm_unlink(shm_filename);
358 }
359
360 1 int shmem_shsync__version(void) {
361 1 return SHMEM_SYNC_VERSION;
362 }
363
364 289 size_t shmem_shsync__size(void) {
365 289 size_t shsync_size = sizeof(shmem_sync_t) + sizeof(pid_t) * mu_get_system_size();
366 289 size_t alignment = DLB_CACHE_LINE; // in bytes
367 289 shsync_size = (shsync_size + (alignment - 1)) & ~(alignment - 1); // round up
368 289 return shsync_size;
369 }
370