GCC Code Coverage Report


Directory: src/
File: src/LB_core/DLB_kernel.c
Date: 2024-11-22 17:07:10
Exec Total Coverage
Lines: 302 377 80.1%
Functions: 33 34 97.1%
Branches: 182 282 64.5%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2022 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #ifdef HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23
24 #include "LB_core/DLB_kernel.h"
25
26 #include "LB_core/DLB_talp.h"
27 #include "LB_core/node_barrier.h"
28 #include "LB_core/spd.h"
29 #include "LB_numThreads/numThreads.h"
30 #include "LB_numThreads/omptool.h"
31 #include "LB_comm/shmem_async.h"
32 #include "LB_comm/shmem_barrier.h"
33 #include "LB_comm/shmem_cpuinfo.h"
34 #include "LB_comm/shmem_procinfo.h"
35 #include "LB_comm/shmem_talp.h"
36 #include "apis/dlb_errors.h"
37 #include "apis/dlb_talp.h"
38 #include "support/debug.h"
39 #include "support/mytime.h"
40 #include "support/tracing.h"
41 #include "support/options.h"
42 #include "support/mask_utils.h"
43 #ifdef MPI_LIB
44 #include "LB_MPI/process_MPI.h"
45 #endif
46
47 #include <limits.h>
48 #include <sched.h>
49 #include <string.h>
50
51
52 /* By default all threads are participants.
53 * A thread may change this value to avoid participating in LeWI and TALP metrics. */
54 __thread bool thread_is_observer = false;
55
56
57 /* Status */
58
59 81 int Initialize(subprocess_descriptor_t *spd, pid_t id, int ncpus,
60 const cpu_set_t *mask, const char *lb_args) {
61
62 81 int error = DLB_SUCCESS;
63
64 // Set it to false in case one thread is an observer but then Initializes DLB
65 81 thread_is_observer = false;
66
67 // Initialize common modules (spd->id and instrumentation module ASAP)
68 81 *spd = (const subprocess_descriptor_t) {
69 .id = id,
70 81 .dlb_initialized = spd->dlb_initialized,
71 81 .dlb_preinitialized = spd->dlb_preinitialized,
72 };
73 81 options_init(&spd->options, lb_args);
74 81 debug_init(&spd->options);
75 init_tracing(&spd->options);
76 instrument_event(RUNTIME_EVENT, EVENT_INIT, EVENT_BEGIN);
77 81 mu_init();
78 81 timer_init();
79
80 // Infer LeWI mode
81 81 spd->lb_policy =
82
4/4
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 45 times.
✓ Branch 2 taken 12 times.
✓ Branch 3 taken 31 times.
126 !spd->options.lewi ? POLICY_NONE :
83
1/2
✓ Branch 0 taken 45 times.
✗ Branch 1 not taken.
45 spd->options.lewi_affinity == LEWI_AFFINITY_NONE ? POLICY_LEWI :
84
1/2
✓ Branch 0 taken 45 times.
✗ Branch 1 not taken.
45 spd->options.lewi_affinity != LEWI_AFFINITY_AUTO ? POLICY_LEWI_MASK :
85
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 45 times.
45 spd->options.ompt ? POLICY_LEWI_MASK :
86
2/2
✓ Branch 0 taken 43 times.
✓ Branch 1 taken 2 times.
45 spd->options.preinit_pid ? POLICY_LEWI_MASK :
87 mask ? POLICY_LEWI_MASK :
88 POLICY_LEWI;
89
90
2/2
✓ Branch 0 taken 31 times.
✓ Branch 1 taken 50 times.
81 if (spd->lb_policy == POLICY_LEWI
91
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 27 times.
31 && spd->options.mode == MODE_ASYNC) {
92 4 spd->lb_policy = POLICY_LEWI_ASYNC;
93 }
94
95 // Check if real process mask is needed and possible incompatibilities
96 // (Basically, always except if classic LeWI)
97 81 bool mask_is_needed = (
98 81 spd->lb_policy == POLICY_LEWI_MASK
99
2/2
✓ Branch 0 taken 50 times.
✓ Branch 1 taken 17 times.
67 || spd->options.drom
100
4/4
✓ Branch 0 taken 67 times.
✓ Branch 1 taken 14 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 49 times.
148 || spd->options.preinit_pid);
101
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 49 times.
81 if (mask_is_needed &&
102
2/4
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 32 times.
32 (spd->lb_policy == POLICY_LEWI || spd->lb_policy == POLICY_LEWI_ASYNC)) {
103 warning("Classic LeWI support with no cpuset binding is not compatible"
104 " with newer DLB modules. DLB_Init cannot continue.");
105 return DLB_ERR_NOCOMP;
106 }
107
108 // Initialize the rest of the subprocess descriptor
109 81 pm_init(&spd->pm);
110 81 set_lb_funcs(&spd->lb_funcs, spd->lb_policy);
111
2/2
✓ Branch 0 taken 42 times.
✓ Branch 1 taken 39 times.
81 if (mask) {
112 // Preferred case, mask is provided by the user
113 42 memcpy(&spd->process_mask, mask, sizeof(cpu_set_t));
114 } else {
115 // Best effort querying the system
116 // (it may be late if DLB_Init is called after other RT sets this thread's affinity)
117 39 sched_getaffinity(0, sizeof(cpu_set_t), &spd->process_mask);
118 }
119
120 // ncpus is only used for classic LeWI
121
2/2
✓ Branch 0 taken 54 times.
✓ Branch 1 taken 27 times.
81 if (spd->lb_policy == POLICY_LEWI
122
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 50 times.
54 || spd->lb_policy == POLICY_LEWI_ASYNC) {
123
2/2
✓ Branch 0 taken 27 times.
✓ Branch 1 taken 4 times.
31 spd->lewi_ncpus = ncpus > 0 ? ncpus : pm_get_num_threads();
124 }
125
126 // Initialize shared memories
127
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 49 times.
81 if (mask_is_needed) {
128 // Initialize procinfo
129
1/2
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
32 if (spd->options.lewi_color == 0) {
130 cpu_set_t new_process_mask;
131 32 error = shmem_procinfo__init(spd->id, spd->options.preinit_pid,
132 32 &spd->process_mask, &new_process_mask, spd->options.shm_key);
133
134 // If the process has been pre-initialized (error=DLB_NOTED),
135 // the mask provided by shmem_procinfo__init must overwrite the process mask
136
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 24 times.
32 if (error == DLB_NOTED) {
137 8 set_process_mask(&spd->pm, &new_process_mask);
138 8 memcpy(&spd->process_mask, &new_process_mask, sizeof(cpu_set_t));
139 8 error = DLB_SUCCESS;
140 }
141 } else {
142 // If using --lewi-color, we also need to initialize procinfo with cpu sharing
143 error = shmem_procinfo__init_with_cpu_sharing(spd->id, spd->options.preinit_pid,
144 &spd->process_mask, spd->options.shm_key);
145 }
146
147
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 30 times.
32 if (error != DLB_SUCCESS) return error;
148
149 // Initialize cpuinfo
150 30 error = shmem_cpuinfo__init(spd->id, spd->options.preinit_pid,
151 30 &spd->process_mask, spd->options.shm_key, spd->options.lewi_color);
152
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 30 times.
30 if (error != DLB_SUCCESS) return error;
153
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 44 times.
49 } else if (spd->options.talp) {
154 // If mask is not needed but TALP is enabled, we still need to
155 // initialize shmem_procinfo but allowing CPU sharing
156 5 error = shmem_procinfo__init_with_cpu_sharing(spd->id, spd->options.preinit_pid,
157 5 &spd->process_mask, spd->options.shm_key);
158 }
159
2/2
✓ Branch 0 taken 76 times.
✓ Branch 1 taken 3 times.
79 if (spd->options.barrier) {
160 76 shmem_barrier__init(spd->options.shm_key);
161 76 node_barrier_init(spd);
162 }
163
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 75 times.
79 if (spd->options.mode == MODE_ASYNC) {
164 4 error = shmem_async_init(spd->id, &spd->pm, &spd->process_mask, spd->options.shm_key);
165
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (error != DLB_SUCCESS) return error;
166 }
167
168 // Initialise LeWI
169 79 error = spd->lb_funcs.init(spd);
170
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 if (error != DLB_SUCCESS) return error;
171
172 // Initialize TALP
173
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 73 times.
79 if (spd->options.talp) {
174 6 talp_init(spd);
175 } else {
176 73 spd->talp_info = NULL;
177 }
178
179 // Print initialization summary
180 79 info0("%s %s", PACKAGE, VERSION);
181
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 34 times.
79 if (spd->lb_policy != POLICY_NONE) {
182 45 info0("Balancing policy: %s", policy_tostr(spd->lb_policy));
183 45 options_print_lewi_flags(&spd->options);
184 }
185 instrument_print_flags();
186
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_API, "Enabled verbose mode for DLB API");
187
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_MPI_API, "Enabled verbose mode for MPI API");
188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_MPI_INT, "Enabled verbose mode for MPI Interception");
189
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_SHMEM, "Enabled verbose mode for Shared Memory");
190
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_DROM, "Enabled verbose mode for DROM");
191
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_STATS, "Enabled verbose mode for STATS");
192
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_MICROLB, "Enabled verbose mode for microLB policies");
193
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_ASYNC, "Enabled verbose mode for Asynchronous thread");
194
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 79 times.
79 verbose(VB_OMPT, "Enabled verbose mode for OMPT experimental features");
195
196 // Print number of cpus or mask
197
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 49 times.
79 if (mask_is_needed) {
198 30 info("Process CPU affinity mask: %s", mu_to_str(&spd->process_mask));
199 }
200
201 79 spd->lewi_enabled = true;
202 instrument_event(RUNTIME_EVENT, EVENT_INIT, EVENT_END);
203 instrument_event(DLB_MODE_EVENT, EVENT_ENABLED, EVENT_BEGIN);
204
205 79 return error;
206 }
207
208 79 int Finish(subprocess_descriptor_t *spd) {
209 79 int error = DLB_SUCCESS;
210 instrument_event(RUNTIME_EVENT, EVENT_FINALIZE, EVENT_BEGIN);
211
212 #if MPI_LIB
213 /* If DLB_Finalize is called preemptively, we need to finalize also the MPI
214 * module */
215 process_MPI__finalize();
216 #endif
217
218 79 spd->lewi_enabled = false;
219
220 79 pm_finalize(&spd->pm);
221
222
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 73 times.
79 if (spd->options.talp) {
223 6 talp_finalize(spd);
224 }
225
1/2
✓ Branch 0 taken 79 times.
✗ Branch 1 not taken.
79 if (spd->lb_funcs.finalize) {
226 79 spd->lb_funcs.finalize(spd);
227 79 spd->lb_funcs.finalize = NULL;
228 }
229
2/2
✓ Branch 0 taken 76 times.
✓ Branch 1 taken 3 times.
79 if (spd->options.barrier) {
230 76 node_barrier_finalize(spd);
231 76 shmem_barrier__finalize(spd->options.shm_key);
232 }
233
2/2
✓ Branch 0 taken 65 times.
✓ Branch 1 taken 14 times.
79 if (spd->lb_policy == POLICY_LEWI_MASK
234
2/2
✓ Branch 0 taken 50 times.
✓ Branch 1 taken 15 times.
65 || spd->options.drom
235
2/2
✓ Branch 0 taken 45 times.
✓ Branch 1 taken 5 times.
50 || spd->options.talp
236
2/2
✓ Branch 0 taken 41 times.
✓ Branch 1 taken 4 times.
45 || spd->options.ompt
237
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 40 times.
41 || spd->options.preinit_pid) {
238 39 shmem_cpuinfo__finalize(spd->id, spd->options.shm_key, spd->options.lewi_color);
239 39 shmem_procinfo__finalize(spd->id, spd->options.debug_opts & DBG_RETURNSTOLEN,
240 39 spd->options.shm_key);
241 }
242
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 75 times.
79 if (spd->options.mode == MODE_ASYNC) {
243 4 shmem_async_finalize(spd->id);
244 }
245 79 timer_finalize();
246 instrument_event(RUNTIME_EVENT, EVENT_FINALIZE, EVENT_END);
247 instrument_finalize();
248 79 options_finalize(&spd->options);
249 79 return error;
250 }
251
252 5 int PreInitialize(subprocess_descriptor_t *spd, const cpu_set_t *mask,
253 const char *lb_args) {
254 // Initialize options
255 5 options_init(&spd->options, lb_args);
256
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 if (spd->options.preinit_pid == 0) return DLB_ERR_INIT;
257
258 5 debug_init(&spd->options);
259
260 // Initialize subprocess descriptor
261 5 spd->lb_policy = POLICY_NONE;
262 5 pm_init(&spd->pm);
263 5 set_lb_funcs(&spd->lb_funcs, spd->lb_policy);
264 5 spd->id = spd->options.preinit_pid;
265 5 memcpy(&spd->process_mask, mask, sizeof(cpu_set_t));
266
267 // Initialize modules
268 5 int error = DLB_SUCCESS;
269
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 error = error ? error : shmem_cpuinfo_ext__init(spd->options.shm_key, spd->options.lewi_color);
270
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 error = error ? error : shmem_procinfo_ext__init(spd->options.shm_key);
271
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 error = error ? error : shmem_procinfo_ext__preinit(spd->id, mask, 0);
272
1/2
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
5 error = error ? error : shmem_cpuinfo_ext__preinit(spd->id, mask, 0);
273 // Close shmems even if there was an error
274 5 int cpuinfo_finalize_err = shmem_cpuinfo_ext__finalize();
275
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 error = error ? error : cpuinfo_finalize_err;
276 5 int procinfo_finalize_err = shmem_procinfo_ext__finalize();
277
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 5 times.
5 error = error ? error : procinfo_finalize_err;
278 5 return error;
279 }
280
281 12 int set_lewi_enabled(subprocess_descriptor_t *spd, bool enabled) {
282 12 int error = DLB_SUCCESS;
283
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 2 times.
12 if (__sync_bool_compare_and_swap(&spd->lewi_enabled, !enabled, enabled)) {
284
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 5 times.
10 if (enabled) {
285 5 spd->lb_funcs.enable(spd);
286 instrument_event(DLB_MODE_EVENT, EVENT_ENABLED, EVENT_BEGIN);
287 } else {
288 5 spd->lb_funcs.disable(spd);
289 instrument_event(DLB_MODE_EVENT, EVENT_DISABLED, EVENT_BEGIN);
290 }
291 } else {
292 2 error = DLB_NOUPDT;
293 }
294 12 return error;
295 }
296
297 4 int set_max_parallelism(subprocess_descriptor_t *spd, int max) {
298 int error;
299
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (!spd->options.lewi) {
300 3 error = DLB_ERR_NOLEWI;
301
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 } else if (!spd->lewi_enabled) {
302 error = DLB_ERR_DISBLD;
303 } else {
304 instrument_event(RUNTIME_EVENT, EVENT_MAX_PARALLELISM, EVENT_BEGIN);
305 instrument_event(MAX_PAR_EVENT, 0, EVENT_END);
306 instrument_event(MAX_PAR_EVENT, max, EVENT_BEGIN);
307 1 error = spd->lb_funcs.set_max_parallelism(spd, max);
308 instrument_event(RUNTIME_EVENT, EVENT_MAX_PARALLELISM, EVENT_END);
309 }
310 4 return error;
311 }
312
313 2 int unset_max_parallelism(subprocess_descriptor_t *spd) {
314 int error;
315
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (!spd->options.lewi) {
316 1 error = DLB_ERR_NOLEWI;
317
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 } else if (!spd->lewi_enabled) {
318 error = DLB_ERR_DISBLD;
319 } else {
320 instrument_event(RUNTIME_EVENT, EVENT_MAX_PARALLELISM, EVENT_BEGIN);
321 instrument_event(MAX_PAR_EVENT, 0, EVENT_END);
322 1 error = spd->lb_funcs.unset_max_parallelism(spd);
323 instrument_event(RUNTIME_EVENT, EVENT_MAX_PARALLELISM, EVENT_END);
324 }
325 2 return error;
326 }
327
328
329 /* Sync-call specific (MPI, DLB_Barrier, etc.) */
330
331 16 void into_sync_call(sync_call_flags_t flags) {
332 /* Observer threads do not trigger LeWI nor TALP on sync calls */
333
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 15 times.
16 if (unlikely(thread_is_observer)) return;
334
335 15 const subprocess_descriptor_t *spd = thread_spd;
336
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if (unlikely(spd == NULL)) return;
337
338
3/6
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 15 times.
✗ Branch 5 not taken.
15 if (spd->options.lewi && spd->lewi_enabled && flags.do_lewi) {
339 15 spd->lb_funcs.into_blocking_call(spd);
340 15 omptool__into_blocking_call();
341 }
342
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if(spd->options.talp) {
343 talp_into_sync_call(spd, flags.is_blocking && flags.is_collective);
344 }
345 }
346
347 16 void out_of_sync_call(sync_call_flags_t flags) {
348 /* Observer threads do not trigger LeWI nor TALP on MPI calls */
349
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 15 times.
16 if (unlikely(thread_is_observer)) return;
350
351 15 const subprocess_descriptor_t *spd = thread_spd;
352
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if (unlikely(spd == NULL)) return;
353
354
3/6
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 15 times.
✗ Branch 5 not taken.
15 if (spd->options.lewi && spd->lewi_enabled && flags.do_lewi) {
355 15 spd->lb_funcs.out_of_blocking_call(spd);
356 15 omptool__outof_blocking_call();
357 }
358
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
15 if(spd->options.talp) {
359 talp_out_of_sync_call(spd, flags.is_blocking && flags.is_collective);
360 }
361 }
362
363
364 /* Lend */
365
366 7 int lend(const subprocess_descriptor_t *spd) {
367 int error;
368
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 1 times.
7 if (!spd->options.lewi) {
369 6 error = DLB_ERR_NOLEWI;
370
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 } else if (!spd->lewi_enabled) {
371 1 error = DLB_ERR_DISBLD;
372 } else {
373 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_BEGIN);
374 instrument_event(GIVE_CPUS_EVENT, CPU_SETSIZE, EVENT_BEGIN);
375 omptool__lend_from_api();
376 error = spd->lb_funcs.lend(spd);
377 instrument_event(GIVE_CPUS_EVENT, 0, EVENT_END);
378 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_END);
379 }
380 7 return error;
381 }
382
383 23 int lend_cpu(const subprocess_descriptor_t *spd, int cpuid) {
384 int error;
385
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 20 times.
23 if (!spd->options.lewi) {
386 3 error = DLB_ERR_NOLEWI;
387
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 20 times.
20 } else if (!spd->lewi_enabled) {
388 error = DLB_ERR_DISBLD;
389 } else {
390 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_BEGIN);
391 instrument_event(GIVE_CPUS_EVENT, 1, EVENT_BEGIN);
392 20 error = spd->lb_funcs.lend_cpu(spd, cpuid);
393 instrument_event(GIVE_CPUS_EVENT, 0, EVENT_END);
394 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_END);
395 }
396 23 return error;
397 }
398
399 6 int lend_cpus(const subprocess_descriptor_t *spd, int ncpus) {
400 int error;
401
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (!spd->options.lewi) {
402 3 error = DLB_ERR_NOLEWI;
403
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 } else if (!spd->lewi_enabled) {
404 error = DLB_ERR_DISBLD;
405 } else {
406 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_BEGIN);
407 instrument_event(GIVE_CPUS_EVENT, ncpus, EVENT_BEGIN);
408 3 error = spd->lb_funcs.lend_cpus(spd, ncpus);
409 instrument_event(GIVE_CPUS_EVENT, 0, EVENT_END);
410 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_END);
411 }
412 6 return error;
413 }
414
415 11 int lend_cpu_mask(const subprocess_descriptor_t *spd, const cpu_set_t *mask) {
416 int error;
417
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 8 times.
11 if (!spd->options.lewi) {
418 3 error = DLB_ERR_NOLEWI;
419
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 } else if (!spd->lewi_enabled) {
420 error = DLB_ERR_DISBLD;
421 } else {
422 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_BEGIN);
423 instrument_event(GIVE_CPUS_EVENT, CPU_COUNT(mask), EVENT_BEGIN);
424 8 error = spd->lb_funcs.lend_cpu_mask(spd, mask);
425 instrument_event(GIVE_CPUS_EVENT, 0, EVENT_END);
426 instrument_event(RUNTIME_EVENT, EVENT_LEND, EVENT_END);
427 }
428 11 return error;
429 }
430
431
432 /* Reclaim */
433
434 6 int reclaim(const subprocess_descriptor_t *spd) {
435 int error;
436
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (!spd->options.lewi) {
437 3 error = DLB_ERR_NOLEWI;
438
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 } else if (!spd->lewi_enabled) {
439 error = DLB_ERR_DISBLD;
440 } else {
441 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_BEGIN);
442 instrument_event(WANT_CPUS_EVENT, CPU_SETSIZE, EVENT_BEGIN);
443 3 error = spd->lb_funcs.reclaim(spd);
444 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
445 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_END);
446 }
447 6 return error;
448 }
449
450 4 int reclaim_cpu(const subprocess_descriptor_t *spd, int cpuid) {
451 int error;
452
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (!spd->options.lewi) {
453 3 error = DLB_ERR_NOLEWI;
454
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 } else if (!spd->lewi_enabled) {
455 error = DLB_ERR_DISBLD;
456 } else {
457 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_BEGIN);
458 instrument_event(WANT_CPUS_EVENT, 1, EVENT_BEGIN);
459 1 error = spd->lb_funcs.reclaim_cpu(spd, cpuid);
460 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
461 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_END);
462 }
463 4 return error;
464 }
465
466 3 int reclaim_cpus(const subprocess_descriptor_t *spd, int ncpus) {
467 int error;
468
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (!spd->options.lewi) {
469 3 error = DLB_ERR_NOLEWI;
470 } else if (!spd->lewi_enabled) {
471 error = DLB_ERR_DISBLD;
472 } else {
473 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_BEGIN);
474 instrument_event(WANT_CPUS_EVENT, ncpus, EVENT_BEGIN);
475 error = spd->lb_funcs.reclaim_cpus(spd, ncpus);
476 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
477 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_END);
478 }
479 3 return error;
480 }
481
482 6 int reclaim_cpu_mask(const subprocess_descriptor_t *spd, const cpu_set_t *mask) {
483 int error;
484
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (!spd->options.lewi) {
485 3 error = DLB_ERR_NOLEWI;
486
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 } else if (!spd->lewi_enabled) {
487 error = DLB_ERR_DISBLD;
488 } else {
489 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_BEGIN);
490 instrument_event(WANT_CPUS_EVENT, CPU_COUNT(mask), EVENT_BEGIN);
491 3 error = spd->lb_funcs.reclaim_cpu_mask(spd, mask);
492 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
493 instrument_event(RUNTIME_EVENT, EVENT_RECLAIM, EVENT_END);
494 }
495 6 return error;
496 }
497
498
499 /* Acquire */
500
501 9 int acquire_cpu(const subprocess_descriptor_t *spd, int cpuid) {
502 int error;
503
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 6 times.
9 if (!spd->options.lewi) {
504 3 error = DLB_ERR_NOLEWI;
505
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 } else if (!spd->lewi_enabled) {
506 error = DLB_ERR_DISBLD;
507 } else {
508 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_BEGIN);
509 instrument_event(WANT_CPUS_EVENT, 1, EVENT_BEGIN);
510 6 error = spd->lb_funcs.acquire_cpu(spd, cpuid);
511 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
512 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_END);
513 }
514 9 return error;
515 }
516
517 46 int acquire_cpus(const subprocess_descriptor_t *spd, int ncpus) {
518 int error;
519
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 43 times.
46 if (!spd->options.lewi) {
520 3 error = DLB_ERR_NOLEWI;
521
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 43 times.
43 } else if (!spd->lewi_enabled) {
522 error = DLB_ERR_DISBLD;
523 } else {
524 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_BEGIN);
525 instrument_event(WANT_CPUS_EVENT, ncpus, EVENT_BEGIN);
526 43 error = spd->lb_funcs.acquire_cpus(spd, ncpus);
527 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
528 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_END);
529 }
530 46 return error;
531 }
532
533 7 int acquire_cpu_mask(const subprocess_descriptor_t *spd, const cpu_set_t *mask) {
534 int error;
535
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 4 times.
7 if (!spd->options.lewi) {
536 3 error = DLB_ERR_NOLEWI;
537
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 } else if (!spd->lewi_enabled) {
538 error = DLB_ERR_DISBLD;
539 } else {
540 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_BEGIN);
541 instrument_event(WANT_CPUS_EVENT, CPU_COUNT(mask), EVENT_BEGIN);
542 4 error = spd->lb_funcs.acquire_cpu_mask(spd, mask);
543 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
544 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_END);
545 }
546 7 return error;
547 }
548
549 12 int acquire_cpus_in_mask(const subprocess_descriptor_t *spd, int ncpus, const cpu_set_t *mask) {
550 int error;
551
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 9 times.
12 if (!spd->options.lewi) {
552 3 error = DLB_ERR_NOLEWI;
553
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 } else if (!spd->lewi_enabled) {
554 error = DLB_ERR_DISBLD;
555 } else {
556 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_BEGIN);
557 instrument_event(WANT_CPUS_EVENT, ncpus, EVENT_BEGIN);
558 9 error = spd->lb_funcs.acquire_cpus_in_mask(spd, ncpus, mask);
559 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
560 instrument_event(RUNTIME_EVENT, EVENT_ACQUIRE, EVENT_END);
561 }
562 12 return error;
563 }
564
565
566 /* Borrow */
567
568 5 int borrow(const subprocess_descriptor_t *spd) {
569 int error;
570
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
5 if (!spd->options.lewi) {
571 3 error = DLB_ERR_NOLEWI;
572
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 } else if (!spd->lewi_enabled) {
573 error = DLB_ERR_DISBLD;
574 } else {
575 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_BEGIN);
576 instrument_event(WANT_CPUS_EVENT, CPU_SETSIZE, EVENT_BEGIN);
577 2 error = spd->lb_funcs.borrow(spd);
578 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
579 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_END);
580 }
581 5 return error;
582 }
583
584 3 int borrow_cpu(const subprocess_descriptor_t *spd, int cpuid) {
585 int error;
586
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (!spd->options.lewi) {
587 3 error = DLB_ERR_NOLEWI;
588 } else if (!spd->lewi_enabled) {
589 error = DLB_ERR_DISBLD;
590 } else {
591 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_BEGIN);
592 instrument_event(WANT_CPUS_EVENT, 1, EVENT_BEGIN);
593 error = spd->lb_funcs.borrow_cpu(spd, cpuid);
594 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
595 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_END);
596 }
597 3 return error;
598 }
599
600 3 int borrow_cpus(const subprocess_descriptor_t *spd, int ncpus) {
601 int error;
602
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (!spd->options.lewi) {
603 3 error = DLB_ERR_NOLEWI;
604 } else if (!spd->lewi_enabled) {
605 error = DLB_ERR_DISBLD;
606 } else {
607 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_BEGIN);
608 instrument_event(WANT_CPUS_EVENT, ncpus, EVENT_BEGIN);
609 error = spd->lb_funcs.borrow_cpus(spd, ncpus);
610 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
611 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_END);
612 }
613 3 return error;
614 }
615
616 3 int borrow_cpu_mask(const subprocess_descriptor_t *spd, const cpu_set_t *mask) {
617 int error;
618
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (!spd->options.lewi) {
619 3 error = DLB_ERR_NOLEWI;
620 } else if (!spd->lewi_enabled) {
621 error = DLB_ERR_DISBLD;
622 } else {
623 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_BEGIN);
624 instrument_event(WANT_CPUS_EVENT, CPU_COUNT(mask), EVENT_BEGIN);
625 error = spd->lb_funcs.borrow_cpu_mask(spd, mask);
626 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
627 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_END);
628 }
629 3 return error;
630 }
631
632 5 int borrow_cpus_in_mask(const subprocess_descriptor_t *spd, int ncpus, const cpu_set_t *mask) {
633 int error;
634
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
5 if (!spd->options.lewi) {
635 3 error = DLB_ERR_NOLEWI;
636
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 } else if (!spd->lewi_enabled) {
637 error = DLB_ERR_DISBLD;
638 } else {
639 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_BEGIN);
640 instrument_event(WANT_CPUS_EVENT, ncpus, EVENT_BEGIN);
641 2 error = spd->lb_funcs.borrow_cpus_in_mask(spd, ncpus, mask);
642 instrument_event(WANT_CPUS_EVENT, 0, EVENT_END);
643 instrument_event(RUNTIME_EVENT, EVENT_BORROW, EVENT_END);
644 }
645 5 return error;
646 }
647
648 /* Return */
649
650 3 int return_all(const subprocess_descriptor_t *spd) {
651 int error;
652
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (!spd->options.lewi) {
653 3 error = DLB_ERR_NOLEWI;
654 } else if (!spd->lewi_enabled) {
655 error = DLB_ERR_DISBLD;
656 } else {
657 instrument_event(RUNTIME_EVENT, EVENT_RETURN, EVENT_BEGIN);
658 error = spd->lb_funcs.return_all(spd);
659 instrument_event(RUNTIME_EVENT, EVENT_RETURN, EVENT_END);
660 }
661 3 return error;
662 }
663
664 12 int return_cpu(const subprocess_descriptor_t *spd, int cpuid) {
665 int error;
666
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 9 times.
12 if (!spd->options.lewi) {
667 3 error = DLB_ERR_NOLEWI;
668
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 9 times.
9 } else if (!spd->lewi_enabled) {
669 error = DLB_ERR_DISBLD;
670 } else {
671 instrument_event(RUNTIME_EVENT, EVENT_RETURN, EVENT_BEGIN);
672 9 error = spd->lb_funcs.return_cpu(spd, cpuid);
673 instrument_event(RUNTIME_EVENT, EVENT_RETURN, EVENT_END);
674 }
675 12 return error;
676 }
677
678 3 int return_cpu_mask(const subprocess_descriptor_t *spd, const cpu_set_t *mask) {
679 int error;
680
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (!spd->options.lewi) {
681 3 error = DLB_ERR_NOLEWI;
682 } else if (!spd->lewi_enabled) {
683 error = DLB_ERR_DISBLD;
684 } else {
685 instrument_event(RUNTIME_EVENT, EVENT_RETURN, EVENT_BEGIN);
686 error = spd->lb_funcs.return_cpu_mask(spd, mask);
687 instrument_event(RUNTIME_EVENT, EVENT_RETURN, EVENT_END);
688 }
689 3 return error;
690 }
691
692
693 /* Drom Responsive */
694
695 5 int poll_drom(const subprocess_descriptor_t *spd, int *new_cpus, cpu_set_t *new_mask) {
696 int error;
697
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
5 if (!spd->options.drom) {
698 2 error = DLB_ERR_NOCOMP;
699 } else {
700 instrument_event(RUNTIME_EVENT, EVENT_POLLDROM, EVENT_BEGIN);
701 // Use a local mask if new_mask was not provided
702 cpu_set_t local_mask;
703
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 cpu_set_t *mask = new_mask ? new_mask : &local_mask;
704
705 3 error = shmem_procinfo__polldrom(spd->id, new_cpus, mask);
706
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
3 if (error == DLB_SUCCESS) {
707
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (spd->options.lewi) {
708 /* If LeWI, resolve reclaimed CPUs */
709 spd->lb_funcs.update_ownership(spd, mask);
710 } else {
711 /* Otherwise, udate owner and guest data */
712 1 shmem_cpuinfo__update_ownership(spd->id, mask, NULL);
713 }
714 }
715 instrument_event(RUNTIME_EVENT, EVENT_POLLDROM, EVENT_END);
716 }
717 5 return error;
718 }
719
720 int poll_drom_update(const subprocess_descriptor_t *spd) {
721 cpu_set_t new_mask;
722 int error = poll_drom(spd, NULL, &new_mask);
723 if (error == DLB_SUCCESS) {
724 set_process_mask(&spd->pm, &new_mask);
725 }
726 return error;
727 }
728
729 13 int drom_setprocessmask(int pid, const_dlb_cpu_set_t mask, dlb_drom_flags_t flags) {
730 cpu_set_t free_cpu_mask;
731 13 int error = shmem_procinfo__setprocessmask(pid, mask, flags, &free_cpu_mask);
732
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 7 times.
13 if (error == DLB_SUCCESS
733
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 && thread_spd->dlb_initialized
734
3/4
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 1 times.
✗ Branch 3 not taken.
3 && (pid == 0 || pid == thread_spd->id)
735
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 && !(flags & DLB_NO_SYNC)) {
736 /* Mask has been successfully set by own process, do like a poll_drom_update */
737
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (thread_spd->options.lewi) {
738 /* If LeWI, resolve reclaimed CPUs */
739 thread_spd->lb_funcs.update_ownership(thread_spd, mask);
740 } else {
741 /* Otherwise, udate owner and guest data */
742 2 shmem_cpuinfo__update_ownership(thread_spd->id, mask, NULL);
743 }
744 2 set_process_mask(&thread_spd->pm, mask);
745 }
746
3/4
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 7 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
13 if (error == DLB_SUCCESS && (flags & DLB_FREE_CPUS_SLURM)) {
747 // Slurm freeing
748 char *mask_str = mu_parse_to_slurm_format(&free_cpu_mask);
749 if (mask_str == NULL) {
750 warning("error parsing mask %s to Slurm format", mu_to_str(&free_cpu_mask));
751 return DLB_ERR_UNKNOWN;
752 }
753 if (!secure_getenv("SLURM_JOBID")) {
754 warning("SLURM_JOBID is mandatory");
755 return DLB_ERR_UNKNOWN;
756 }
757 char hostname[HOST_NAME_MAX];
758 gethostname(hostname, HOST_NAME_MAX);
759 char *args[5];
760 asprintf(&args[0], "scontrol");
761 asprintf(&args[1], "update");
762 asprintf(&args[2], "jobid=%s", secure_getenv("SLURM_JOBID"));
763 asprintf(&args[3], "dealloc=%s:%s", hostname, mask_str);
764 args[4] = NULL;
765
766 int res_pid = fork();
767 if (res_pid < 0) {
768 warning("fork error while invoking scontrol");
769 return DLB_ERR_UNKNOWN;
770 } else if (res_pid == 0) {
771 verbose(VB_DROM, "%s %s %s %s", args[0], args[1], args[2], args[3]);
772 execvp("scontrol", args);
773 }
774
775 for (int i = 0; i < 5; ++i) {
776 free(args[i]);
777 }
778 free(mask_str);
779 }
780
781 13 return error;
782 }
783
784
785 /* Misc */
786
787 24 int check_cpu_availability(const subprocess_descriptor_t *spd, int cpuid) {
788 24 int error = DLB_SUCCESS;
789
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 23 times.
24 if (!spd->options.lewi) {
790 1 error = DLB_ERR_NOLEWI;
791
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
23 } else if (!spd->lewi_enabled) {
792 error = DLB_ERR_DISBLD;
793 } else {
794 23 error = spd->lb_funcs.check_cpu_availability(spd, cpuid);
795 }
796 24 return error;
797 }
798
799 6 int print_shmem(subprocess_descriptor_t *spd, int num_columns,
800 dlb_printshmem_flags_t print_flags) {
801
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (!spd->dlb_initialized) {
802 options_init(&spd->options, NULL);
803 debug_init(&spd->options);
804 }
805
806 6 shmem_cpuinfo__print_info(spd->options.shm_key, spd->options.lewi_color,
807 num_columns, print_flags);
808 6 shmem_procinfo__print_info(spd->options.shm_key);
809 6 shmem_barrier__print_info(spd->options.shm_key);
810 6 shmem_talp__print_info(spd->options.shm_key, 0);
811
812
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (!spd->dlb_initialized) {
813 options_finalize(&spd->options);
814 }
815
816 6 return DLB_SUCCESS;
817 }
818
819 9 int set_observer_role(bool is_observer) {
820 9 thread_is_observer = is_observer;
821 9 return DLB_SUCCESS;
822 }
823