GCC Code Coverage Report


Directory: src/
File: src/LB_numThreads/omptm_free_agents.c
Date: 2024-11-22 17:07:10
Exec Total Coverage
Lines: 273 315 86.7%
Functions: 32 35 91.4%
Branches: 154 239 64.4%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 #include "LB_numThreads/omptm_free_agents.h"
21
22 #include "apis/dlb.h"
23 #include "support/atomic.h"
24 #include "support/debug.h"
25 #include "support/mask_utils.h"
26 #include "LB_comm/shmem_cpuinfo.h"
27 #include "LB_comm/shmem_procinfo.h"
28 #include "LB_core/spd.h"
29 #include "LB_numThreads/omptool.h"
30
31 #include <sched.h>
32 #include <unistd.h>
33 #include <string.h>
34 #include <pthread.h>
35 #include <inttypes.h>
36
37 /* OpenMP symbols */
38 int __kmp_get_free_agent_id(void) __attribute__((weak));
39 int __kmp_get_num_free_agent_threads(void) __attribute__((weak));
40 void __kmp_set_free_agent_thread_active_status(
41 unsigned int thread_num, bool active) __attribute__((weak));
42
43 /* Enum for ompt_data_t *parallel_data to detect level 1 (non nested) parallel
44 * regions */
45 enum {
46 PARALLEL_UNSET,
47 PARALLEL_LEVEL_1,
48 };
49
50
51 /*** Static variables ***/
52
53 static bool lewi = false;
54 static pid_t pid;
55 static omptool_opts_t omptool_opts;
56 static int system_size;
57
58 /* Masks */
59 static cpu_set_t active_mask;
60 static cpu_set_t process_mask;
61 static cpu_set_t primary_thread_mask;
62 static cpu_set_t worker_threads_mask;
63
64 /* Atomic variables */
65 static atomic_bool DLB_ALIGN_CACHE in_parallel = false;
66 static atomic_uint DLB_ALIGN_CACHE pending_tasks = 0;
67 static atomic_int DLB_ALIGN_CACHE num_enabled_free_agents = 0;
68
69 /* Thread local */
70 __thread int __free_agent_id = -1;
71 __thread int __worker_binding = -1;
72
73 /*********************************************************************************/
74 /* Free agent CPU lists for fast indexing */
75 /*********************************************************************************/
76
77 /* Free agent lists */
78 static int num_free_agents;
79 static int *free_agent_id_by_cpuid = NULL; /* indexed by CPU id */
80 static int *free_agent_cpuid_by_id = NULL; /* indexed by free agent id */
81 static int *free_agent_cpu_list = NULL; /* CPUs where a free agent is bound, owned first */
82
83 /* Lock for all the above lists, they should only be written on thread creation */
84 static pthread_rwlock_t free_agent_list_lock;
85
86 4 static void free_agent_lists_init(void) {
87 4 pthread_rwlock_init(&free_agent_list_lock, NULL);
88 4 pthread_rwlock_wrlock(&free_agent_list_lock);
89 4 free_agent_id_by_cpuid = malloc(sizeof(int)*system_size);
90 4 free_agent_cpuid_by_id = malloc(sizeof(int)*num_free_agents);
91 4 free_agent_cpu_list = malloc(sizeof(int)*num_free_agents);
92 int i;
93
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 4 times.
36 for (i=0; i<system_size; ++i) free_agent_id_by_cpuid[i] = -1;
94
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
32 for (i=0; i<num_free_agents; ++i) free_agent_cpuid_by_id[i] = -1;
95
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
32 for (i=0; i<num_free_agents; ++i) free_agent_cpu_list[i] = -1;
96 4 pthread_rwlock_unlock(&free_agent_list_lock);
97 4 }
98
99 4 static void free_agent_lists_destroy(void) {
100 4 pthread_rwlock_wrlock(&free_agent_list_lock);
101 4 free(free_agent_id_by_cpuid);
102 4 free_agent_id_by_cpuid = NULL;
103 4 free(free_agent_cpuid_by_id);
104 4 free_agent_cpuid_by_id = NULL;
105 4 free(free_agent_cpu_list);
106 4 free_agent_cpu_list = NULL;
107 4 pthread_rwlock_unlock(&free_agent_list_lock);
108 4 pthread_rwlock_destroy(&free_agent_list_lock);
109 4 }
110
111 7 static void free_agent_lists_register(int thread_id, int cpuid) {
112 7 pthread_rwlock_wrlock(&free_agent_list_lock);
113 /* Insert id's */
114 7 free_agent_id_by_cpuid[cpuid] = thread_id;
115 7 free_agent_cpuid_by_id[thread_id] = cpuid;
116
117 /* Insert CPUid and reorder */
118 int i;
119
1/2
✓ Branch 0 taken 28 times.
✗ Branch 1 not taken.
28 for (i=0; i<num_free_agents; ++i) {
120
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 21 times.
28 if (free_agent_cpu_list[i] == -1) {
121 7 free_agent_cpu_list[i] = cpuid;
122 7 break;
123 }
124 }
125 7 qsort_r(free_agent_cpu_list, i+1, sizeof(int),
126 mu_cmp_cpuids_by_ownership, &process_mask);
127
128 7 pthread_rwlock_unlock(&free_agent_list_lock);
129 7 }
130
131 77 static inline int get_free_agent_id_by_cpuid(int cpuid) {
132 77 int id = -1;
133 77 pthread_rwlock_rdlock(&free_agent_list_lock);
134
1/2
✓ Branch 0 taken 77 times.
✗ Branch 1 not taken.
77 if (likely(free_agent_id_by_cpuid != NULL)) {
135 77 id = free_agent_id_by_cpuid[cpuid];
136 }
137 77 pthread_rwlock_unlock(&free_agent_list_lock);
138 77 return id;
139 }
140
141 15 static inline int get_free_agent_cpuid_by_id(int thread_id) {
142 15 int cpuid = -1;
143 15 pthread_rwlock_rdlock(&free_agent_list_lock);
144
1/2
✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
15 if (likely(free_agent_cpuid_by_id != NULL)) {
145 15 cpuid = free_agent_cpuid_by_id[thread_id];
146 }
147 15 pthread_rwlock_unlock(&free_agent_list_lock);
148 15 return cpuid;
149 }
150
151 /*********************************************************************************/
152 /* CPU Data structures and helper atomic flags functions */
153 /*********************************************************************************/
154
155 /* Current state of the CPU (what is being used for) */
156 typedef enum CPUState {
157 CPU_STATE_UNKNOWN = 0,
158 CPU_STATE_IDLE = 1 << 0,
159 CPU_STATE_LENT = 1 << 1,
160 CPU_STATE_RECLAIMED = 1 << 2,
161 CPU_STATE_IN_PARALLEL = 1 << 3,
162 CPU_STATE_FREE_AGENT_ENABLED = 1 << 4,
163 } cpu_state_t;
164
165 /* Possible OpenMP roles that the CPU can take */
166 typedef enum OpenMP_Roles {
167 ROLE_NONE = 0,
168 ROLE_PRIMARY = 1 << 0,
169 ROLE_WORKER = 1 << 1,
170 ROLE_FREE_AGENT = 1 << 2,
171 } openmp_roles_t;
172
173 typedef struct DLB_ALIGN_CACHE CPU_Data {
174 openmp_roles_t roles;
175 _Atomic(cpu_state_t) state;
176 atomic_bool wanted_for_parallel;
177 } cpu_data_t;
178
179 static cpu_data_t *cpu_data = NULL;
180
181
182 /*********************************************************************************/
183 /* DLB callbacks */
184 /*********************************************************************************/
185
186 16 static void cb_enable_cpu(int cpuid, void *arg) {
187 /* Skip callback if this CPU is required for a parallel region */
188
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].wanted_for_parallel)) {
189 return;
190 }
191
192 /* If this CPU is reclaimed, set IDLE */
193
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 13 times.
16 if (cas_bit((atomic_int*)&cpu_data[cpuid].state,
194 CPU_STATE_RECLAIMED, CPU_STATE_IDLE)) {
195 3 return;
196 }
197
198 13 int free_agent_id = get_free_agent_id_by_cpuid(cpuid);
199
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
13 if (unlikely(free_agent_id == -1)) {
200 /* probably too early? */
201 DLB_LendCpu(cpuid);
202 } else {
203 /* Enable associated free agent thread if not already */
204
1/2
✓ Branch 1 taken 13 times.
✗ Branch 2 not taken.
13 if(test_set_clear_bit((atomic_int*)&cpu_data[cpuid].state,
205 CPU_STATE_FREE_AGENT_ENABLED, CPU_STATE_IDLE)) {
206 13 DLB_ATOMIC_ADD(&num_enabled_free_agents, 1);
207
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 13 times.
13 verbose(VB_OMPT, "Enabling free agent %d", free_agent_id);
208 13 __kmp_set_free_agent_thread_active_status(free_agent_id, true);
209 }
210 }
211 }
212
213 10 static void cb_disable_cpu(int cpuid, void *arg) {
214 10 int free_agent_id = get_free_agent_id_by_cpuid(cpuid);
215
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
10 if (unlikely(free_agent_id == -1)) {
216 /* Probably a callback after ompt_finalize has been called, ignore */
217 return;
218 }
219
220 /* If CPU is not needed, set IDLE */
221
5/6
✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 6 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 6 times.
✓ Branch 5 taken 4 times.
10 if (CPU_ISSET(cpuid, &process_mask)
222
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 && !DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].wanted_for_parallel)) {
223 3 set_bit((atomic_int*)&cpu_data[cpuid].state, CPU_STATE_IDLE);
224 }
225
226 /* If CPU was assigned to a free agent thread, disable it */
227
1/2
✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
10 if (clear_bit((atomic_int*)&cpu_data[cpuid].state, CPU_STATE_FREE_AGENT_ENABLED)) {
228 10 DLB_ATOMIC_SUB(&num_enabled_free_agents, 1);
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
10 verbose(VB_OMPT, "Disabling free agent %d", free_agent_id);
230 10 __kmp_set_free_agent_thread_active_status(free_agent_id, false);
231 }
232 }
233
234 static void cb_set_process_mask(const cpu_set_t *mask, void *arg) {
235 memcpy(&process_mask, mask, sizeof(cpu_set_t));
236 memcpy(&active_mask, mask, sizeof(cpu_set_t));
237 }
238
239 /*********************************************************************************/
240 /* Other static functions */
241 /*********************************************************************************/
242
243 /* Actions to do when --lewi-ompt=lend */
244 5 static void omptm_free_agents__lend(void) {
245
2/4
✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 5 times.
5 if (lewi && omptool_opts & OMPTOOL_OPTS_LEND) {
246 /* Lend all IDLE worker CPUs */
247 cpu_set_t mask;
248 int cpuid;
249 for (cpuid=0; cpuid<system_size; ++cpuid) {
250 if (CPU_ISSET(cpuid, &worker_threads_mask) &&
251 cas_bit((atomic_int*)&cpu_data[cpuid].state,
252 CPU_STATE_IDLE, CPU_STATE_LENT)) {
253 CPU_SET(cpuid, &mask);
254 }
255 }
256 DLB_LendCpuMask(&mask);
257
258 /* The active mask should only be the primary mask */
259 memcpy(&active_mask, &primary_thread_mask, sizeof(cpu_set_t));
260
261 verbose(VB_OMPT, "Release - Setting new mask to %s", mu_to_str(&active_mask));
262 }
263 5 }
264
265 /* Look for local available CPUs, if none is found ask LeWI */
266 16 static void acquire_one_free_agent(void) {
267
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 16 times.
16 if (num_enabled_free_agents == num_free_agents) {
268 return;
269 }
270
271 cpu_set_t cpus_to_ask;
272 16 CPU_ZERO(&cpus_to_ask);
273
274 /* Iterate only CPUs where free agents are assigned */
275 int i;
276 16 pthread_rwlock_rdlock(&free_agent_list_lock);
277
2/2
✓ Branch 0 taken 67 times.
✓ Branch 1 taken 7 times.
74 for (i=0; i<num_free_agents; ++i) {
278 67 int cpuid = free_agent_cpu_list[i];
279
280 /* It is safe to just make a copy, we either skip the CPU or
281 * call enable_cpu which will do an atomic exchange */
282 67 cpu_state_t cpu_state = DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state);
283
284 /* Skip CPU if it's already busy */
285
2/2
✓ Branch 0 taken 36 times.
✓ Branch 1 taken 31 times.
67 if (cpu_state & (CPU_STATE_IN_PARALLEL | CPU_STATE_FREE_AGENT_ENABLED)) {
286 // FIXME: check other flags? wanted_for_parallel?
287 36 continue;
288 }
289
290 /* If some CPU is IDLE, try enabling it */
291
2/2
✓ Branch 0 taken 9 times.
✓ Branch 1 taken 22 times.
31 if (cpu_state == CPU_STATE_IDLE) {
292 9 cb_enable_cpu(cpuid, NULL);
293 9 CPU_ZERO(&cpus_to_ask);
294 9 break;
295 } else {
296
1/2
✓ Branch 0 taken 22 times.
✗ Branch 1 not taken.
22 CPU_SET(cpuid, &cpus_to_ask);
297 }
298 }
299 16 pthread_rwlock_unlock(&free_agent_list_lock);
300
301 /* Call LeWI if we didn't find any IDLE CPU */
302
3/4
✓ Branch 0 taken 16 times.
✗ Branch 1 not taken.
✓ Branch 3 taken 7 times.
✓ Branch 4 taken 9 times.
16 if (lewi && CPU_COUNT(&cpus_to_ask) > 0) {
303 7 DLB_AcquireCpusInMask(1, &cpus_to_ask);
304 }
305 }
306
307 /* Obtain a CPU id for a given free agent id */
308 38 static int get_free_agent_binding(int thread_id) {
309 /* Find out a CPU for the free agent thread */
310 38 int cpuid = -1;
311
312 /* FIXME: If the process mask, and number of workers and free agent threads
313 * are immutable, this function could be called only once, and
314 * save all the possible bindings for each free agent thread.
315 * (but not during init, we may not have all CPUs registered yet)
316 */
317
318 /* FIXME: let's assume that all processes are already registered when the
319 * first free agent starts. Otherwise, we wouldn't be able to
320 * take the right decision here.
321 */
322
323 cpu_set_t available_process_cpus;
324 38 mu_substract(&available_process_cpus, &process_mask, &primary_thread_mask);
325 38 mu_substract(&available_process_cpus, &available_process_cpus, &worker_threads_mask);
326 38 int num_free_agents_in_available_cpus = mu_count(&available_process_cpus);
327
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 33 times.
38 if (thread_id < num_free_agents_in_available_cpus) {
328 /* Simpler scenario: the default workers mask plus primary does not cover
329 * all the CPUs in the process mask and the free agent thread_id is low
330 * enough to use one of those free CPUs.
331 */
332 5 int cpus_found = 0;
333
1/2
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
24 for (cpuid=0; cpuid<system_size; ++cpuid) {
334
5/6
✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
✓ Branch 3 taken 15 times.
✓ Branch 4 taken 9 times.
✓ Branch 5 taken 15 times.
24 if (CPU_ISSET(cpuid, &available_process_cpus)) {
335
2/2
✓ Branch 0 taken 5 times.
✓ Branch 1 taken 4 times.
9 if (cpus_found++ == thread_id) {
336 5 break;
337 }
338 }
339 }
340 }
341
342 else {
343 33 int num_non_owned_cpus = shmem_cpuinfo__get_number_of_non_owned_cpus(pid);
344
2/2
✓ Branch 0 taken 20 times.
✓ Branch 1 taken 13 times.
33 if (thread_id < (num_non_owned_cpus + num_free_agents_in_available_cpus)) {
345 /* Second case: Find a CPU in another process */
346 20 cpuid = shmem_cpuinfo__get_nth_non_owned_cpu(pid,
347 thread_id - num_free_agents_in_available_cpus);
348 26 } else if (thread_id < (num_non_owned_cpus + num_free_agents_in_available_cpus
349
2/2
✓ Branch 1 taken 10 times.
✓ Branch 2 taken 3 times.
13 + CPU_COUNT(&worker_threads_mask))) {
350 /* Third case: Share a CPU with some worker thread */
351 10 int victim_worker = thread_id
352 10 - num_non_owned_cpus - num_free_agents_in_available_cpus;
353 10 int workers_found = 0;
354
1/2
✓ Branch 0 taken 41 times.
✗ Branch 1 not taken.
41 for (cpuid=0; cpuid<system_size; ++cpuid) {
355
5/6
✓ Branch 0 taken 41 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 19 times.
✓ Branch 3 taken 22 times.
✓ Branch 4 taken 19 times.
✓ Branch 5 taken 22 times.
41 if (CPU_ISSET(cpuid, &worker_threads_mask)) {
356
2/2
✓ Branch 0 taken 10 times.
✓ Branch 1 taken 9 times.
19 if (workers_found++ == victim_worker) {
357 10 break;
358 }
359 }
360 }
361 } else {
362 /* Last case: no CPUs left? */
363 }
364 }
365 38 return cpuid;
366 }
367
368
369 /*********************************************************************************/
370 /* Init & Finalize module */
371 /*********************************************************************************/
372
373 4 void omptm_free_agents__init(pid_t process_id, const options_t *options) {
374
375 /* Initialize static variables */
376 4 system_size = mu_get_system_size();
377 4 lewi = options->lewi;
378 4 omptool_opts = options->lewi_ompt;
379 4 pid = process_id;
380 4 num_free_agents = __kmp_get_num_free_agent_threads();
381 4 shmem_procinfo__getprocessmask(pid, &process_mask, 0);
382
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Process mask: %s", mu_to_str(&process_mask));
383
384 // omp_get_max_threads cannot be called here, try using the env. var.
385 4 const char *env_omp_num_threads = getenv("OMP_NUM_THREADS");
386 4 int default_num_threads =
387 4 env_omp_num_threads ? atoi(env_omp_num_threads)
388
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 : CPU_COUNT(&process_mask);
389
390 /* Initialize atomic variables */
391 4 DLB_ATOMIC_ST(&num_enabled_free_agents, 0);
392
393 /* Initialize CPU Data array */
394 4 cpu_data = malloc(sizeof(cpu_data_t)*system_size);
395
396 /* Construct Primary and Worker threads masks */
397 4 CPU_ZERO(&primary_thread_mask);
398 4 CPU_ZERO(&worker_threads_mask);
399 int cpuid;
400 4 int encountered_cpus = 0;
401
2/2
✓ Branch 0 taken 32 times.
✓ Branch 1 taken 4 times.
36 for (cpuid=0; cpuid<system_size; ++cpuid) {
402
5/6
✓ Branch 0 taken 32 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 16 times.
✓ Branch 3 taken 16 times.
✓ Branch 4 taken 16 times.
✓ Branch 5 taken 16 times.
32 if (CPU_ISSET(cpuid, &process_mask)) {
403
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 12 times.
16 if (++encountered_cpus == 1) {
404 /* First encountered CPU belongs to the primary thread */
405
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 CPU_SET(cpuid, &primary_thread_mask);
406 4 cpu_data[cpuid].roles = ROLE_PRIMARY;
407 4 cpu_data[cpuid].state = CPU_STATE_IN_PARALLEL;
408
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 5 times.
12 } else if (encountered_cpus-1 < default_num_threads) {
409 /* Infer the worker threads CPUS */
410
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 CPU_SET(cpuid, &worker_threads_mask);
411 7 cpu_data[cpuid].roles = ROLE_WORKER;
412 7 cpu_data[cpuid].state = CPU_STATE_IDLE;
413 }
414 } else {
415 16 cpu_data[cpuid].roles = ROLE_NONE;
416 16 cpu_data[cpuid].state = CPU_STATE_UNKNOWN;
417 }
418 32 cpu_data[cpuid].wanted_for_parallel = false;
419 }
420 4 memcpy(&active_mask, &primary_thread_mask, sizeof(cpu_set_t));
421
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Primary thread mask: %s", mu_to_str(&primary_thread_mask));
422
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Worker threads mask: %s", mu_to_str(&worker_threads_mask));
423
424 /* Initialize free agent lists */
425 4 free_agent_lists_init();
426
427
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (lewi) {
428 int err;
429 4 err = DLB_CallbackSet(dlb_callback_enable_cpu, (dlb_callback_t)cb_enable_cpu, NULL);
430
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (err != DLB_SUCCESS) {
431 warning("DLB_CallbackSet enable_cpu: %s", DLB_Strerror(err));
432 }
433 4 err = DLB_CallbackSet(dlb_callback_disable_cpu, (dlb_callback_t)cb_disable_cpu, NULL);
434
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (err != DLB_SUCCESS) {
435 warning("DLB_CallbackSet disable_cpu: %s", DLB_Strerror(err));
436 }
437 4 err = DLB_CallbackSet(dlb_callback_set_process_mask,
438 (dlb_callback_t)cb_set_process_mask, NULL);
439
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (err != DLB_SUCCESS) {
440 warning("DLB_CallbackSet set_process_mask: %s", DLB_Strerror(err));
441 }
442
443 4 omptm_free_agents__lend();
444 }
445 4 }
446
447 4 void omptm_free_agents__finalize(void) {
448 /* Destroy CPU data */
449 4 free(cpu_data);
450 4 cpu_data = NULL;
451
452 /* Destroy free agent lists and lock */
453 4 free_agent_lists_destroy();
454 4 }
455
456
457 /*********************************************************************************/
458 /* Blocking calls specific functions */
459 /*********************************************************************************/
460
461 1 void omptm_free_agents__IntoBlockingCall(void) {
462
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (lewi) {
463
464 /* Don't know what to do if a Blocking Call is invoked inside a
465 * parallel region. We could ignore it, but then we should also ignore
466 * the associated OutOfBlockingCall, and how would we know it? */
467
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 fatal_cond(DLB_ATOMIC_LD(&in_parallel),
468 "Blocking Call inside a parallel region not supported");
469
470 /* Warning: the current CPU, hopefully the one assigned to the primary
471 * thread, has already been lent in the appropriate LeWI function
472 * for the IntoBlockingCall event. */
473
474 cpu_set_t cpus_to_lend;
475 1 CPU_ZERO(&cpus_to_lend);
476
477 /* Lend all CPUs not being used by workers or free agents */
478 /* All owned CPUs (except primary) go from IDLE to LENT */
479 int cpuid;
480
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 for (cpuid=0; cpuid<system_size; ++cpuid) {
481
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 1 times.
8 if (cpu_data[cpuid].roles & (ROLE_WORKER | ROLE_FREE_AGENT)
482
5/6
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 4 times.
7 && CPU_ISSET(cpuid, &process_mask)
483
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 && cas_bit((atomic_int*)&cpu_data[cpuid].state,
484 CPU_STATE_IDLE, CPU_STATE_LENT)) {
485
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 CPU_SET(cpuid, &cpus_to_lend);
486 }
487 }
488 1 DLB_LendCpuMask(&cpus_to_lend);
489 }
490 1 }
491
492
493 1 void omptm_free_agents__OutOfBlockingCall(void) {
494
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (lewi) {
495
1/2
✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
1 if (omptool_opts & OMPTOOL_OPTS_LEND) {
496 /* Do nothing.
497 * Do not reclaim since going out of a blocking call is not
498 * an indication that the CPUs may be needed. */
499 }
500 else {
501 cpu_set_t cpus_to_reclaim;
502 1 CPU_ZERO(&cpus_to_reclaim);
503
504 /* All owned CPUs (except primary) go from LENT to RECLAIMED */
505 int cpuid;
506
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 for (cpuid=0; cpuid<system_size; ++cpuid) {
507
2/2
✓ Branch 0 taken 7 times.
✓ Branch 1 taken 1 times.
8 if (cpu_data[cpuid].roles & (ROLE_WORKER | ROLE_FREE_AGENT)
508
5/6
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 4 times.
7 && CPU_ISSET(cpuid, &process_mask)
509
1/2
✓ Branch 1 taken 3 times.
✗ Branch 2 not taken.
3 && cas_bit((atomic_int*)&cpu_data[cpuid].state,
510 CPU_STATE_LENT, CPU_STATE_RECLAIMED)) {
511
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 CPU_SET(cpuid, &cpus_to_reclaim);
512 }
513 }
514 1 DLB_ReclaimCpuMask(&cpus_to_reclaim);
515 }
516 }
517 1 }
518
519
520 /*********************************************************************************/
521 /* OMPT registered callbacks */
522 /*********************************************************************************/
523
524 7 void omptm_free_agents__thread_begin(ompt_thread_t thread_type) {
525 /* Set up thread local spd */
526 7 spd_enter_dlb(thread_spd);
527
528
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 if (thread_type == ompt_thread_other) {
529 7 int thread_id = __kmp_get_free_agent_id();
530
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 fatal_cond(thread_id < 0, "free agent id < 0");
531 7 __free_agent_id = thread_id;
532
533 7 int cpuid = get_free_agent_binding(thread_id);
534
535
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 if (cpuid >=0) {
536 /* Set up free agent in free agent lists */
537 7 free_agent_lists_register(thread_id, cpuid);
538
539 /* Set up new CPU role */
540 /* FIXME: not atomic */
541 7 cpu_data[cpuid].roles |= ROLE_FREE_AGENT;
542
543 /* TODO: set up cpu state? */
544
545 /* Free agent threads start in disabled status */
546 7 __kmp_set_free_agent_thread_active_status(thread_id, false);
547
548 /* Bind free agent thread to cpuid */
549 cpu_set_t thread_mask;
550 7 CPU_ZERO(&thread_mask);
551
1/2
✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
7 CPU_SET(cpuid, &thread_mask);
552 7 int err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask);
553
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 7 times.
7 verbose(VB_OMPT, "Detected free agent thread %d. Pinning to CPU: %d, err: %d",
554 thread_id, cpuid, err);
555 } else {
556 warning("Could not find a suitable CPU bind for free agent thread id: %d",
557 thread_id);
558 }
559 }
560 7 }
561
562 2 void omptm_free_agents__parallel_begin(omptool_parallel_data_t *parallel_data) {
563
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (parallel_data->level == 1) {
564 1 DLB_ATOMIC_ST(&in_parallel, true);
565
566 /* Only if requested_parallelism == process_mask, reclaim all our lent CPUs, if needed */
567 /* Otherwise, each thread will be responsible for reclaiming themselves */
568
1/2
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
1 if (parallel_data->requested_parallelism == (unsigned)CPU_COUNT(&process_mask)) {
569 1 int cpus_to_reclaim = 0;
570 1 int cpuid_to_reclaim = -1;
571 cpu_set_t mask_to_reclaim;
572 int cpuid;
573
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 for (cpuid = 0; cpuid<system_size; ++cpuid) {
574
5/6
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 4 times.
✓ Branch 3 taken 4 times.
✓ Branch 4 taken 4 times.
✓ Branch 5 taken 4 times.
8 if (CPU_ISSET(cpuid, &process_mask)) {
575 4 DLB_ATOMIC_ST_RLX(&cpu_data[cpuid].wanted_for_parallel, true);
576
577 /* It is safe to just make a copy, we'll either call cb_disable_cpu
578 * which will do an atomic exchange or we'll call LeWI */
579 4 cpu_state_t cpu_state = DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state);
580
581
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (cpu_state & CPU_STATE_FREE_AGENT_ENABLED) {
582 // Disable free agent thread from this CPU
583 3 cb_disable_cpu(cpuid, NULL);
584 }
585
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 else if (cpu_state & CPU_STATE_LENT) {
586 // Reclaim this CPU to LeWI
587 switch(++cpus_to_reclaim) {
588 case 1:
589 cpuid_to_reclaim = cpuid;
590 break;
591 case 2:
592 CPU_ZERO(&mask_to_reclaim);
593 CPU_SET(cpuid_to_reclaim, &mask_to_reclaim);
594 DLB_FALLTHROUGH;
595 default:
596 CPU_SET(cpuid, &mask_to_reclaim);
597 }
598 }
599 }
600 }
601
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (cpus_to_reclaim == 1) {
602 DLB_ReclaimCpu(cpuid_to_reclaim);
603
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 } else if (cpus_to_reclaim > 1) {
604 DLB_ReclaimCpuMask(&mask_to_reclaim);
605 }
606 }
607 }
608 2 }
609
610 2 void omptm_free_agents__parallel_end(omptool_parallel_data_t *parallel_data) {
611
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 if (parallel_data->level == 1) {
612 1 DLB_ATOMIC_ST(&in_parallel, false);
613 /* All workers in parallel go to IDLE */
614 int cpuid;
615
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 1 times.
9 for (cpuid=0; cpuid<system_size; ++cpuid) {
616
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 5 times.
8 if (cpu_data[cpuid].roles & ROLE_WORKER) {
617 3 cas_bit((atomic_int*)&cpu_data[cpuid].state,
618 CPU_STATE_IN_PARALLEL, CPU_STATE_IDLE);
619 3 DLB_ATOMIC_ST_RLX(&cpu_data[cpuid].wanted_for_parallel, false);
620 }
621 }
622
623 1 omptm_free_agents__lend();
624 }
625 2 }
626
627 5 void omptm_free_agents__into_parallel_function(
628 omptool_parallel_data_t *parallel_data, unsigned int index) {
629
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
5 if (parallel_data->level == 1) {
630 /* Obtain CPU id */
631 /* FIXME: actually, we should test CPU binding every time we enter
632 * here, since the RT is free to rebind threads, but we need
633 * __worker_binding for testing */
634 4 int cpuid = __worker_binding;
635
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (cpuid == -1) {
636 cpu_set_t thread_mask;
637 pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask);
638 cpuid = mu_get_single_cpu(&thread_mask);
639 fatal_cond(cpuid == -1,
640 "DLB does not currently support thread binding to more than one CPU,"
641 " current CPU affinity mask for thread %d: %s."
642 " Please, define OMP_PLACES=threads and run again.",
643 index, mu_to_str(&thread_mask));
644 __worker_binding = cpuid;
645 }
646
647
648 /* Reclaim CPU if needed and set the appropriate state */
649
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state) & CPU_STATE_LENT) {
650 DLB_ReclaimCpu(cpuid);
651 }
652 4 set_bit((atomic_int*)&cpu_data[cpuid].state, CPU_STATE_IN_PARALLEL);
653
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "CPU %d starting an implicit task", cpuid);
654 }
655 5 }
656
657 void omptm_free_agents__task_create(void) {
658 /* Increment the amount of pending tasks */
659 DLB_ATOMIC_ADD(&pending_tasks, 1);
660
661 /* For now, let's assume that we always want to increase the number
662 * of active threads whenever a task is created
663 */
664 acquire_one_free_agent();
665 }
666
667 8 void omptm_free_agents__task_complete(void) {
668
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (__free_agent_id >= 0) {
669 8 int cpuid = get_free_agent_cpuid_by_id(__free_agent_id);
670
671 /* Disable free agent thread if this CPU is needed for a worker thread */
672
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state) & CPU_STATE_IN_PARALLEL
673
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 || DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].wanted_for_parallel)) {
674 cb_disable_cpu(cpuid, NULL);
675 }
676
677 /* Return CPU if reclaimed */
678
2/2
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 5 times.
8 else if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) {
679
1/2
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
3 if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) {
680 cb_disable_cpu(cpuid, NULL);
681 }
682 }
683
684 /* Lend CPU if no more tasks */
685
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 1 times.
5 else if (DLB_ATOMIC_LD(&pending_tasks) == 0) {
686 4 cb_disable_cpu(cpuid, NULL);
687
688 /* Lend only free agents not part of the process mask */
689 /* or, lend anyway if LEND policy */
690
5/6
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✓ Branch 3 taken 1 times.
✓ Branch 4 taken 3 times.
✓ Branch 5 taken 1 times.
4 if (!CPU_ISSET(cpuid, &process_mask)
691
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 3 times.
3 || omptool_opts & OMPTOOL_OPTS_LEND) {
692 1 DLB_LendCpu(cpuid);
693 }
694 }
695 }
696 8 }
697
698 void omptm_free_agents__task_switch(void) {
699 if (DLB_ATOMIC_SUB(&pending_tasks, 1) > 1) {
700 acquire_one_free_agent();
701 }
702 }
703
704
705 /*********************************************************************************/
706 /* Functions for testing purposes */
707 /*********************************************************************************/
708
709 4 void omptm_free_agents_testing__set_worker_binding(int cpuid) {
710 4 __worker_binding = cpuid;
711 4 }
712
713 8 void omptm_free_agents_testing__set_free_agent_id(int id) {
714 8 __free_agent_id = id;
715 8 }
716
717 5 void omptm_free_agents_testing__set_pending_tasks(unsigned int num_tasks) {
718 5 pending_tasks = num_tasks;
719 5 }
720
721 16 void omptm_free_agents_testing__acquire_one_free_agent(void) {
722 16 acquire_one_free_agent();
723 16 }
724
725 3 bool omptm_free_agents_testing__in_parallel(void) {
726 3 return DLB_ATOMIC_LD(&in_parallel);
727 }
728
729 4 bool omptm_free_agents_testing__check_cpu_in_parallel(int cpuid) {
730 4 return DLB_ATOMIC_LD(&cpu_data[cpuid].state) & CPU_STATE_IN_PARALLEL;
731 }
732
733 6 bool omptm_free_agents_testing__check_cpu_idle(int cpuid) {
734 6 return DLB_ATOMIC_LD(&cpu_data[cpuid].state) & CPU_STATE_IDLE;
735 }
736
737 3 bool omptm_free_agents_testing__check_cpu_free_agent_enabled(int cpuid) {
738 3 return DLB_ATOMIC_LD(&cpu_data[cpuid].state) & CPU_STATE_FREE_AGENT_ENABLED;
739 }
740
741 18 int omptm_free_agents_testing__get_num_enabled_free_agents(void) {
742 18 return DLB_ATOMIC_LD(&num_enabled_free_agents);
743 }
744
745 7 int omptm_free_agents_testing__get_free_agent_cpu(int thread_id) {
746 7 return free_agent_cpu_list[thread_id];
747 }
748
749 31 int omptm_free_agents_testing__get_free_agent_binding(int thread_id) {
750 31 return get_free_agent_binding(thread_id);
751 }
752
753 54 int omptm_free_agents_testing__get_free_agent_id_by_cpuid(int cpuid) {
754 54 return get_free_agent_id_by_cpuid(cpuid);
755 }
756
757 7 int omptm_free_agents_testing__get_free_agent_cpuid_by_id(int thread_id) {
758 7 return get_free_agent_cpuid_by_id(thread_id);
759 }
760