| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #include "LB_numThreads/omptm_free_agents.h" | ||
| 21 | |||
| 22 | #include "apis/dlb.h" | ||
| 23 | #include "support/atomic.h" | ||
| 24 | #include "support/debug.h" | ||
| 25 | #include "support/mask_utils.h" | ||
| 26 | #include "LB_comm/shmem_cpuinfo.h" | ||
| 27 | #include "LB_comm/shmem_procinfo.h" | ||
| 28 | #include "LB_core/spd.h" | ||
| 29 | #include "LB_numThreads/omptool.h" | ||
| 30 | |||
| 31 | #include <sched.h> | ||
| 32 | #include <unistd.h> | ||
| 33 | #include <string.h> | ||
| 34 | #include <pthread.h> | ||
| 35 | #include <inttypes.h> | ||
| 36 | |||
| 37 | /* OpenMP symbols */ | ||
| 38 | int __kmp_get_free_agent_id(void) __attribute__((weak)); | ||
| 39 | int __kmp_get_num_free_agent_threads(void) __attribute__((weak)); | ||
| 40 | void __kmp_set_free_agent_thread_active_status( | ||
| 41 | unsigned int thread_num, bool active) __attribute__((weak)); | ||
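
These entry points come from an OpenMP runtime with free-agent-thread support (an experimental runtime extension); declaring them weak lets the module link against a stock runtime, where an unprovided weak symbol resolves to NULL. A minimal sketch of the guard a caller could use before touching this API (hypothetical check, not part of this file, which assumes a capable runtime):

```c
/* Sketch: with weak linkage, an unprovided function resolves to NULL,
 * so its address can be tested before the first call. */
if (__kmp_get_num_free_agent_threads != NULL) {
    int n = __kmp_get_num_free_agent_threads();
    /* ... free agent API is available, n threads configured ... */
} else {
    /* Stock runtime without free agents: keep this module disabled. */
}
```
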
| 42 | |||
| 43 | /* Enum for ompt_data_t *parallel_data to detect level 1 (non-nested) parallel | ||
| 44 | * regions */ | ||
| 45 | enum { | ||
| 46 | PARALLEL_UNSET, | ||
| 47 | PARALLEL_LEVEL_1, | ||
| 48 | }; | ||
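
OMPT's `ompt_data_t` is a union holding either a `uint64_t value` or a `void *ptr`, so a tool can tag each parallel region by storing a small enumerator directly in its `parallel_data`. A plausible sketch of how these two values would be written and read from the OMPT callbacks (simplified hypothetical callback bodies; the real wiring lives in the omptool layer):

```c
/* Sketch: tag level-1 (outermost) parallel regions in their ompt_data_t. */
static void example_parallel_begin(ompt_data_t *parallel_data, int level) {
    parallel_data->value = (level == 1) ? PARALLEL_LEVEL_1 : PARALLEL_UNSET;
}

static void example_parallel_end(ompt_data_t *parallel_data) {
    if (parallel_data->value == PARALLEL_LEVEL_1) {
        /* ... level-1 bookkeeping: workers go back to IDLE, lend CPUs ... */
    }
}
```
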
| 49 | |||
| 50 | |||
| 51 | /*** Static variables ***/ | ||
| 52 | |||
| 53 | static bool lewi = false; | ||
| 54 | static pid_t pid; | ||
| 55 | static omptool_opts_t omptool_opts; | ||
| 56 | static int system_size; | ||
| 57 | |||
| 58 | /* Masks */ | ||
| 59 | static cpu_set_t active_mask; | ||
| 60 | static cpu_set_t process_mask; | ||
| 61 | static cpu_set_t primary_thread_mask; | ||
| 62 | static cpu_set_t worker_threads_mask; | ||
| 63 | |||
| 64 | /* Atomic variables */ | ||
| 65 | static atomic_bool DLB_ALIGN_CACHE in_parallel = false; | ||
| 66 | static atomic_uint DLB_ALIGN_CACHE pending_tasks = 0; | ||
| 67 | static atomic_int DLB_ALIGN_CACHE num_enabled_free_agents = 0; | ||
| 68 | |||
| 69 | /* Thread local */ | ||
| 70 | __thread int __free_agent_id = -1; | ||
| 71 | __thread int __worker_binding = -1; | ||
| 72 | |||
| 73 | /*********************************************************************************/ | ||
| 74 | /* Free agent CPU lists for fast indexing */ | ||
| 75 | /*********************************************************************************/ | ||
| 76 | |||
| 77 | /* Free agent lists */ | ||
| 78 | static int num_free_agents; | ||
| 79 | static int *free_agent_id_by_cpuid = NULL; /* indexed by CPU id */ | ||
| 80 | static int *free_agent_cpuid_by_id = NULL; /* indexed by free agent id */ | ||
| 81 | static int *free_agent_cpu_list = NULL; /* CPUs where a free agent is bound, owned first */ | ||
| 82 | |||
| 83 | /* Lock for all the above lists, they should only be written on thread creation */ | ||
| 84 | static pthread_rwlock_t free_agent_list_lock; | ||
| 85 | |||
| 86 | 4 | static void free_agent_lists_init(void) { | |
| 87 | 4 | pthread_rwlock_init(&free_agent_list_lock, NULL); | |
| 88 | 4 | pthread_rwlock_wrlock(&free_agent_list_lock); | |
| 89 | 4 | free_agent_id_by_cpuid = malloc(sizeof(int)*system_size); | |
| 90 | 4 | free_agent_cpuid_by_id = malloc(sizeof(int)*num_free_agents); | |
| 91 | 4 | free_agent_cpu_list = malloc(sizeof(int)*num_free_agents); | |
| 92 | int i; | ||
| 93 | 2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 4 times. | 36 | for (i=0; i<system_size; ++i) free_agent_id_by_cpuid[i] = -1; |
| 94 | 2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 4 times. | 32 | for (i=0; i<num_free_agents; ++i) free_agent_cpuid_by_id[i] = -1; |
| 95 | 2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 4 times. | 32 | for (i=0; i<num_free_agents; ++i) free_agent_cpu_list[i] = -1; |
| 96 | 4 | pthread_rwlock_unlock(&free_agent_list_lock); | |
| 97 | 4 | } | |
| 98 | |||
| 99 | 4 | static void free_agent_lists_destroy(void) { | |
| 100 | 4 | pthread_rwlock_wrlock(&free_agent_list_lock); | |
| 101 | 4 | free(free_agent_id_by_cpuid); | |
| 102 | 4 | free_agent_id_by_cpuid = NULL; | |
| 103 | 4 | free(free_agent_cpuid_by_id); | |
| 104 | 4 | free_agent_cpuid_by_id = NULL; | |
| 105 | 4 | free(free_agent_cpu_list); | |
| 106 | 4 | free_agent_cpu_list = NULL; | |
| 107 | 4 | pthread_rwlock_unlock(&free_agent_list_lock); | |
| 108 | 4 | pthread_rwlock_destroy(&free_agent_list_lock); | |
| 109 | 4 | } | |
| 110 | |||
| 111 | 7 | static void free_agent_lists_register(int thread_id, int cpuid) { | |
| 112 | 7 | pthread_rwlock_wrlock(&free_agent_list_lock); | |
| 113 | /* Insert ids */ | ||
| 114 | 7 | free_agent_id_by_cpuid[cpuid] = thread_id; | |
| 115 | 7 | free_agent_cpuid_by_id[thread_id] = cpuid; | |
| 116 | |||
| 117 | /* Insert CPUid and reorder */ | ||
| 118 | int i; | ||
| 119 | 1/2 ✓ Branch 0 taken 28 times. ✗ Branch 1 not taken. | 28 | for (i=0; i<num_free_agents; ++i) { |
| 120 | 2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 21 times. | 28 | if (free_agent_cpu_list[i] == -1) { |
| 121 | 7 | free_agent_cpu_list[i] = cpuid; | |
| 122 | 7 | break; | |
| 123 | } | ||
| 124 | } | ||
| 125 | 7 | qsort_r(free_agent_cpu_list, i+1, sizeof(int), | |
| 126 | mu_cmp_cpuids_by_ownership, &process_mask); | ||
| 127 | |||
| 128 | 7 | pthread_rwlock_unlock(&free_agent_list_lock); | |
| 129 | 7 | } | |
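
The `qsort_r` above is the glibc variant, whose comparator receives the extra context pointer (here `&process_mask`) as its third argument. The real comparator, `mu_cmp_cpuids_by_ownership`, lives in support/mask_utils.h; a sketch of the assumed ordering, with CPUs owned by the process sorting first:

```c
/* Sketch of the assumed comparator: CPUs owned by the process
 * (members of the mask passed as context) sort before borrowed ones. */
static int cmp_cpuids_by_ownership(const void *a, const void *b, void *context) {
    const cpu_set_t *owned_mask = context;
    int cpuid_a = *(const int*)a;
    int cpuid_b = *(const int*)b;
    int a_owned = CPU_ISSET(cpuid_a, owned_mask);
    int b_owned = CPU_ISSET(cpuid_b, owned_mask);
    if (a_owned != b_owned) return b_owned - a_owned; /* owned first */
    return cpuid_a - cpuid_b;                         /* then ascending id */
}
```
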
| 130 | |||
| 131 | 77 | static inline int get_free_agent_id_by_cpuid(int cpuid) { | |
| 132 | 77 | int id = -1; | |
| 133 | 77 | pthread_rwlock_rdlock(&free_agent_list_lock); | |
| 134 | 1/2 ✓ Branch 0 taken 77 times. ✗ Branch 1 not taken. | 77 | if (likely(free_agent_id_by_cpuid != NULL)) { |
| 135 | 77 | id = free_agent_id_by_cpuid[cpuid]; | |
| 136 | } | ||
| 137 | 77 | pthread_rwlock_unlock(&free_agent_list_lock); | |
| 138 | 77 | return id; | |
| 139 | } | ||
| 140 | |||
| 141 | 15 | static inline int get_free_agent_cpuid_by_id(int thread_id) { | |
| 142 | 15 | int cpuid = -1; | |
| 143 | 15 | pthread_rwlock_rdlock(&free_agent_list_lock); | |
| 144 | 1/2 ✓ Branch 0 taken 15 times. ✗ Branch 1 not taken. | 15 | if (likely(free_agent_cpuid_by_id != NULL)) { |
| 145 | 15 | cpuid = free_agent_cpuid_by_id[thread_id]; | |
| 146 | } | ||
| 147 | 15 | pthread_rwlock_unlock(&free_agent_list_lock); | |
| 148 | 15 | return cpuid; | |
| 149 | } | ||
| 150 | |||
| 151 | /*********************************************************************************/ | ||
| 152 | /* CPU Data structures and helper atomic flags functions */ | ||
| 153 | /*********************************************************************************/ | ||
| 154 | |||
| 155 | /* Current state of the CPU (what it is being used for) */ | ||
| 156 | typedef enum CPUState { | ||
| 157 | CPU_STATE_UNKNOWN = 0, | ||
| 158 | CPU_STATE_IDLE = 1 << 0, | ||
| 159 | CPU_STATE_LENT = 1 << 1, | ||
| 160 | CPU_STATE_RECLAIMED = 1 << 2, | ||
| 161 | CPU_STATE_IN_PARALLEL = 1 << 3, | ||
| 162 | CPU_STATE_FREE_AGENT_ENABLED = 1 << 4, | ||
| 163 | } cpu_state_t; | ||
| 164 | |||
| 165 | /* Possible OpenMP roles that the CPU can take */ | ||
| 166 | typedef enum OpenMP_Roles { | ||
| 167 | ROLE_NONE = 0, | ||
| 168 | ROLE_PRIMARY = 1 << 0, | ||
| 169 | ROLE_WORKER = 1 << 1, | ||
| 170 | ROLE_FREE_AGENT = 1 << 2, | ||
| 171 | } openmp_roles_t; | ||
| 172 | |||
| 173 | typedef struct DLB_ALIGN_CACHE CPU_Data { | ||
| 174 | openmp_roles_t roles; | ||
| 175 | _Atomic(cpu_state_t) state; | ||
| 176 | atomic_bool wanted_for_parallel; | ||
| 177 | } cpu_data_t; | ||
| 178 | |||
| 179 | static cpu_data_t *cpu_data = NULL; | ||
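
The state machine below is driven by small bit-flag helpers from support/atomic.h: `set_bit` ORs a flag in, `clear_bit` removes a flag and reports whether it was set, `cas_bit` swaps one flag for another only while the first is set, and `test_set_clear_bit` sets one flag while clearing another. Their implementations are not shown in this report; a minimal sketch of the assumed semantics on top of C11 atomics (simplified, relaxed memory ordering elided):

```c
#include <stdatomic.h>
#include <stdbool.h>

/* Sketch: atomically set a flag. */
static void sketch_set_bit(atomic_int *flags, int bit) {
    atomic_fetch_or(flags, bit);
}

/* Sketch: atomically clear a flag; return whether it was previously set. */
static bool sketch_clear_bit(atomic_int *flags, int bit) {
    return (atomic_fetch_and(flags, ~bit) & bit) != 0;
}

/* Sketch: replace 'expected' with 'desired' only while 'expected' is set. */
static bool sketch_cas_bit(atomic_int *flags, int expected, int desired) {
    int old = atomic_load(flags);
    while (old & expected) {
        int new = (old & ~expected) | desired;
        if (atomic_compare_exchange_weak(flags, &old, new)) return true;
    }
    return false;
}
```
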
| 180 | |||
| 181 | |||
| 182 | /*********************************************************************************/ | ||
| 183 | /* DLB callbacks */ | ||
| 184 | /*********************************************************************************/ | ||
| 185 | |||
| 186 | 16 | static void cb_enable_cpu(int cpuid, void *arg) { | |
| 187 | /* Skip callback if this CPU is required for a parallel region */ | ||
| 188 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times. | 16 | if (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].wanted_for_parallel)) { |
| 189 | ✗ | return; | |
| 190 | } | ||
| 191 | |||
| 192 | /* If this CPU is reclaimed, set IDLE */ | ||
| 193 | 2/2 ✓ Branch 1 taken 3 times. ✓ Branch 2 taken 13 times. | 16 | if (cas_bit((atomic_int*)&cpu_data[cpuid].state, |
| 194 | CPU_STATE_RECLAIMED, CPU_STATE_IDLE)) { | ||
| 195 | 3 | return; | |
| 196 | } | ||
| 197 | |||
| 198 | 13 | int free_agent_id = get_free_agent_id_by_cpuid(cpuid); | |
| 199 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 13 times. | 13 | if (unlikely(free_agent_id == -1)) { |
| 200 | /* probably too early? */ | ||
| 201 | ✗ | DLB_LendCpu(cpuid); | |
| 202 | } else { | ||
| 203 | /* Enable associated free agent thread if not already */ | ||
| 204 | 1/2 ✓ Branch 1 taken 13 times. ✗ Branch 2 not taken. | 13 | if (test_set_clear_bit((atomic_int*)&cpu_data[cpuid].state, |
| 205 | CPU_STATE_FREE_AGENT_ENABLED, CPU_STATE_IDLE)) { | ||
| 206 | 13 | DLB_ATOMIC_ADD(&num_enabled_free_agents, 1); | |
| 207 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 13 times. | 13 | verbose(VB_OMPT, "Enabling free agent %d", free_agent_id); |
| 208 | 13 | __kmp_set_free_agent_thread_active_status(free_agent_id, true); | |
| 209 | } | ||
| 210 | } | ||
| 211 | } | ||
| 212 | |||
| 213 | 10 | static void cb_disable_cpu(int cpuid, void *arg) { | |
| 214 | 10 | int free_agent_id = get_free_agent_id_by_cpuid(cpuid); | |
| 215 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 10 times. | 10 | if (unlikely(free_agent_id == -1)) { |
| 216 | /* Probably a callback after ompt_finalize has been called, ignore */ | ||
| 217 | ✗ | return; | |
| 218 | } | ||
| 219 | |||
| 220 | /* If CPU is not needed, set IDLE */ | ||
| 221 | 5/6 ✓ Branch 0 taken 10 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 6 times. ✓ Branch 3 taken 4 times. ✓ Branch 4 taken 6 times. ✓ Branch 5 taken 4 times. | 10 | if (CPU_ISSET(cpuid, &process_mask) |
| 222 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times. | 6 | && !DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].wanted_for_parallel)) { |
| 223 | 3 | set_bit((atomic_int*)&cpu_data[cpuid].state, CPU_STATE_IDLE); | |
| 224 | } | ||
| 225 | |||
| 226 | /* If CPU was assigned to a free agent thread, disable it */ | ||
| 227 | 1/2 ✓ Branch 1 taken 10 times. ✗ Branch 2 not taken. | 10 | if (clear_bit((atomic_int*)&cpu_data[cpuid].state, CPU_STATE_FREE_AGENT_ENABLED)) { |
| 228 | 10 | DLB_ATOMIC_SUB(&num_enabled_free_agents, 1); | |
| 229 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 10 times. | 10 | verbose(VB_OMPT, "Disabling free agent %d", free_agent_id); |
| 230 | 10 | __kmp_set_free_agent_thread_active_status(free_agent_id, false); | |
| 231 | } | ||
| 232 | } | ||
| 233 | |||
| 234 | ✗ | static void cb_set_process_mask(const cpu_set_t *mask, void *arg) { | |
| 235 | ✗ | memcpy(&process_mask, mask, sizeof(cpu_set_t)); | |
| 236 | ✗ | memcpy(&active_mask, mask, sizeof(cpu_set_t)); | |
| 237 | } | ||
| 238 | |||
| 239 | /*********************************************************************************/ | ||
| 240 | /* Other static functions */ | ||
| 241 | /*********************************************************************************/ | ||
| 242 | |||
| 243 | /* Actions to do when --lewi-ompt=lend */ | ||
| 244 | 5 | static void omptm_free_agents__lend(void) { | |
| 245 | 2/4 ✓ Branch 0 taken 5 times. ✗ Branch 1 not taken. ✗ Branch 2 not taken. ✓ Branch 3 taken 5 times. | 5 | if (lewi && omptool_opts & OMPTOOL_OPTS_LEND) { |
| 246 | /* Lend all IDLE worker CPUs */ | ||
| 247 | cpu_set_t mask; CPU_ZERO(&mask); /* mask must be zeroed before CPU_SET below */ | ||
| 248 | int cpuid; | ||
| 249 | ✗ | for (cpuid=0; cpuid<system_size; ++cpuid) { | |
| 250 | ✗ | if (CPU_ISSET(cpuid, &worker_threads_mask) && | |
| 251 | ✗ | cas_bit((atomic_int*)&cpu_data[cpuid].state, | |
| 252 | CPU_STATE_IDLE, CPU_STATE_LENT)) { | ||
| 253 | ✗ | CPU_SET(cpuid, &mask); | |
| 254 | } | ||
| 255 | } | ||
| 256 | ✗ | DLB_LendCpuMask(&mask); | |
| 257 | |||
| 258 | /* The active mask should only be the primary mask */ | ||
| 259 | ✗ | memcpy(&active_mask, &primary_thread_mask, sizeof(cpu_set_t)); | |
| 260 | |||
| 261 | ✗ | verbose(VB_OMPT, "Release - Setting new mask to %s", mu_to_str(&active_mask)); | |
| 262 | } | ||
| 263 | 5 | } | |
| 264 | |||
| 265 | /* Look for local available CPUs, if none is found ask LeWI */ | ||
| 266 | 16 | static void acquire_one_free_agent(void) { | |
| 267 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 16 times. | 16 | if (num_enabled_free_agents == num_free_agents) { |
| 268 | ✗ | return; | |
| 269 | } | ||
| 270 | |||
| 271 | cpu_set_t cpus_to_ask; | ||
| 272 | 16 | CPU_ZERO(&cpus_to_ask); | |
| 273 | |||
| 274 | /* Iterate only CPUs where free agents are assigned */ | ||
| 275 | int i; | ||
| 276 | 16 | pthread_rwlock_rdlock(&free_agent_list_lock); | |
| 277 | 2/2 ✓ Branch 0 taken 67 times. ✓ Branch 1 taken 7 times. | 74 | for (i=0; i<num_free_agents; ++i) { |
| 278 | 67 | int cpuid = free_agent_cpu_list[i]; | |
| 279 | |||
| 280 | /* It is safe to just make a copy, we either skip the CPU or | ||
| 281 | * call enable_cpu which will do an atomic exchange */ | ||
| 282 | 67 | cpu_state_t cpu_state = DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state); | |
| 283 | |||
| 284 | /* Skip CPU if it's already busy */ | ||
| 285 | 2/2 ✓ Branch 0 taken 36 times. ✓ Branch 1 taken 31 times. | 67 | if (cpu_state & (CPU_STATE_IN_PARALLEL \| CPU_STATE_FREE_AGENT_ENABLED)) { |
| 286 | // FIXME: check other flags? wanted_for_parallel? | ||
| 287 | 36 | continue; | |
| 288 | } | ||
| 289 | |||
| 290 | /* If some CPU is IDLE, try enabling it */ | ||
| 291 | 2/2 ✓ Branch 0 taken 9 times. ✓ Branch 1 taken 22 times. | 31 | if (cpu_state == CPU_STATE_IDLE) { |
| 292 | 9 | cb_enable_cpu(cpuid, NULL); | |
| 293 | 9 | CPU_ZERO(&cpus_to_ask); | |
| 294 | 9 | break; | |
| 295 | } else { | ||
| 296 | 1/2 ✓ Branch 0 taken 22 times. ✗ Branch 1 not taken. | 22 | CPU_SET(cpuid, &cpus_to_ask); |
| 297 | } | ||
| 298 | } | ||
| 299 | 16 | pthread_rwlock_unlock(&free_agent_list_lock); | |
| 300 | |||
| 301 | /* Call LeWI if we didn't find any IDLE CPU */ | ||
| 302 | 3/4 ✓ Branch 0 taken 16 times. ✗ Branch 1 not taken. ✓ Branch 3 taken 7 times. ✓ Branch 4 taken 9 times. | 16 | if (lewi && CPU_COUNT(&cpus_to_ask) > 0) { |
| 303 | 7 | DLB_AcquireCpusInMask(1, &cpus_to_ask); | |
| 304 | } | ||
| 305 | } | ||
| 306 | |||
| 307 | /* Obtain a CPU id for a given free agent id */ | ||
| 308 | 38 | static int get_free_agent_binding(int thread_id) { | |
| 309 | /* Find out a CPU for the free agent thread */ | ||
| 310 | 38 | int cpuid = -1; | |
| 311 | |||
| 312 | /* FIXME: If the process mask, and number of workers and free agent threads | ||
| 313 | * are immutable, this function could be called only once, and | ||
| 314 | * save all the possible bindings for each free agent thread. | ||
| 315 | * (but not during init, we may not have all CPUs registered yet) | ||
| 316 | */ | ||
| 317 | |||
| 318 | /* FIXME: let's assume that all processes are already registered when the | ||
| 319 | * first free agent starts. Otherwise, we wouldn't be able to | ||
| 320 | * take the right decision here. | ||
| 321 | */ | ||
| 322 | |||
| 323 | cpu_set_t available_process_cpus; | ||
| 324 | 38 | mu_subtract(&available_process_cpus, &process_mask, &primary_thread_mask); | |
| 325 | 38 | mu_subtract(&available_process_cpus, &available_process_cpus, &worker_threads_mask); | |
| 326 | 38 | int num_free_agents_in_available_cpus = mu_count(&available_process_cpus); | |
| 327 | 2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 33 times. | 38 | if (thread_id < num_free_agents_in_available_cpus) { |
| 328 | /* Simpler scenario: the default workers mask plus primary does not cover | ||
| 329 | * all the CPUs in the process mask and the free agent thread_id is low | ||
| 330 | * enough to use one of those free CPUs. | ||
| 331 | */ | ||
| 332 | 5 | int cpus_found = 0; | |
| 333 | 1/2 ✓ Branch 0 taken 24 times. ✗ Branch 1 not taken. | 24 | for (cpuid=0; cpuid<system_size; ++cpuid) { |
| 334 | 5/6 ✓ Branch 0 taken 24 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 9 times. ✓ Branch 3 taken 15 times. ✓ Branch 4 taken 9 times. ✓ Branch 5 taken 15 times. | 24 | if (CPU_ISSET(cpuid, &available_process_cpus)) { |
| 335 | 2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 4 times. | 9 | if (cpus_found++ == thread_id) { |
| 336 | 5 | break; | |
| 337 | } | ||
| 338 | } | ||
| 339 | } | ||
| 340 | } | ||
| 341 | |||
| 342 | else { | ||
| 343 | 33 | int num_non_owned_cpus = shmem_cpuinfo__get_number_of_non_owned_cpus(pid); | |
| 344 | 2/2 ✓ Branch 0 taken 20 times. ✓ Branch 1 taken 13 times. | 33 | if (thread_id < (num_non_owned_cpus + num_free_agents_in_available_cpus)) { |
| 345 | /* Second case: Find a CPU in another process */ | ||
| 346 | 20 | cpuid = shmem_cpuinfo__get_nth_non_owned_cpu(pid, | |
| 347 | thread_id - num_free_agents_in_available_cpus); | ||
| 348 | 26 | } else if (thread_id < (num_non_owned_cpus + num_free_agents_in_available_cpus | |
| 349 | 2/2 ✓ Branch 1 taken 10 times. ✓ Branch 2 taken 3 times. | 13 | + CPU_COUNT(&worker_threads_mask))) { |
| 350 | /* Third case: Share a CPU with some worker thread */ | ||
| 351 | 10 | int victim_worker = thread_id | |
| 352 | 10 | - num_non_owned_cpus - num_free_agents_in_available_cpus; | |
| 353 | 10 | int workers_found = 0; | |
| 354 |
1/2✓ Branch 0 taken 41 times.
✗ Branch 1 not taken.
|
41 | for (cpuid=0; cpuid<system_size; ++cpuid) { |
| 355 |
5/6✓ Branch 0 taken 41 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 19 times.
✓ Branch 3 taken 22 times.
✓ Branch 4 taken 19 times.
✓ Branch 5 taken 22 times.
|
41 | if (CPU_ISSET(cpuid, &worker_threads_mask)) { |
| 356 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 9 times.
|
19 | if (workers_found++ == victim_worker) { |
| 357 | 10 | break; | |
| 358 | } | ||
| 359 | } | ||
| 360 | } | ||
| 361 | } else { | ||
| 362 | /* Last case: no CPUs left? */ | ||
| 363 | } | ||
| 364 | } | ||
| 365 | 38 | return cpuid; | |
| 366 | } | ||
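
The three fallback cases are easiest to see with a concrete layout. The numbers below are a hypothetical example, not taken from the report above: a process owning CPUs 0-3 on an 8-CPU node, with the primary thread on CPU 0, two workers on CPUs 1-2, and one non-owned CPU (6) registered by another process:

```c
/* Hypothetical layout:
 *   process_mask        = { 0, 1, 2, 3 }
 *   primary_thread_mask = { 0 }
 *   worker_threads_mask = { 1, 2 }
 *   non-owned CPUs registered in shmem_cpuinfo = { 6 }
 *
 * num_free_agents_in_available_cpus = |{3}| = 1, so:
 *   get_free_agent_binding(0) -> 3    (spare owned CPU, first case)
 *   get_free_agent_binding(1) -> 6    (non-owned CPU, second case)
 *   get_free_agent_binding(2) -> 1    (shares CPU with worker 0, third case)
 *   get_free_agent_binding(3) -> 2    (shares CPU with worker 1, third case)
 *   get_free_agent_binding(4) -> -1   (no CPUs left, last case)
 */
```
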
| 367 | |||
| 368 | |||
| 369 | /*********************************************************************************/ | ||
| 370 | /* Init & Finalize module */ | ||
| 371 | /*********************************************************************************/ | ||
| 372 | |||
| 373 | 4 | void omptm_free_agents__init(pid_t process_id, const options_t *options) { | |
| 374 | |||
| 375 | /* Initialize static variables */ | ||
| 376 | 4 | system_size = mu_get_system_size(); | |
| 377 | 4 | lewi = options->lewi; | |
| 378 | 4 | omptool_opts = options->lewi_ompt; | |
| 379 | 4 | pid = process_id; | |
| 380 | 4 | num_free_agents = __kmp_get_num_free_agent_threads(); | |
| 381 | 4 | shmem_procinfo__getprocessmask(pid, &process_mask, DLB_DROM_FLAGS_NONE); | |
| 382 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | verbose(VB_OMPT, "Process mask: %s", mu_to_str(&process_mask)); |
| 383 | |||
| 384 | // omp_get_max_threads cannot be called here, try using the env. var. | ||
| 385 | 4 | const char *env_omp_num_threads = getenv("OMP_NUM_THREADS"); | |
| 386 | 4 | int default_num_threads = | |
| 387 | 4 | env_omp_num_threads ? atoi(env_omp_num_threads) | |
| 388 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | : CPU_COUNT(&process_mask); |
| 389 | |||
| 390 | /* Initialize atomic variables */ | ||
| 391 | 4 | DLB_ATOMIC_ST(&num_enabled_free_agents, 0); | |
| 392 | |||
| 393 | /* Initialize CPU Data array */ | ||
| 394 | 4 | cpu_data = malloc(sizeof(cpu_data_t)*system_size); | |
| 395 | |||
| 396 | /* Construct Primary and Worker threads masks */ | ||
| 397 | 4 | CPU_ZERO(&primary_thread_mask); | |
| 398 | 4 | CPU_ZERO(&worker_threads_mask); | |
| 399 | int cpuid; | ||
| 400 | 4 | int encountered_cpus = 0; | |
| 401 | 2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 4 times. | 36 | for (cpuid=0; cpuid<system_size; ++cpuid) { |
| 402 | 5/6 ✓ Branch 0 taken 32 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 16 times. ✓ Branch 3 taken 16 times. ✓ Branch 4 taken 16 times. ✓ Branch 5 taken 16 times. | 32 | if (CPU_ISSET(cpuid, &process_mask)) { |
| 403 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 12 times. | 16 | if (++encountered_cpus == 1) { |
| 404 | /* First encountered CPU belongs to the primary thread */ | ||
| 405 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | CPU_SET(cpuid, &primary_thread_mask); |
| 406 | 4 | cpu_data[cpuid].roles = ROLE_PRIMARY; | |
| 407 | 4 | cpu_data[cpuid].state = CPU_STATE_IN_PARALLEL; | |
| 408 | 2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 5 times. | 12 | } else if (encountered_cpus-1 < default_num_threads) { |
| 409 | /* Infer the worker threads' CPUs */ | ||
| 410 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | CPU_SET(cpuid, &worker_threads_mask); |
| 411 | 7 | cpu_data[cpuid].roles = ROLE_WORKER; | |
| 412 | 7 | cpu_data[cpuid].state = CPU_STATE_IDLE; | |
| 413 | } | ||
| 414 | } else { | ||
| 415 | 16 | cpu_data[cpuid].roles = ROLE_NONE; | |
| 416 | 16 | cpu_data[cpuid].state = CPU_STATE_UNKNOWN; | |
| 417 | } | ||
| 418 | 32 | cpu_data[cpuid].wanted_for_parallel = false; | |
| 419 | } | ||
| 420 | 4 | memcpy(&active_mask, &primary_thread_mask, sizeof(cpu_set_t)); | |
| 421 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | verbose(VB_OMPT, "Primary thread mask: %s", mu_to_str(&primary_thread_mask)); |
| 422 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | verbose(VB_OMPT, "Worker threads mask: %s", mu_to_str(&worker_threads_mask)); |
| 423 | |||
| 424 | /* Initialize free agent lists */ | ||
| 425 | 4 | free_agent_lists_init(); | |
| 426 | |||
| 427 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | if (lewi) { |
| 428 | int err; | ||
| 429 | 4 | err = DLB_CallbackSet(dlb_callback_enable_cpu, (dlb_callback_t)cb_enable_cpu, NULL); | |
| 430 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
| 431 | ✗ | warning("DLB_CallbackSet enable_cpu: %s", DLB_Strerror(err)); | |
| 432 | } | ||
| 433 | 4 | err = DLB_CallbackSet(dlb_callback_disable_cpu, (dlb_callback_t)cb_disable_cpu, NULL); | |
| 434 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
| 435 | ✗ | warning("DLB_CallbackSet disable_cpu: %s", DLB_Strerror(err)); | |
| 436 | } | ||
| 437 | 4 | err = DLB_CallbackSet(dlb_callback_set_process_mask, | |
| 438 | (dlb_callback_t)cb_set_process_mask, NULL); | ||
| 439 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
| 440 | ✗ | warning("DLB_CallbackSet set_process_mask: %s", DLB_Strerror(err)); | |
| 441 | } | ||
| 442 | |||
| 443 | 4 | omptm_free_agents__lend(); | |
| 444 | } | ||
| 445 | 4 | } | |
| 446 | |||
| 447 | 4 | void omptm_free_agents__finalize(void) { | |
| 448 | /* Destroy CPU data */ | ||
| 449 | 4 | free(cpu_data); | |
| 450 | 4 | cpu_data = NULL; | |
| 451 | |||
| 452 | /* Destroy free agent lists and lock */ | ||
| 453 | 4 | free_agent_lists_destroy(); | |
| 454 | 4 | } | |
| 455 | |||
| 456 | |||
| 457 | /*********************************************************************************/ | ||
| 458 | /* Blocking calls specific functions */ | ||
| 459 | /*********************************************************************************/ | ||
| 460 | |||
| 461 | 1 | void omptm_free_agents__IntoBlockingCall(void) { | |
| 462 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (lewi) { |
| 463 | |||
| 464 | /* Don't know what to do if a Blocking Call is invoked inside a | ||
| 465 | * parallel region. We could ignore it, but then we should also ignore | ||
| 466 | * the associated OutOfBlockingCall, and how would we know it? */ | ||
| 467 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | fatal_cond(DLB_ATOMIC_LD(&in_parallel), |
| 468 | "Blocking Call inside a parallel region not supported"); | ||
| 469 | |||
| 470 | /* Warning: the current CPU, hopefully the one assigned to the primary | ||
| 471 | * thread, has already been lent in the appropriate LeWI function | ||
| 472 | * for the IntoBlockingCall event. */ | ||
| 473 | |||
| 474 | cpu_set_t cpus_to_lend; | ||
| 475 | 1 | CPU_ZERO(&cpus_to_lend); | |
| 476 | |||
| 477 | /* Lend all CPUs not being used by workers or free agents */ | ||
| 478 | /* All owned CPUs (except primary) go from IDLE to LENT */ | ||
| 479 | int cpuid; | ||
| 480 | 2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 1 times. | 9 | for (cpuid=0; cpuid<system_size; ++cpuid) { |
| 481 | 2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 1 times. | 8 | if (cpu_data[cpuid].roles & (ROLE_WORKER \| ROLE_FREE_AGENT) |
| 482 | 5/6 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 4 times. ✓ Branch 4 taken 3 times. ✓ Branch 5 taken 4 times. | 7 | && CPU_ISSET(cpuid, &process_mask) |
| 483 | 1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. | 3 | && cas_bit((atomic_int*)&cpu_data[cpuid].state, |
| 484 | CPU_STATE_IDLE, CPU_STATE_LENT)) { | ||
| 485 | 1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken. | 3 | CPU_SET(cpuid, &cpus_to_lend); |
| 486 | } | ||
| 487 | } | ||
| 488 | 1 | DLB_LendCpuMask(&cpus_to_lend); | |
| 489 | } | ||
| 490 | 1 | } | |
| 491 | |||
| 492 | |||
| 493 | 1 | void omptm_free_agents__OutOfBlockingCall(void) { | |
| 494 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (lewi) { |
| 495 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (omptool_opts & OMPTOOL_OPTS_LEND) { |
| 496 | /* Do nothing. | ||
| 497 | * Do not reclaim since going out of a blocking call is not | ||
| 498 | * an indication that the CPUs may be needed. */ | ||
| 499 | } | ||
| 500 | else { | ||
| 501 | cpu_set_t cpus_to_reclaim; | ||
| 502 | 1 | CPU_ZERO(&cpus_to_reclaim); | |
| 503 | |||
| 504 | /* All owned CPUs (except primary) go from LENT to RECLAIMED */ | ||
| 505 | int cpuid; | ||
| 506 | 2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 1 times. | 9 | for (cpuid=0; cpuid<system_size; ++cpuid) { |
| 507 | 2/2 ✓ Branch 0 taken 7 times. ✓ Branch 1 taken 1 times. | 8 | if (cpu_data[cpuid].roles & (ROLE_WORKER \| ROLE_FREE_AGENT) |
| 508 | 5/6 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 4 times. ✓ Branch 4 taken 3 times. ✓ Branch 5 taken 4 times. | 7 | && CPU_ISSET(cpuid, &process_mask) |
| 509 | 1/2 ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. | 3 | && cas_bit((atomic_int*)&cpu_data[cpuid].state, |
| 510 | CPU_STATE_LENT, CPU_STATE_RECLAIMED)) { | ||
| 511 | 1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken. | 3 | CPU_SET(cpuid, &cpus_to_reclaim); |
| 512 | } | ||
| 513 | } | ||
| 514 | 1 | DLB_ReclaimCpuMask(&cpus_to_reclaim); | |
| 515 | } | ||
| 516 | } | ||
| 517 | 1 | } | |
| 518 | |||
| 519 | |||
| 520 | /*********************************************************************************/ | ||
| 521 | /* OMPT registered callbacks */ | ||
| 522 | /*********************************************************************************/ | ||
| 523 | |||
| 524 | 7 | void omptm_free_agents__thread_begin(ompt_thread_t thread_type) { | |
| 525 | /* Set up thread local spd */ | ||
| 526 | 7 | spd_enter_dlb(thread_spd); | |
| 527 | |||
| 528 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | if (thread_type == ompt_thread_other) { |
| 529 | 7 | int thread_id = __kmp_get_free_agent_id(); | |
| 530 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 7 times. | 7 | fatal_cond(thread_id < 0, "free agent id < 0"); |
| 531 | 7 | __free_agent_id = thread_id; | |
| 532 | |||
| 533 | 7 | int cpuid = get_free_agent_binding(thread_id); | |
| 534 | |||
| 535 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | if (cpuid >= 0) { |
| 536 | /* Set up free agent in free agent lists */ | ||
| 537 | 7 | free_agent_lists_register(thread_id, cpuid); | |
| 538 | |||
| 539 | /* Set up new CPU role */ | ||
| 540 | /* FIXME: not atomic */ | ||
| 541 | 7 | cpu_data[cpuid].roles |= ROLE_FREE_AGENT; | |
| 542 | |||
| 543 | /* TODO: set up cpu state? */ | ||
| 544 | |||
| 545 | /* Free agent threads start in disabled status */ | ||
| 546 | 7 | __kmp_set_free_agent_thread_active_status(thread_id, false); | |
| 547 | |||
| 548 | /* Bind free agent thread to cpuid */ | ||
| 549 | cpu_set_t thread_mask; | ||
| 550 | 7 | CPU_ZERO(&thread_mask); | |
| 551 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | CPU_SET(cpuid, &thread_mask); |
| 552 | 7 | int err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask); | |
| 553 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 7 times. | 7 | verbose(VB_OMPT, "Detected free agent thread %d. Pinning to CPU: %d, err: %d", |
| 554 | thread_id, cpuid, err); | ||
| 555 | } else { | ||
| 556 | ✗ | warning("Could not find a suitable CPU bind for free agent thread id: %d", | |
| 557 | thread_id); | ||
| 558 | } | ||
| 559 | } | ||
| 560 | 7 | } | |
| 561 | |||
| 562 | 2 | void omptm_free_agents__parallel_begin(omptool_parallel_data_t *parallel_data) { | |
| 563 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 1 times. | 2 | if (parallel_data->level == 1) { |
| 564 | 1 | DLB_ATOMIC_ST(&in_parallel, true); | |
| 565 | |||
| 566 | /* Only if requested_parallelism == process_mask, reclaim all our lent CPUs, if needed */ | ||
| 567 | /* Otherwise, each thread will be responsible for reclaiming itself */ | ||
| 568 | 1/2 ✓ Branch 1 taken 1 times. ✗ Branch 2 not taken. | 1 | if (parallel_data->requested_parallelism == (unsigned)CPU_COUNT(&process_mask)) { |
| 569 | 1 | int cpus_to_reclaim = 0; | |
| 570 | 1 | int cpuid_to_reclaim = -1; | |
| 571 | cpu_set_t mask_to_reclaim; | ||
| 572 | int cpuid; | ||
| 573 | 2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 1 times. | 9 | for (cpuid = 0; cpuid<system_size; ++cpuid) { |
| 574 | 5/6 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 4 times. ✓ Branch 3 taken 4 times. ✓ Branch 4 taken 4 times. ✓ Branch 5 taken 4 times. | 8 | if (CPU_ISSET(cpuid, &process_mask)) { |
| 575 | 4 | DLB_ATOMIC_ST_RLX(&cpu_data[cpuid].wanted_for_parallel, true); | |
| 576 | |||
| 577 | /* It is safe to just make a copy, we'll either call cb_disable_cpu | ||
| 578 | * which will do an atomic exchange or we'll call LeWI */ | ||
| 579 | 4 | cpu_state_t cpu_state = DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state); | |
| 580 | |||
| 581 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times. | 4 | if (cpu_state & CPU_STATE_FREE_AGENT_ENABLED) { |
| 582 | // Disable free agent thread from this CPU | ||
| 583 | 3 | cb_disable_cpu(cpuid, NULL); | |
| 584 | } | ||
| 585 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | else if (cpu_state & CPU_STATE_LENT) { |
| 586 | // Reclaim this CPU to LeWI | ||
| 587 | ✗ | switch(++cpus_to_reclaim) { | |
| 588 | ✗ | case 1: | |
| 589 | ✗ | cpuid_to_reclaim = cpuid; | |
| 590 | ✗ | break; | |
| 591 | ✗ | case 2: | |
| 592 | ✗ | CPU_ZERO(&mask_to_reclaim); | |
| 593 | ✗ | CPU_SET(cpuid_to_reclaim, &mask_to_reclaim); | |
| 594 | DLB_FALLTHROUGH; | ||
| 595 | ✗ | default: | |
| 596 | ✗ | CPU_SET(cpuid, &mask_to_reclaim); | |
| 597 | } | ||
| 598 | } | ||
| 599 | } | ||
| 600 | } | ||
| 601 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | if (cpus_to_reclaim == 1) { |
| 602 | ✗ | DLB_ReclaimCpu(cpuid_to_reclaim); | |
| 603 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | } else if (cpus_to_reclaim > 1) { |
| 604 | ✗ | DLB_ReclaimCpuMask(&mask_to_reclaim); | |
| 605 | } | ||
| 606 | } | ||
| 607 | } | ||
| 608 | 2 | } | |
| 609 | |||
| 610 | 2 | void omptm_free_agents__parallel_end(omptool_parallel_data_t *parallel_data) { | |
| 611 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 1 times. | 2 | if (parallel_data->level == 1) { |
| 612 | 1 | DLB_ATOMIC_ST(&in_parallel, false); | |
| 613 | /* All workers in parallel go to IDLE */ | ||
| 614 | int cpuid; | ||
| 615 | 2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 1 times. | 9 | for (cpuid=0; cpuid<system_size; ++cpuid) { |
| 616 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 5 times. | 8 | if (cpu_data[cpuid].roles & ROLE_WORKER) { |
| 617 | 3 | cas_bit((atomic_int*)&cpu_data[cpuid].state, | |
| 618 | CPU_STATE_IN_PARALLEL, CPU_STATE_IDLE); | ||
| 619 | 3 | DLB_ATOMIC_ST_RLX(&cpu_data[cpuid].wanted_for_parallel, false); | |
| 620 | } | ||
| 621 | } | ||
| 622 | |||
| 623 | 1 | omptm_free_agents__lend(); | |
| 624 | } | ||
| 625 | 2 | } | |
| 626 | |||
| 627 | 5 | void omptm_free_agents__into_parallel_function( | |
| 628 | omptool_parallel_data_t *parallel_data, unsigned int index) { | ||
| 629 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times. | 5 | if (parallel_data->level == 1) { |
| 630 | /* Obtain CPU id */ | ||
| 631 | /* FIXME: actually, we should test CPU binding every time we enter | ||
| 632 | * here, since the RT is free to rebind threads, but we need | ||
| 633 | * __worker_binding for testing */ | ||
| 634 | 4 | int cpuid = __worker_binding; | |
| 635 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (cpuid == -1) { |
| 636 | cpu_set_t thread_mask; | ||
| 637 | ✗ | pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask); | |
| 638 | ✗ | cpuid = mu_get_single_cpu(&thread_mask); | |
| 639 | ✗ | fatal_cond(cpuid == -1, | |
| 640 | "DLB does not currently support thread binding to more than one CPU," | ||
| 641 | " current CPU affinity mask for thread %d: %s." | ||
| 642 | " Please, define OMP_PLACES=threads and run again.", | ||
| 643 | index, mu_to_str(&thread_mask)); | ||
| 644 | ✗ | __worker_binding = cpuid; | |
| 645 | } | ||
| 646 | |||
| 647 | |||
| 648 | /* Reclaim CPU if needed and set the appropriate state */ | ||
| 649 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state) & CPU_STATE_LENT) { |
| 650 | ✗ | DLB_ReclaimCpu(cpuid); | |
| 651 | } | ||
| 652 | 4 | set_bit((atomic_int*)&cpu_data[cpuid].state, CPU_STATE_IN_PARALLEL); | |
| 653 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | verbose(VB_OMPT, "CPU %d starting an implicit task", cpuid); |
| 654 | } | ||
| 655 | 5 | } | |
| 656 | |||
| 657 | ✗ | void omptm_free_agents__task_create(void) { | |
| 658 | /* Increment the amount of pending tasks */ | ||
| 659 | ✗ | DLB_ATOMIC_ADD(&pending_tasks, 1); | |
| 660 | |||
| 661 | /* For now, let's assume that we always want to increase the number | ||
| 662 | * of active threads whenever a task is created | ||
| 663 | */ | ||
| 664 | ✗ | acquire_one_free_agent(); | |
| 665 | } | ||
| 666 | |||
| 667 | 8 | void omptm_free_agents__task_complete(void) { | |
| 668 | 1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken. | 8 | if (__free_agent_id >= 0) { |
| 669 | 8 | int cpuid = get_free_agent_cpuid_by_id(__free_agent_id); | |
| 670 | |||
| 671 | /* Disable free agent thread if this CPU is needed for a worker thread */ | ||
| 672 | 1/2 ✓ Branch 0 taken 8 times. ✗ Branch 1 not taken. | 8 | if (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].state) & CPU_STATE_IN_PARALLEL |
| 673 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 8 times. | 8 | \|\| DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].wanted_for_parallel)) { |
| 674 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 675 | } | ||
| 676 | |||
| 677 | /* Return CPU if reclaimed */ | ||
| 678 | 2/2 ✓ Branch 1 taken 3 times. ✓ Branch 2 taken 5 times. | 8 | else if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { |
| 679 | 1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 3 times. | 3 | if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { |
| 680 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 681 | } | ||
| 682 | } | ||
| 683 | |||
| 684 | /* Lend CPU if no more tasks */ | ||
| 685 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times. | 5 | else if (DLB_ATOMIC_LD(&pending_tasks) == 0) { |
| 686 | 4 | cb_disable_cpu(cpuid, NULL); | |
| 687 | |||
| 688 | /* Lend only free agents not part of the process mask */ | ||
| 689 | /* or, lend anyway if LEND policy */ | ||
| 690 | 5/6 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 3 times. ✓ Branch 3 taken 1 times. ✓ Branch 4 taken 3 times. ✓ Branch 5 taken 1 times. | 4 | if (!CPU_ISSET(cpuid, &process_mask) |
| 691 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | \|\| omptool_opts & OMPTOOL_OPTS_LEND) { |
| 692 | 1 | DLB_LendCpu(cpuid); | |
| 693 | } | ||
| 694 | } | ||
| 695 | } | ||
| 696 | 8 | } | |
| 697 | |||
| 698 | ✗ | void omptm_free_agents__task_switch(void) { | |
| 699 | ✗ | if (DLB_ATOMIC_SUB(&pending_tasks, 1) > 1) { | |
| 700 | ✗ | acquire_one_free_agent(); | |
| 701 | } | ||
| 702 | } | ||
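
`DLB_ATOMIC_SUB` is assumed here to behave like C11 `atomic_fetch_sub` and return the value held *before* the decrement, so the `> 1` test in `omptm_free_agents__task_switch` reads as "after taking this task, at least one more is still pending". A sketch of the same logic written against plain C11 atomics (hypothetical helper names):

```c
#include <stdatomic.h>

/* Sketch: pop one pending task; if others remain, try to wake
 * up one more free agent. */
static void sketch_task_switch(atomic_uint *pending, void (*acquire_cb)(void)) {
    unsigned int before = atomic_fetch_sub(pending, 1);
    if (before > 1) {
        acquire_cb();
    }
}
```
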
| 703 | |||
| 704 | |||
| 705 | /*********************************************************************************/ | ||
| 706 | /* Vtable for handling omptool events */ | ||
| 707 | /*********************************************************************************/ | ||
| 708 | |||
| 709 | const omptool_event_funcs_t omptm_free_agents_events_vtable = (const omptool_event_funcs_t) { | ||
| 710 | .init = omptm_free_agents__init, | ||
| 711 | .finalize = omptm_free_agents__finalize, | ||
| 712 | .into_mpi = omptm_free_agents__IntoBlockingCall, | ||
| 713 | .outof_mpi = omptm_free_agents__OutOfBlockingCall, | ||
| 714 | .lend_from_api = NULL, | ||
| 715 | .thread_begin = omptm_free_agents__thread_begin, | ||
| 716 | .thread_end = NULL, | ||
| 717 | .thread_role_shift = NULL, | ||
| 718 | .parallel_begin = omptm_free_agents__parallel_begin, | ||
| 719 | .parallel_end = omptm_free_agents__parallel_end, | ||
| 720 | .into_parallel_function = omptm_free_agents__into_parallel_function, | ||
| 721 | .outof_parallel_function = NULL, | ||
| 722 | .into_parallel_implicit_barrier = NULL, | ||
| 723 | .task_create = omptm_free_agents__task_create, | ||
| 724 | .task_complete = omptm_free_agents__task_complete, | ||
| 725 | .task_switch = omptm_free_agents__task_switch, | ||
| 726 | }; | ||
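
A function-pointer vtable like this lets the generic omptool layer dispatch events without knowing which thread manager is active, skipping the unimplemented (NULL) entries. A plausible dispatch sketch (hypothetical caller; the real dispatcher in LB_numThreads/omptool.c may differ):

```c
/* Sketch: forward an event only if the selected thread manager
 * implements it. */
static void dispatch_task_create(const omptool_event_funcs_t *funcs) {
    if (funcs->task_create != NULL) {
        funcs->task_create();
    }
}

/* Usage: dispatch_task_create(&omptm_free_agents_events_vtable); */
```
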
| 727 | |||
| 728 | |||
| 729 | /*********************************************************************************/ | ||
| 730 | /* Functions for testing purposes */ | ||
| 731 | /*********************************************************************************/ | ||
| 732 | |||
| 733 | 4 | void omptm_free_agents_testing__set_worker_binding(int cpuid) { | |
| 734 | 4 | __worker_binding = cpuid; | |
| 735 | 4 | } | |
| 736 | |||
| 737 | 8 | void omptm_free_agents_testing__set_free_agent_id(int id) { | |
| 738 | 8 | __free_agent_id = id; | |
| 739 | 8 | } | |
| 740 | |||
| 741 | 5 | void omptm_free_agents_testing__set_pending_tasks(unsigned int num_tasks) { | |
| 742 | 5 | pending_tasks = num_tasks; | |
| 743 | 5 | } | |
| 744 | |||
| 745 | 16 | void omptm_free_agents_testing__acquire_one_free_agent(void) { | |
| 746 | 16 | acquire_one_free_agent(); | |
| 747 | 16 | } | |
| 748 | |||
| 749 | 3 | bool omptm_free_agents_testing__in_parallel(void) { | |
| 750 | 3 | return DLB_ATOMIC_LD(&in_parallel); | |
| 751 | } | ||
| 752 | |||
| 753 | 4 | bool omptm_free_agents_testing__check_cpu_in_parallel(int cpuid) { | |
| 754 | 4 | return DLB_ATOMIC_LD(&cpu_data[cpuid].state) & CPU_STATE_IN_PARALLEL; | |
| 755 | } | ||
| 756 | |||
| 757 | 6 | bool omptm_free_agents_testing__check_cpu_idle(int cpuid) { | |
| 758 | 6 | return DLB_ATOMIC_LD(&cpu_data[cpuid].state) & CPU_STATE_IDLE; | |
| 759 | } | ||
| 760 | |||
| 761 | 3 | bool omptm_free_agents_testing__check_cpu_free_agent_enabled(int cpuid) { | |
| 762 | 3 | return DLB_ATOMIC_LD(&cpu_data[cpuid].state) & CPU_STATE_FREE_AGENT_ENABLED; | |
| 763 | } | ||
| 764 | |||
| 765 | 18 | int omptm_free_agents_testing__get_num_enabled_free_agents(void) { | |
| 766 | 18 | return DLB_ATOMIC_LD(&num_enabled_free_agents); | |
| 767 | } | ||
| 768 | |||
| 769 | 7 | int omptm_free_agents_testing__get_free_agent_cpu(int thread_id) { | |
| 770 | 7 | return free_agent_cpu_list[thread_id]; | |
| 771 | } | ||
| 772 | |||
| 773 | 31 | int omptm_free_agents_testing__get_free_agent_binding(int thread_id) { | |
| 774 | 31 | return get_free_agent_binding(thread_id); | |
| 775 | } | ||
| 776 | |||
| 777 | 54 | int omptm_free_agents_testing__get_free_agent_id_by_cpuid(int cpuid) { | |
| 778 | 54 | return get_free_agent_id_by_cpuid(cpuid); | |
| 779 | } | ||
| 780 | |||
| 781 | 7 | int omptm_free_agents_testing__get_free_agent_cpuid_by_id(int thread_id) { | |
| 782 | 7 | return get_free_agent_cpuid_by_id(thread_id); | |
| 783 | } | ||
| 784 |