| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #include "LB_numThreads/omptm_role_shift.h" | ||
| 21 | |||
| 22 | #include "apis/dlb.h" | ||
| 23 | #include "support/atomic.h" | ||
| 24 | #include "support/debug.h" | ||
| 25 | #include "support/mask_utils.h" | ||
| 26 | #include "support/tracing.h" | ||
| 27 | #include "LB_comm/shmem_cpuinfo.h" | ||
| 28 | #include "LB_comm/shmem_procinfo.h" | ||
| 29 | #include "LB_core/spd.h" | ||
| 30 | #include "LB_numThreads/omptool.h" | ||
| 31 | |||
| 32 | #include <sched.h> | ||
| 33 | #include <unistd.h> | ||
| 34 | #include <string.h> | ||
| 35 | #include <pthread.h> | ||
| 36 | #include <inttypes.h> | ||
| 37 | |||
| 38 | /* OpenMP symbols */ | ||
| 39 | int __kmp_get_num_threads_role(ompt_role_t r) __attribute__((weak)); | ||
| 40 | int __kmp_get_thread_roles(int tid, ompt_role_t *r) __attribute__((weak)); | ||
| 41 | void __kmp_set_thread_roles1(int how_many, ompt_role_t r) __attribute__((weak)); | ||
| 42 | void __kmp_set_thread_roles2(int tid, ompt_role_t r) __attribute__((weak)); | ||
| 43 | int __kmp_get_thread_id(void) __attribute__((weak)); | ||
| 44 | |||
| 45 | /* Enum for ompt_data_t *parallel_data to detect level-1 (non-nested) parallel | ||
| 46 | * regions */ | ||
| 47 | enum { | ||
| 48 | PARALLEL_UNSET, | ||
| 49 | PARALLEL_LEVEL_1, | ||
| 50 | }; | ||
| 51 | |||
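The enum above supplies the tag values stored in `ompt_data_t *parallel_data`. As a minimal, hypothetical sketch (the callback names and the `ompt_data_sketch_t` stand-in are invented here, not taken from this file), a tool can stamp the tag when a parallel region begins and test it cheaply when the region ends:

```c
/* Hypothetical sketch: tagging parallel_data to recognize level-1 regions. */
#include <stdint.h>

enum { PARALLEL_UNSET_DEMO, PARALLEL_LEVEL_1_DEMO };

typedef union { uint64_t value; void *ptr; } ompt_data_sketch_t; /* stand-in for ompt_data_t */

static void on_parallel_begin(ompt_data_sketch_t *parallel_data, int nesting_level) {
    /* Stamp the tag once, at region creation */
    parallel_data->value = (nesting_level == 1) ? PARALLEL_LEVEL_1_DEMO : PARALLEL_UNSET_DEMO;
}

static int is_level_1(const ompt_data_sketch_t *parallel_data) {
    /* Test the tag at region end without re-querying the runtime */
    return parallel_data->value == PARALLEL_LEVEL_1_DEMO;
}
```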
| 52 | /*** Static variables ***/ | ||
| 53 | |||
| 54 | static bool lewi = false; | ||
| 55 | static pid_t pid; | ||
| 56 | static omptool_opts_t omptool_opts; | ||
| 57 | static int system_size; | ||
| 58 | static int default_num_threads; | ||
| 59 | static atomic_int num_free_agents = 0; | ||
| 60 | |||
| 61 | /* Masks */ | ||
| 62 | static cpu_set_t active_mask; | ||
| 63 | static cpu_set_t process_mask; | ||
| 64 | static cpu_set_t primary_thread_mask; | ||
| 65 | |||
| 66 | /* Atomic variables */ | ||
| 67 | static atomic_bool DLB_ALIGN_CACHE in_parallel = false; | ||
| 68 | static atomic_int DLB_ALIGN_CACHE current_parallel_size = 0; | ||
| 69 | static atomic_uint DLB_ALIGN_CACHE pending_tasks = 0; | ||
| 70 | |||
| 71 | /* Thread local */ | ||
| 72 | __thread int global_tid = -1; //Reference to the thread id in the kmp runtime | ||
| 73 | |||
| 74 | /*********************************************************************************/ | ||
| 75 | /* CPU data structures and atomic-flag helper functions */ | ||
| 76 | /*********************************************************************************/ | ||
| 77 | |||
| 78 | /* Current state of the CPU (what it is being used for) */ | ||
| 79 | typedef enum CPUStatus { | ||
| 80 | OWN = 0, | ||
| 81 | UNKNOWN = 1 << 0, | ||
| 82 | LENT = 1 << 1, | ||
| 83 | BORROWED = 1 << 2 | ||
| 84 | } cpu_status_t; | ||
| 85 | |||
| 86 | typedef struct DLB_ALIGN_CACHE CPU_Data { | ||
| 87 | _Atomic(cpu_status_t) ownership; | ||
| 88 | bool fa; //true if a free-agent thread is bound to this CPU | ||
| 89 | } cpu_data_t; | ||
| 90 | |||
| 91 | static atomic_int registered_threads = 0; | ||
| 92 | static cpu_data_t *cpu_data = NULL; //per-CPU state, indexed by CPU id | ||
| 93 | static int *cpu_by_id = NULL; //maps thread id -> CPU id | ||
| 94 | |||
| 95 | 48 | static int get_id_from_cpu(int cpuid){ | |
| 96 | int i; | ||
| 97 | 48 | int nth = DLB_ATOMIC_LD_RLX(&registered_threads); |
| 98 | 2/2 ✓ Branch 0 taken 172 times. ✓ Branch 1 taken 20 times. | 192 | for(i = 0; i < nth; i++){ |
| 99 | 2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 144 times. | 172 | if(cpu_by_id[i] == cpuid) |
| 100 | 28 | return i; | |
| 101 | } | ||
| 102 | 20 | return -1; | |
| 103 | } | ||
| 104 | |||
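`get_id_from_cpu()` is the inverse of the `cpu_by_id` mapping (thread id to CPU id); `-1` means no registered thread is currently bound to that CPU, which the enable callback below uses to decide whether to request a brand-new free agent. A standalone sketch with illustrative values:

```c
/* Standalone sketch of the reverse lookup; the array contents are invented. */
#include <stdio.h>

static int cpu_by_id_demo[] = { 4, 5, 6, 7 };   /* thread id -> CPU id */
static const int nth_demo = 4;                  /* number of registered threads */

static int get_id_from_cpu_demo(int cpuid) {
    for (int i = 0; i < nth_demo; i++)
        if (cpu_by_id_demo[i] == cpuid)
            return i;       /* thread i is bound to cpuid */
    return -1;              /* no registered thread on this CPU */
}

int main(void) {
    printf("%d\n", get_id_from_cpu_demo(6));    /* prints 2 */
    printf("%d\n", get_id_from_cpu_demo(0));    /* prints -1 */
    return 0;
}
```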
| 105 | /*********************************************************************************/ | ||
| 106 | /* DLB callbacks */ | ||
| 107 | /*********************************************************************************/ | ||
| 108 | |||
| 109 | 9 | static void cb_enable_cpu(int cpuid, void *arg) { | |
| 110 | 9 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, LENT, OWN); | |
| 111 | 9 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, UNKNOWN, BORROWED); | |
| 112 | 9 | int pos = get_id_from_cpu(cpuid); |
| 113 | 2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 4 times. | 9 | if(pos >= 0){ //A thread was running here previously |
| 114 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 2 times. | 5 | if(cpu_data[cpuid].fa){ //We had a FA here. Call the API to wake it up and mark the CPU as free |
| 115 | 3 | DLB_ATOMIC_ADD(&num_free_agents, 1); | |
| 116 | 3 | __kmp_set_thread_roles2(pos, OMP_ROLE_FREE_AGENT); | |
| 117 | } | ||
| 118 | } | ||
| 119 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | else if(pos == -1){ //ask for a new FA |
| 120 | 4 | cpu_data[cpuid].fa = false; | |
| 121 | 4 | DLB_ATOMIC_ADD(&num_free_agents, 1); | |
| 122 | 4 | __kmp_set_thread_roles2(system_size, OMP_ROLE_FREE_AGENT); | |
| 123 | 4 | cpu_by_id[DLB_ATOMIC_ADD(®istered_threads, 1)] = cpuid; | |
| 124 | } | ||
| 125 | else{ | ||
| 126 | ✗ | fatal("Enable cpu with a wrong pos"); | |
| 127 | } | ||
| 128 | 9 | } | |
| 129 | |||
| 130 | 7 | static void cb_disable_cpu(int cpuid, void *arg) { |
| 131 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 3 times. | 7 | if((DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != OWN && |
| 132 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != BORROWED)){ |
| 133 | //CPU already disabled, just skip it | ||
| 134 | ✗ | return; | |
| 135 | } | ||
| 136 | 7 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, OWN, LENT); | |
| 137 | 7 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, BORROWED, UNKNOWN); |
| 138 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | if(cpu_data[cpuid].fa){ |
| 139 | 7 | int tid = get_id_from_cpu(cpuid); |
| 140 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | if(tid >= 0){ |
| 141 | 7 | DLB_ATOMIC_SUB(&num_free_agents, 1); | |
| 142 | 7 | __kmp_set_thread_roles2(tid, OMP_ROLE_NONE); | |
| 143 | } | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | ✗ | static void cb_set_process_mask(const cpu_set_t *mask, void *arg) { | |
| 148 | } | ||
| 149 | |||
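The two callbacks above implement a small ownership state machine over `cpu_status_t`: enabling moves LENT to OWN (a lent CPU comes back) or UNKNOWN to BORROWED (a foreign CPU is granted), and disabling applies the inverse transitions. A simplified sketch of just that logic, where `cas()` stands in for DLB's `cas_bit` helper:

```c
/* Sketch of the 4-state ownership machine used by cb_enable_cpu/cb_disable_cpu. */
#include <stdatomic.h>
#include <stdbool.h>

enum { OWN_D = 0, UNKNOWN_D = 1 << 0, LENT_D = 1 << 1, BORROWED_D = 1 << 2 };

static bool cas(atomic_int *v, int expected, int desired) {
    return atomic_compare_exchange_strong(v, &expected, desired);
}

static void enable(atomic_int *ownership) {
    cas(ownership, LENT_D, OWN_D);          /* our CPU came back: LENT -> OWN */
    cas(ownership, UNKNOWN_D, BORROWED_D);  /* foreign CPU granted: UNKNOWN -> BORROWED */
}

static void disable(atomic_int *ownership) {
    cas(ownership, OWN_D, LENT_D);          /* lend our CPU: OWN -> LENT */
    cas(ownership, BORROWED_D, UNKNOWN_D);  /* return a borrowed CPU: BORROWED -> UNKNOWN */
}
```

At most one of the two compare-and-swaps can succeed per call, since the expected states are distinct, so no extra locking is needed.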
| 150 | /*********************************************************************************/ | ||
| 151 | /* Init & Finalize module */ | ||
| 152 | /*********************************************************************************/ | ||
| 153 | |||
| 154 | 4 | void omptm_role_shift__init(pid_t process_id, const options_t *options) { | |
| 155 | /* Initialize static variables */ | ||
| 156 | 4 | system_size = mu_get_system_size(); | |
| 157 | 4 | lewi = options->lewi; | |
| 158 | 4 | omptool_opts = options->lewi_ompt; | |
| 159 | 4 | pid = process_id; | |
| 160 | 4 | num_free_agents = __kmp_get_num_threads_role(OMP_ROLE_FREE_AGENT); | |
| 161 | 4 | shmem_procinfo__getprocessmask(pid, &process_mask, DLB_DROM_FLAGS_NONE); |
| 162 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | verbose(VB_OMPT, "Process mask: %s", mu_to_str(&process_mask)); |
| 163 | |||
| 164 | // omp_get_max_threads cannot be called here, try using the env. var. | ||
| 165 | 4 | const char *env_omp_num_threads = getenv("OMP_NUM_THREADS"); | |
| 166 | 4 | default_num_threads = env_omp_num_threads | |
| 167 | 4 | ? atoi(env_omp_num_threads) |
| 168 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | : CPU_COUNT(&process_mask); |
| 169 | 4 | cpu_data = malloc(sizeof(cpu_data_t)*system_size); | |
| 170 | 4 | cpu_by_id = malloc(sizeof(int)*system_size); | |
| 171 | |||
| 172 | 4 | CPU_ZERO(&primary_thread_mask); |
| 173 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times. | 4 | registered_threads = (default_num_threads > num_free_agents) ? default_num_threads : num_free_agents; |
| 174 | |||
| 175 | 4 | int encountered_cpus = 0; | |
| 176 | int i; | ||
| 177 | 2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 4 times. | 36 | for(i = 0; i < system_size; i++){ |
| 178 | 32 | cpu_by_id[i] = -1; | |
| 179 | } | ||
| 180 | //Build the cpu_data structure; it holds info about each CPU in this process's node | ||
| 181 | 2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 4 times. | 36 | for(i = 0; i < system_size; i++){ |
| 182 | 5/6 ✓ Branch 0 taken 32 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 16 times. ✓ Branch 3 taken 16 times. ✓ Branch 4 taken 16 times. ✓ Branch 5 taken 16 times. | 32 | if(CPU_ISSET(i, &process_mask)){ |
| 183 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 12 times. | 16 | if(++encountered_cpus == 1){ |
| 184 | //First encountered CPU belongs to the primary thread | ||
| 185 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | CPU_SET(i, &primary_thread_mask); |
| 186 | 4 | cpu_by_id[encountered_cpus - 1] = i; | |
| 187 | } | ||
| 188 | 16 | cpu_data[i].ownership = OWN; | |
| 189 | } | ||
| 190 | else{ | ||
| 191 | 16 | cpu_data[i].ownership = UNKNOWN; | |
| 192 | } | ||
| 193 | 32 | cpu_data[i].fa = false; | |
| 194 | } | ||
| 195 | 4 | memcpy(&active_mask, &primary_thread_mask, sizeof(cpu_set_t)); | |
| 196 | |||
| 197 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | if (lewi) { |
| 198 | int err; | ||
| 199 | 4 | err = DLB_CallbackSet(dlb_callback_enable_cpu, (dlb_callback_t)cb_enable_cpu, NULL); |
| 200 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
| 201 | ✗ | warning("DLB_CallbackSet enable_cpu: %s", DLB_Strerror(err)); | |
| 202 | } | ||
| 203 | 4 | err = DLB_CallbackSet(dlb_callback_disable_cpu, (dlb_callback_t)cb_disable_cpu, NULL); |
| 204 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
| 205 | ✗ | warning("DLB_CallbackSet disable_cpu: %s", DLB_Strerror(err)); | |
| 206 | } | ||
| 207 | 4 | err = DLB_CallbackSet(dlb_callback_set_process_mask, | |
| 208 | (dlb_callback_t)cb_set_process_mask, NULL); | ||
| 209 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
| 210 | ✗ | warning("DLB_CallbackSet set_process_mask: %s", DLB_Strerror(err)); | |
| 211 | } | ||
| 212 | } | ||
| 213 | 4 | } | |
| 214 | |||
| 215 | 4 | void omptm_role_shift__finalize(void) { | |
| 216 | 4 | free(cpu_data); | |
| 217 | 4 | cpu_data = NULL; | |
| 218 | 4 | free(cpu_by_id); | |
| 219 | 4 | cpu_by_id = NULL; | |
| 220 | 4 | } | |
| 221 | |||
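`omptm_role_shift__init()` wires the module into LeWI through `DLB_CallbackSet`. The client-side pattern, extracted into a minimal sketch (assuming DLB's installed `<dlb.h>` header; the `my_*` callbacks are placeholders invented here):

```c
/* Sketch of the callback registration pattern used by the init function above. */
#include <dlb.h>
#include <stdio.h>

static void my_enable_cpu(int cpuid, void *arg)  { (void)cpuid; (void)arg; /* resume work on cpuid */ }
static void my_disable_cpu(int cpuid, void *arg) { (void)cpuid; (void)arg; /* stop running on cpuid */ }

static void register_lewi_callbacks(void) {
    int err = DLB_CallbackSet(dlb_callback_enable_cpu,
                              (dlb_callback_t)my_enable_cpu, NULL);
    if (err != DLB_SUCCESS)
        fprintf(stderr, "enable_cpu: %s\n", DLB_Strerror(err));

    err = DLB_CallbackSet(dlb_callback_disable_cpu,
                          (dlb_callback_t)my_disable_cpu, NULL);
    if (err != DLB_SUCCESS)
        fprintf(stderr, "disable_cpu: %s\n", DLB_Strerror(err));
}
```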
| 222 | /*********************************************************************************/ | ||
| 223 | /* Blocking calls specific functions */ | ||
| 224 | /*********************************************************************************/ | ||
| 225 | |||
| 226 | /*TODO: what happens when executing in "ompss" mode*/ | ||
| 227 | 1 | void omptm_role_shift__IntoBlockingCall(void) { |
| 228 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (lewi) { |
| 229 | /* It is unclear what to do if a blocking call is invoked inside a | ||
| 230 | * parallel region. We could ignore it, but then we should also ignore | ||
| 231 | * the associated OutOfBlockingCall, and how would we know which one? */ | ||
| 232 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | fatal_cond(DLB_ATOMIC_LD(&in_parallel), |
| 233 | "Blocking Call inside a parallel region not supported"); | ||
| 234 | cpu_set_t cpus_to_lend; | ||
| 235 | 1 | CPU_ZERO(&cpus_to_lend); | |
| 236 | int i; | ||
| 237 | 2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 1 times. | 9 | for(i = 0; i < system_size; i++){ |
| 238 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times. | 8 | if(DLB_ATOMIC_LD_RLX(&cpu_data[i].ownership) == OWN){ |
| 239 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 3 times. | 4 | if(i == cpu_by_id[global_tid]){ |
| 240 | //Just change the status for the thread calling MPI | ||
| 241 | 1 | DLB_ATOMIC_ST_RLX(&cpu_data[i].ownership, LENT); | |
| 242 | } | ||
| 243 | else{ | ||
| 244 | 3 | cb_disable_cpu(i, NULL); | |
| 245 | } | ||
| 246 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | CPU_SET(i, &cpus_to_lend); |
| 247 | } | ||
| 248 | 1/8 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken. | 4 | else if(DLB_ATOMIC_LD_RLX(&cpu_data[i].ownership) == BORROWED && CPU_ISSET(i, &process_mask)){ |
| 249 | ✗ | DLB_ATOMIC_ST_RLX(&cpu_data[i].ownership, UNKNOWN); | |
| 250 | ✗ | CPU_SET(i, &cpus_to_lend); | |
| 251 | } | ||
| 252 | } | ||
| 253 | 1 | DLB_LendCpuMask(&cpus_to_lend); |
| 254 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | verbose(VB_OMPT, "IntoBlockingCall - lending all"); |
| 255 | } | ||
| 256 | 1 | } | |
| 257 | |||
| 258 | |||
| 259 | 1 | void omptm_role_shift__OutOfBlockingCall(void) { |
| 260 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (lewi) { |
| 261 | 1 | cb_enable_cpu(cpu_by_id[global_tid], NULL); |
| 262 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (omptool_opts & OMPTOOL_OPTS_LEND) { |
| 263 | /* Do nothing. | ||
| 264 | * Do not reclaim since going out of a blocking call is not | ||
| 265 | * an indication that the CPUs may be needed. | ||
| 266 | * OMPTOOL_OPTS_AGGRESSIVE also takes this branch. */ | ||
| 267 | } | ||
| 268 | else { | ||
| 269 | 1 | DLB_Reclaim(); | |
| 270 | } | ||
| 271 | } | ||
| 272 | 1 | } | |
| 273 | |||
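Together, the two blocking-call hooks implement a lend-all / reclaim-all protocol around MPI: everything the process owns is lent before blocking and, unless the LEND policy is active, reclaimed on return. The core of that protocol as a sketch (assuming the installed `<dlb.h>`; the per-CPU bookkeeping done above is omitted):

```c
/* Sketch of the lend/reclaim protocol around a blocking call. */
#define _GNU_SOURCE
#include <sched.h>
#include <dlb.h>

static void before_blocking_call(const cpu_set_t *owned_cpus) {
    /* About to block (e.g. in MPI): hand every owned CPU to the node */
    DLB_LendCpuMask(owned_cpus);
}

static void after_blocking_call(int lend_policy) {
    /* Back from the blocking call: take our CPUs back. Under a LEND
     * policy the module skips this and borrows on demand instead. */
    if (!lend_policy)
        DLB_Reclaim();
}
```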
| 274 | |||
| 275 | /*********************************************************************************/ | ||
| 276 | /* OMPT registered callbacks */ | ||
| 277 | /*********************************************************************************/ | ||
| 278 | |||
| 279 | 3 | void omptm_role_shift__thread_begin(ompt_thread_t thread_type) { | |
| 280 | /* Set up thread local spd */ | ||
| 281 | 3 | spd_enter_dlb(thread_spd); | |
| 282 | |||
| 283 | 3 | global_tid = __kmp_get_thread_id(); |
| 284 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | fatal_cond(registered_threads > system_size, |
| 285 | "DLB created more threads than existing CPUs in the node"); | ||
| 286 | |||
| 287 | 3 | int cpuid = cpu_by_id[global_tid]; |
| 288 | 1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken. | 3 | if(thread_type == ompt_thread_other){ //other => free agent |
| 289 | cpu_set_t thread_mask; | ||
| 290 | 3 | CPU_ZERO(&thread_mask); |
| 291 | 1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. | 3 | if(cpuid >= 0 && (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != OWN && |
| 292 | ✗ | DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != LENT)){ | |
| 293 | //Bind the thread to the pre-assigned CPU, then return the CPU if necessary | ||
| 294 | ✗ | cpu_data[cpuid].fa = true; | |
| 295 | ✗ | cpu_by_id[global_tid] = cpuid; | |
| 296 | ✗ | CPU_SET(cpuid, &thread_mask); | |
| 297 | ✗ | pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask); | |
| 298 | ✗ | verbose(VB_OMPT, "Binding a free agent to CPU %d", cpuid); | |
| 299 | instrument_event(REBIND_EVENT, cpuid+1, EVENT_BEGIN); | ||
| 300 | ✗ | if (lewi) { | |
| 301 | ✗ | if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { | |
| 302 | ✗ | if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { | |
| 303 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 304 | } | ||
| 305 | } | ||
| 306 | ✗ | else if (DLB_ATOMIC_LD(&pending_tasks) == 0) { | |
| 307 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 308 | /* TODO: only lend free agents not part of the process mask */ | ||
| 309 | /* or, depending on the ompt dlb policy */ | ||
| 310 | ✗ | if (!CPU_ISSET(cpuid, &process_mask)) { | |
| 311 | ✗ | DLB_LendCpu(cpuid); | |
| 312 | } | ||
| 313 | } | ||
| 314 | } | ||
| 315 | } | ||
| 316 | else{ | ||
| 317 | 3 | pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask); | |
| 318 | 3 | cpuid = mu_get_single_cpu(&thread_mask); |
| 319 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | if(cpuid != -1){ |
| 320 | ✗ | cpu_by_id[global_tid] = cpuid; | |
| 321 | ✗ | cpu_data[cpuid].fa = true; | |
| 322 | } | ||
| 323 | else | ||
| 324 | 3 | warning("Started a free agent with multiple CPUs in the affinity mask"); | |
| 325 | } | ||
| 326 | } | ||
| 327 | 3 | } | |
| 328 | |||
| 329 | 2 | void omptm_role_shift__thread_role_shift(ompt_data_t *thread_data, | |
| 330 | ompt_role_t prior_role, | ||
| 331 | ompt_role_t next_role){ | ||
| 332 | 2 | int cpuid = cpu_by_id[global_tid]; |
| 333 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 1 times. | 2 | if(prior_role == OMP_ROLE_FREE_AGENT){ |
| 334 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | if(next_role == OMP_ROLE_COMMUNICATOR) return; //Not supported yet |
| 335 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | verbose(VB_OMPT, "Free agent %d changing the role to NONE", global_tid); |
| 336 | } | ||
| 337 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | else if(prior_role == OMP_ROLE_NONE){ |
| 338 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | if(next_role == OMP_ROLE_COMMUNICATOR) return; //Not supported yet |
| 339 | 3/6 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. | 1 | if(CPU_ISSET(cpuid, &process_mask)) //One of the initial/worker threads; no need to check for own CPUs |
| 340 | 1 | return; | |
| 341 | ✗ | if (lewi) { | |
| 342 | ✗ | if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { | |
| 343 | ✗ | if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { | |
| 344 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 345 | } | ||
| 346 | } | ||
| 347 | ✗ | else if (DLB_ATOMIC_LD(&pending_tasks) == 0 | |
| 348 | ✗ | && (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) == BORROWED | |
| 349 | ✗ | || omptool_opts & OMPTOOL_OPTS_LEND)) { | |
| 350 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 351 | ✗ | DLB_LendCpu(cpuid); | |
| 352 | } | ||
| 353 | } | ||
| 354 | } | ||
| 355 | } | ||
| 356 | |||
| 357 | |||
| 358 | 3 | void omptm_role_shift__parallel_begin(omptool_parallel_data_t *parallel_data) { |
| 359 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 1 times. | 3 | if (parallel_data->level == 1) { |
| 360 | 2 | DLB_ATOMIC_ST(&in_parallel, true); | |
| 361 | 2 | DLB_ATOMIC_ST(¤t_parallel_size, parallel_data->requested_parallelism); | |
| 362 | } | ||
| 363 | 3 | } | |
| 364 | |||
| 365 | 3 | void omptm_role_shift__parallel_end(omptool_parallel_data_t *parallel_data) { |
| 366 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 1 times. | 3 | if (parallel_data->level == 1) { |
| 367 | 2 | DLB_ATOMIC_ST(&in_parallel, false); | |
| 368 | } | ||
| 369 | 3 | } | |
| 370 | |||
| 371 | 6 | void omptm_role_shift__task_create(void) { | |
| 372 | /* Increment the amount of pending tasks */ | ||
| 373 | 6 | DLB_ATOMIC_ADD(&pending_tasks, 1); | |
| 374 | |||
| 375 | /* For now, let's assume that we always want to increase the number | ||
| 376 | * of active threads whenever a task is created | ||
| 377 | */ | ||
| 378 | 1/2 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken. | 6 | if (lewi) { |
| 379 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times. | 6 | if(omptool_opts == OMPTOOL_OPTS_LEND) { |
| 380 | ✗ | DLB_BorrowCpus(1); | |
| 381 | } | ||
| 382 | else { | ||
| 383 | 6 | DLB_AcquireCpus(1); | |
| 384 | } | ||
| 385 | } | ||
| 386 | 6 | } | |
| 387 | |||
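`task_create` pairs a pending-task counter increment with a request for exactly one more CPU, so the number of outstanding requests tracks the number of queued tasks. A sketch of that policy split (assuming `<dlb.h>`; the `lend_policy` flag stands in for `omptool_opts == OMPTOOL_OPTS_LEND`):

```c
/* Sketch of the one-CPU-per-task request policy in task_create above. */
#include <dlb.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint pending_demo = 0;

static void on_task_create(bool lend_policy) {
    atomic_fetch_add(&pending_demo, 1);
    if (lend_policy)
        DLB_BorrowCpus(1);   /* only take CPUs that others have lent */
    else
        DLB_AcquireCpus(1);  /* may also reclaim CPUs this process owns */
}
```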
| 388 | 9 | void omptm_role_shift__task_complete(void) { | |
| 389 | 9 | int cpuid = cpu_by_id[global_tid]; |
| 390 | 4/6 ✓ Branch 0 taken 9 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 9 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 8 times. ✓ Branch 5 taken 1 times. | 9 | if (lewi && cpuid >= 0 && cpu_data[cpuid].fa) { |
| 391 | /* Return CPU if reclaimed */ | ||
| 392 | 2/2 ✓ Branch 1 taken 3 times. ✓ Branch 2 taken 5 times. | 8 | if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { |
| 393 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | if(DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) == UNKNOWN) { |
| 394 | /* We previously returned the CPU, but the free agent | ||
| 395 | * didn't emit a role-shift event to be rescheduled. This can | ||
| 396 | * happen when the thread receives a change from NONE to FA | ||
| 397 | * just after a FA to NONE change. In that case, the second | ||
| 398 | * shift cancels the first one and the thread doesn't emit a | ||
| 399 | * callback. Just deactivate the thread. */ | ||
| 400 | ✗ | DLB_ATOMIC_SUB(&num_free_agents, 1); | |
| 401 | ✗ | __kmp_set_thread_roles2(global_tid, OMP_ROLE_NONE); | |
| 402 | } | ||
| 403 | 1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 3 times. | 3 | else if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { |
| 404 | ✗ | cb_disable_cpu(cpuid, NULL); | |
| 405 | } | ||
| 406 | } | ||
| 407 | /* Lend CPU if no more tasks and CPU is borrowed, or policy is LEND */ | ||
| 408 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times. | 5 | else if (DLB_ATOMIC_LD(&pending_tasks) == 0 |
| 409 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times. | 4 | && (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) == BORROWED |
| 410 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | || omptool_opts & OMPTOOL_OPTS_LEND)) { |
| 411 | 1 | cb_disable_cpu(cpuid, NULL); | |
| 412 | 1 | DLB_LendCpu(cpuid); | |
| 413 | } | ||
| 414 | } | ||
| 415 | 9 | } | |
| 416 | |||
| 417 | 6 | void omptm_role_shift__task_switch(void) { |
| 418 | 3/4 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 5 times. ✓ Branch 3 taken 1 times. | 6 | if (lewi && DLB_ATOMIC_SUB(&pending_tasks, 1) > 1) { |
| 419 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times. | 5 | if(omptool_opts == OMPTOOL_OPTS_LEND) { |
| 420 | ✗ | DLB_BorrowCpus(1); | |
| 421 | } else { | ||
| 422 | 5 | DLB_AcquireCpus(1); | |
| 423 | } | ||
| 424 | } | ||
| 425 | 6 | } | |
| 426 | |||
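The guard in `task_switch` relies on `DLB_ATOMIC_SUB` returning the previous counter value (the same fetch-and-op convention that `cpu_by_id[DLB_ATOMIC_ADD(&registered_threads, 1)]` uses in `cb_enable_cpu`), so `> 1` means at least one task is still pending after this one is taken. A minimal sketch of that check:

```c
/* Sketch: fetch-and-sub returns the OLD value, so "old > 1" means
 * "at least one task remains after consuming this one". */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint pending_tasks_demo;

static bool worth_requesting_another_cpu(void) {
    return atomic_fetch_sub(&pending_tasks_demo, 1) > 1;
}
```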
| 427 | |||
| 428 | /*********************************************************************************/ | ||
| 429 | /* Vtable for handling omptool events */ | ||
| 430 | /*********************************************************************************/ | ||
| 431 | |||
| 432 | const omptool_event_funcs_t omptm_role_shift_events_vtable = (const omptool_event_funcs_t) { | ||
| 433 | .init = omptm_role_shift__init, | ||
| 434 | .finalize = omptm_role_shift__finalize, | ||
| 435 | .into_mpi = omptm_role_shift__IntoBlockingCall, | ||
| 436 | .outof_mpi = omptm_role_shift__OutOfBlockingCall, | ||
| 437 | .lend_from_api = NULL, | ||
| 438 | .thread_begin = omptm_role_shift__thread_begin, | ||
| 439 | .thread_end = NULL, | ||
| 440 | .thread_role_shift = omptm_role_shift__thread_role_shift, | ||
| 441 | .parallel_begin = omptm_role_shift__parallel_begin, | ||
| 442 | .parallel_end = omptm_role_shift__parallel_end, | ||
| 443 | .into_parallel_function = NULL, | ||
| 444 | .outof_parallel_function = NULL, | ||
| 445 | .into_parallel_implicit_barrier = NULL, | ||
| 446 | .task_create = omptm_role_shift__task_create, | ||
| 447 | .task_complete = omptm_role_shift__task_complete, | ||
| 448 | .task_switch = omptm_role_shift__task_switch, | ||
| 449 | }; | ||
| 450 | |||
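The module is driven exclusively through this vtable, and several slots are deliberately NULL, so a dispatcher must guard each call. A sketch of the dispatch side (the `dispatch_task_create` wrapper is hypothetical; the struct and field names come from this file):

```c
/* Sketch: guarded dispatch through the role-shift vtable. */
#include "LB_numThreads/omptool.h"

extern const omptool_event_funcs_t omptm_role_shift_events_vtable;

static void dispatch_task_create(void) {
    if (omptm_role_shift_events_vtable.task_create != NULL)
        omptm_role_shift_events_vtable.task_create();
}
```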
| 451 | |||
| 452 | /*********************************************************************************/ | ||
| 453 | /* Functions for testing purposes */ | ||
| 454 | /*********************************************************************************/ | ||
| 455 | |||
| 456 | 3 | int omptm_role_shift_testing__get_num_free_agents(void) { | |
| 457 | 3 | return num_free_agents; | |
| 458 | } | ||
| 459 | |||
| 460 | 3 | int omptm_role_shift_testing__get_num_registered_threads(void) { | |
| 461 | 3 | return registered_threads; | |
| 462 | } | ||
| 463 | |||
| 464 | 3 | int omptm_role_shift_testing__get_current_parallel_size(void) { | |
| 465 | 3 | return current_parallel_size; | |
| 466 | } | ||
| 467 | |||
| 468 | 7 | void omptm_role_shift_testing__set_pending_tasks(unsigned int num_tasks) { | |
| 469 | 7 | pending_tasks = num_tasks; | |
| 470 | 7 | } | |
| 471 | |||
| 472 | 6 | unsigned int omptm_role_shift_testing__get_pending_tasks(void) { | |
| 473 | 6 | return pending_tasks; | |
| 474 | } | ||
| 475 | |||
| 476 | 10 | void omptm_role_shift_testing__set_global_tid(int tid) { | |
| 477 | 10 | global_tid = tid; | |
| 478 | 10 | } | |
| 479 | |||
| 480 | 6 | bool omptm_role_shift_testing__in_parallel(void) { | |
| 481 | 6 | return in_parallel; | |
| 482 | } | ||
| 483 | |||
| 484 | 32 | int omptm_role_shift_testing__get_id_from_cpu(int cpuid) { | |
| 485 | 32 | return get_id_from_cpu(cpuid); | |
| 486 | } | ||
| 487 | |||
| 488 | 1 | int* omptm_role_shift_testing__get_cpu_by_id_ptr(void) { | |
| 489 | 1 | return cpu_by_id; | |
| 490 | } | ||
| 491 | |||
| 492 | 1 | cpu_data_t* omptm_role_shift_testing__get_cpu_data_ptr(void) { | |
| 493 | 1 | return cpu_data; | |
| 494 | } | ||
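The testing hooks above make the module's internal state observable without a real OpenMP runtime. A sketch of a unit test built on them (assuming the hooks are declared in the module header; note that before `omptm_role_shift__init()` runs, no threads are registered, so the reverse lookup must miss):

```c
/* Sketch: exercising the testing hooks from a standalone unit test. */
#include <assert.h>
#include "LB_numThreads/omptm_role_shift.h"

static void test_testing_hooks(void) {
    omptm_role_shift_testing__set_global_tid(7);
    omptm_role_shift_testing__set_pending_tasks(2);
    assert(omptm_role_shift_testing__get_pending_tasks() == 2);

    /* No parallel region has been entered */
    assert(!omptm_role_shift_testing__in_parallel());

    /* No threads registered yet: any CPU id misses */
    assert(omptm_role_shift_testing__get_id_from_cpu(0) == -1);
}
```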
| 495 |