Line | Branch | Exec | Source |
---|---|---|---|
1 | /*********************************************************************************/ | ||
2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
3 | /* */ | ||
4 | /* This file is part of the DLB library. */ | ||
5 | /* */ | ||
6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
9 | /* (at your option) any later version. */ | ||
10 | /* */ | ||
11 | /* DLB is distributed in the hope that it will be useful, */ | ||
12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
14 | /* GNU Lesser General Public License for more details. */ | ||
15 | /* */ | ||
16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
18 | /*********************************************************************************/ | ||
19 | |||
20 | #include "LB_numThreads/omptm_role_shift.h" | ||
21 | |||
22 | #include "apis/dlb.h" | ||
23 | #include "support/atomic.h" | ||
24 | #include "support/debug.h" | ||
25 | #include "support/mask_utils.h" | ||
26 | #include "support/tracing.h" | ||
27 | #include "LB_comm/shmem_cpuinfo.h" | ||
28 | #include "LB_comm/shmem_procinfo.h" | ||
29 | #include "LB_core/spd.h" | ||
30 | #include "LB_numThreads/omptool.h" | ||
31 | |||
32 | #include <sched.h> | ||
33 | #include <unistd.h> | ||
34 | #include <string.h> | ||
35 | #include <pthread.h> | ||
36 | #include <inttypes.h> | ||
37 | |||
38 | /* OpenMP symbols */ | ||
39 | int __kmp_get_num_threads_role(ompt_role_t r) __attribute__((weak)); | ||
40 | int __kmp_get_thread_roles(int tid, ompt_role_t *r) __attribute__((weak)); | ||
41 | void __kmp_set_thread_roles1(int how_many, ompt_role_t r) __attribute__((weak)); | ||
42 | void __kmp_set_thread_roles2(int tid, ompt_role_t r) __attribute__((weak)); | ||
43 | int __kmp_get_thread_id(void) __attribute__((weak)); | ||
44 | |||
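
The `__kmp_*` entry points above are declared weak so the module links even against an OpenMP runtime that does not implement the role-shift extension: an unresolved weak symbol evaluates to a null pointer. A minimal sketch of the usual guard pattern (the `have_role_shift_api` helper is hypothetical, not part of this file):

```c
#include <stdio.h>

/* Weak declaration: the symbol resolves to NULL when the linked
 * runtime does not provide it. */
int __kmp_get_thread_id(void) __attribute__((weak));

/* Hypothetical guard: test the weak symbol before calling it. */
static int have_role_shift_api(void) {
    return __kmp_get_thread_id != NULL;
}

int main(void) {
    if (have_role_shift_api())
        printf("tid = %d\n", __kmp_get_thread_id());
    else
        printf("role-shift API not available\n");
    return 0;
}
```
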
45 | /* Enum for ompt_data_t *parallel_data to detect level 1 (non-nested) parallel | ||
46 | * regions */ | ||
47 | enum { | ||
48 | PARALLEL_UNSET, | ||
49 | PARALLEL_LEVEL_1, | ||
50 | }; | ||
51 | |||
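
The enum value is stored in the `ompt_data_t` the runtime hands to the parallel-region callbacks, so later callbacks can cheaply test whether they run inside a level-1 region. A standalone sketch of that tagging, assuming the standard OMPT `ompt_data_t` union (the two helper functions are illustrative, not from this file):

```c
#include <stdint.h>

/* ompt_data_t as defined by the OMPT interface: a 64-bit value or a pointer. */
typedef union ompt_data_t {
    uint64_t value;
    void *ptr;
} ompt_data_t;

enum { PARALLEL_UNSET, PARALLEL_LEVEL_1 };

/* Illustrative: tag the region when it begins... */
static void tag_parallel(ompt_data_t *parallel_data, unsigned level) {
    parallel_data->value = (level == 1) ? PARALLEL_LEVEL_1 : PARALLEL_UNSET;
}

/* ...and test the tag from any later callback that receives the same data. */
static int is_level_1(const ompt_data_t *parallel_data) {
    return parallel_data->value == PARALLEL_LEVEL_1;
}
```
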
52 | /*** Static variables ***/ | ||
53 | |||
54 | static bool lewi = false; | ||
55 | static pid_t pid; | ||
56 | static omptool_opts_t omptool_opts; | ||
57 | static int system_size; | ||
58 | static int default_num_threads; | ||
59 | static atomic_int num_free_agents = 0; | ||
60 | |||
61 | /* Masks */ | ||
62 | static cpu_set_t active_mask; | ||
63 | static cpu_set_t process_mask; | ||
64 | static cpu_set_t primary_thread_mask; | ||
65 | |||
66 | /* Atomic variables */ | ||
67 | static atomic_bool DLB_ALIGN_CACHE in_parallel = false; | ||
68 | static atomic_int DLB_ALIGN_CACHE current_parallel_size = 0; | ||
69 | static atomic_uint DLB_ALIGN_CACHE pending_tasks = 0; | ||
70 | |||
71 | /* Thread local */ | ||
72 | __thread int global_tid = -1; //Reference to the thread id of the kmp runtime | ||
73 | |||
74 | /*********************************************************************************/ | ||
75 | /* CPU Data structures and helper atomic flags functions */ | ||
76 | /*********************************************************************************/ | ||
77 | |||
78 | /* Current state of the CPU (what it is being used for) */ | ||
79 | typedef enum CPUStatus { | ||
80 | OWN = 0, | ||
81 | UNKNOWN = 1 << 0, | ||
82 | LENT = 1 << 1, | ||
83 | BORROWED = 1 << 2 | ||
84 | } cpu_status_t; | ||
85 | |||
86 | typedef struct DLB_ALIGN_CACHE CPU_Data { | ||
87 | _Atomic(cpu_status_t) ownership; | ||
88 | bool fa; | ||
89 | } cpu_data_t; | ||
90 | |||
91 | static atomic_int registered_threads = 0; | ||
92 | static cpu_data_t *cpu_data = NULL; | ||
93 | static int *cpu_by_id = NULL; | ||
94 | |||
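
Ownership transitions are performed with compare-and-swap so that concurrent callbacks cannot double-lend or double-borrow a CPU: enabling flips LENT to OWN or UNKNOWN to BORROWED, and disabling flips them back. A self-contained sketch of that state machine with C11 atomics; `toy_cas` stands in for DLB's `cas_bit` (whose exact semantics live in support/atomic.h and may differ):

```c
#include <stdatomic.h>
#include <stdbool.h>

typedef enum { OWN = 0, UNKNOWN = 1 << 0, LENT = 1 << 1, BORROWED = 1 << 2 } cpu_status_t;

/* Stand-in for cas_bit: swap 'expected' for 'desired' only if it matches. */
static bool toy_cas(_Atomic int *status, int expected, int desired) {
    return atomic_compare_exchange_strong(status, &expected, desired);
}

int main(void) {
    _Atomic int ownership = OWN;
    toy_cas(&ownership, OWN, LENT);         /* disable path: lend an owned CPU   */
    toy_cas(&ownership, UNKNOWN, BORROWED); /* fails here: the CPU is LENT       */
    toy_cas(&ownership, LENT, OWN);         /* enable path: reclaim the lent CPU */
    return ownership == OWN ? 0 : 1;
}
```
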
95 | 48 | static int get_id_from_cpu(int cpuid){ | |
96 | int i; | ||
97 | 48 | int nth = DLB_ATOMIC_LD_RLX(®istered_threads); | |
98 | 2/2 ✓ Branch 0 taken 172 times. ✓ Branch 1 taken 20 times. | 192 | for(i = 0; i < nth; i++){ |
99 | 2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 144 times. | 172 | if(cpu_by_id[i] == cpuid) |
100 | 28 | return i; | |
101 | } | ||
102 | 20 | return -1; | |
103 | } | ||
104 | |||
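
`cpu_by_id` maps a thread id to the CPU it occupies; `get_id_from_cpu` is the inverse lookup, a linear scan bounded by `registered_threads`. For illustration only, with made-up values:

```c
/* Made-up mapping: thread 0 on CPU 3, thread 1 on CPU 5, thread 2 on CPU 7. */
static int cpu_by_id_demo[] = { 3, 5, 7 };

/* Same linear scan as get_id_from_cpu() above. */
static int id_from_cpu_demo(int cpuid) {
    for (int i = 0; i < 3; i++)
        if (cpu_by_id_demo[i] == cpuid) return i;
    return -1; /* no registered thread is bound to this CPU */
}
/* id_from_cpu_demo(5) == 1, id_from_cpu_demo(4) == -1 */
```
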
105 | /*********************************************************************************/ | ||
106 | /* DLB callbacks */ | ||
107 | /*********************************************************************************/ | ||
108 | |||
109 | 9 | static void cb_enable_cpu(int cpuid, void *arg) { | |
110 | 9 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, LENT, OWN); | |
111 | 9 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, UNKNOWN, BORROWED); | |
112 | 9 | int pos = get_id_from_cpu(cpuid); | |
113 | 2/2 ✓ Branch 0 taken 5 times. ✓ Branch 1 taken 4 times. | 9 | if(pos >= 0){ //A thread was running here previously |
114 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 2 times. | 5 | if(cpu_data[cpuid].fa){ //We had a FA here. Call the API to wake it up and mark the CPU as free here |
115 | 3 | DLB_ATOMIC_ADD(&num_free_agents, 1); | |
116 | 3 | __kmp_set_thread_roles2(pos, OMP_ROLE_FREE_AGENT); | |
117 | } | ||
118 | } | ||
119 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | else if(pos == -1){ //ask for a new FA |
120 | 4 | cpu_data[cpuid].fa = false; | |
121 | 4 | DLB_ATOMIC_ADD(&num_free_agents, 1); | |
122 | 4 | __kmp_set_thread_roles2(system_size, OMP_ROLE_FREE_AGENT); | |
123 | 4 | cpu_by_id[DLB_ATOMIC_ADD(®istered_threads, 1)] = cpuid; | |
124 | } | ||
125 | else{ | ||
126 | ✗ | fatal("Enable cpu with a wrong pos"); | |
127 | } | ||
128 | 9 | } | |
129 | |||
130 | 7 | static void cb_disable_cpu(int cpuid, void *arg) { | |
131 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 3 times. | 7 | if((DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != OWN && |
132 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != BORROWED)){ |
133 | //CPU already disabled, just skip it | ||
134 | ✗ | return; | |
135 | } | ||
136 | 7 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, OWN, LENT); | |
137 | 7 | cas_bit((atomic_int *)&cpu_data[cpuid].ownership, BORROWED, UNKNOWN); | |
138 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | if(cpu_data[cpuid].fa){ |
139 | 7 | int tid = get_id_from_cpu(cpuid); | |
140 | 1/2 ✓ Branch 0 taken 7 times. ✗ Branch 1 not taken. | 7 | if(tid >= 0){ |
141 | 7 | DLB_ATOMIC_SUB(&num_free_agents, 1); | |
142 | 7 | __kmp_set_thread_roles2(tid, OMP_ROLE_NONE); | |
143 | } | ||
144 | } | ||
145 | } | ||
146 | |||
147 | ✗ | static void cb_set_process_mask(const cpu_set_t *mask, void *arg) { | |
148 | } | ||
149 | |||
150 | /*********************************************************************************/ | ||
151 | /* Init & Finalize module */ | ||
152 | /*********************************************************************************/ | ||
153 | |||
154 | 4 | void omptm_role_shift__init(pid_t process_id, const options_t *options) { | |
155 | /* Initialize static variables */ | ||
156 | 4 | system_size = mu_get_system_size(); | |
157 | 4 | lewi = options->lewi; | |
158 | 4 | omptool_opts = options->lewi_ompt; | |
159 | 4 | pid = process_id; | |
160 | 4 | num_free_agents = __kmp_get_num_threads_role(OMP_ROLE_FREE_AGENT); | |
161 | 4 | shmem_procinfo__getprocessmask(pid, &process_mask, 0); | |
162 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | verbose(VB_OMPT, "Process mask: %s", mu_to_str(&process_mask)); |
163 | |||
164 | // omp_get_max_threads cannot be called here, try using the env. var. | ||
165 | 4 | const char *env_omp_num_threads = getenv("OMP_NUM_THREADS"); | |
166 | 4 | default_num_threads = env_omp_num_threads | |
167 | 4 | ? atoi(env_omp_num_threads) | |
168 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | : CPU_COUNT(&process_mask); |
169 | 4 | cpu_data = malloc(sizeof(cpu_data_t)*system_size); | |
170 | 4 | cpu_by_id = malloc(sizeof(int)*system_size); | |
171 | |||
172 | 4 | CPU_ZERO(&primary_thread_mask); | |
173 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times. | 4 | registered_threads = (default_num_threads > num_free_agents) ? default_num_threads : num_free_agents; |
174 | |||
175 | 4 | int encountered_cpus = 0; | |
176 | int i; | ||
177 | 2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 4 times. | 36 | for(i = 0; i < system_size; i++){ |
178 | 32 | cpu_by_id[i] = -1; | |
179 | } | ||
180 | //Build the cpu_data structure. It holds info about the CPUs of the node where the process runs | ||
181 | 2/2 ✓ Branch 0 taken 32 times. ✓ Branch 1 taken 4 times. | 36 | for(i = 0; i < system_size; i++){ |
182 | 5/6 ✓ Branch 0 taken 32 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 16 times. ✓ Branch 3 taken 16 times. ✓ Branch 4 taken 16 times. ✓ Branch 5 taken 16 times. | 32 | if(CPU_ISSET(i, &process_mask)){ |
183 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 12 times. | 16 | if(++encountered_cpus == 1){ |
184 | //First encountered CPU belongs to the primary thread | ||
185 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | CPU_SET(i, &primary_thread_mask); |
186 | 4 | cpu_by_id[encountered_cpus - 1] = i; | |
187 | } | ||
188 | 16 | cpu_data[i].ownership = OWN; | |
189 | } | ||
190 | else{ | ||
191 | 16 | cpu_data[i].ownership = UNKNOWN; | |
192 | } | ||
193 | 32 | cpu_data[i].fa = false; | |
194 | } | ||
195 | 4 | memcpy(&active_mask, &primary_thread_mask, sizeof(cpu_set_t)); | |
196 | |||
197 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | if (lewi) { |
198 | int err; | ||
199 | 4 | err = DLB_CallbackSet(dlb_callback_enable_cpu, (dlb_callback_t)cb_enable_cpu, NULL); | |
200 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
201 | ✗ | warning("DLB_CallbackSet enable_cpu: %s", DLB_Strerror(err)); | |
202 | } | ||
203 | 4 | err = DLB_CallbackSet(dlb_callback_disable_cpu, (dlb_callback_t)cb_disable_cpu, NULL); | |
204 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
205 | ✗ | warning("DLB_CallbackSet disable_cpu: %s", DLB_Strerror(err)); | |
206 | } | ||
207 | 4 | err = DLB_CallbackSet(dlb_callback_set_process_mask, | |
208 | (dlb_callback_t)cb_set_process_mask, NULL); | ||
209 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. | 4 | if (err != DLB_SUCCESS) { |
210 | ✗ | warning("DLB_CallbackSet set_process_mask: %s", DLB_Strerror(err)); | |
211 | } | ||
212 | } | ||
213 | 4 | } | |
214 | |||
215 | 4 | void omptm_role_shift__finalize(void) { | |
216 | 4 | free(cpu_data); | |
217 | 4 | cpu_data = NULL; | |
218 | 4 | free(cpu_by_id); | |
219 | 4 | cpu_by_id = NULL; | |
220 | 4 | } | |
221 | |||
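
One detail of the init worth isolating: since omp_get_max_threads cannot be called this early, the default thread count falls back from OMP_NUM_THREADS to the population of the process mask. A condensed sketch of that fallback (glibc's CPU_COUNT requires _GNU_SOURCE):

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdlib.h>

/* Same fallback as the init above: honor OMP_NUM_THREADS if set,
 * otherwise count the CPUs in the process mask. */
static int resolve_default_num_threads(const cpu_set_t *process_mask) {
    const char *env = getenv("OMP_NUM_THREADS");
    return env ? atoi(env) : CPU_COUNT(process_mask);
}
```
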
222 | /*********************************************************************************/ | ||
223 | /* Blocking calls specific functions */ | ||
224 | /*********************************************************************************/ | ||
225 | |||
226 | /*TODO: what happens when executing in "ompss" mode*/ | ||
227 | 1 | void omptm_role_shift__IntoBlockingCall(void) { | |
228 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (lewi) { |
229 | /* We don't know what to do if a Blocking Call is invoked inside a | ||
230 | * parallel region. We could ignore it, but then we should also ignore | ||
231 | * the associated OutOfBlockingCall, and how would we detect it? */ | ||
232 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | fatal_cond(DLB_ATOMIC_LD(&in_parallel), |
233 | "Blocking Call inside a parallel region not supported"); | ||
234 | cpu_set_t cpus_to_lend; | ||
235 | 1 | CPU_ZERO(&cpus_to_lend); | |
236 | int i; | ||
237 | 2/2 ✓ Branch 0 taken 8 times. ✓ Branch 1 taken 1 times. | 9 | for(i = 0; i < system_size; i++){ |
238 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 4 times. | 8 | if(DLB_ATOMIC_LD_RLX(&cpu_data[i].ownership) == OWN){ |
239 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 3 times. | 4 | if(i == cpu_by_id[global_tid]){ |
240 | //Just change the status for the thread calling MPI | ||
241 | 1 | DLB_ATOMIC_ST_RLX(&cpu_data[i].ownership, LENT); | |
242 | } | ||
243 | else{ | ||
244 | 3 | cb_disable_cpu(i, NULL); | |
245 | } | ||
246 | 1/2 ✓ Branch 0 taken 4 times. ✗ Branch 1 not taken. | 4 | CPU_SET(i, &cpus_to_lend); |
247 | } | ||
248 | 1/8 ✗ Branch 0 not taken. ✓ Branch 1 taken 4 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. ✗ Branch 4 not taken. ✗ Branch 5 not taken. ✗ Branch 6 not taken. ✗ Branch 7 not taken. | 4 | else if(DLB_ATOMIC_LD_RLX(&cpu_data[i].ownership) == BORROWED && CPU_ISSET(i, &process_mask)){ |
249 | ✗ | DLB_ATOMIC_ST_RLX(&cpu_data[i].ownership, UNKNOWN); | |
250 | ✗ | CPU_SET(i, &cpus_to_lend); | |
251 | } | ||
252 | } | ||
253 | 1 | DLB_LendCpuMask(&cpus_to_lend); | |
254 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | verbose(VB_OMPT, "IntoBlockingCall - lending all"); |
255 | } | ||
256 | 1 | } | |
257 | |||
258 | |||
259 | 1 | void omptm_role_shift__OutOfBlockingCall(void) { | |
260 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | if (lewi) { |
261 | 1 | cb_enable_cpu(cpu_by_id[global_tid], NULL); | |
262 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (omptool_opts & OMPTOOL_OPTS_LEND) { |
263 | /* Do nothing. | ||
264 | * Do not reclaim since going out of a blocking call is not | ||
265 | * an indication that the CPUs may be needed. | ||
266 | * OMPTOOL_OPTS_AGGRESSIVE also takes this branch. */ | ||
267 | } | ||
268 | else { | ||
269 | 1 | DLB_Reclaim(); | |
270 | } | ||
271 | } | ||
272 | 1 | } | |
273 | |||
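
Together the two hooks implement a lend-while-blocked protocol: entering a blocking call lends every CPU the process still owns (only flipping the status of the caller's own CPU), and leaving it re-enables the caller's CPU and, unless the LEND policy is active, reclaims the rest. A schematic of the round trip with the same DLB calls, error handling omitted:

```c
#include <sched.h>
#include "apis/dlb.h"   /* DLB_LendCpuMask / DLB_Reclaim, as used above */

/* Schematic only: 'owned' is assumed to hold the CPUs this process owns. */
static void blocking_call_roundtrip(const cpu_set_t *owned) {
    DLB_LendCpuMask(owned);   /* IntoBlockingCall: lend everything     */
    /* ... blocking MPI call ... */
    DLB_Reclaim();            /* OutOfBlockingCall: take it back again */
}
```
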
274 | |||
275 | /*********************************************************************************/ | ||
276 | /* OMPT registered callbacks */ | ||
277 | /*********************************************************************************/ | ||
278 | |||
279 | 3 | void omptm_role_shift__thread_begin(ompt_thread_t thread_type) { | |
280 | /* Set up thread local spd */ | ||
281 | 3 | spd_enter_dlb(thread_spd); | |
282 | |||
283 | 3 | global_tid = __kmp_get_thread_id(); | |
284 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | fatal_cond(registered_threads > system_size, |
285 | "DLB created more threads than existing CPUs in the node"); | ||
286 | |||
287 | 3 | int cpuid = cpu_by_id[global_tid]; | |
288 | 1/2 ✓ Branch 0 taken 3 times. ✗ Branch 1 not taken. | 3 | if(thread_type == ompt_thread_other){ //other => free agent |
289 | cpu_set_t thread_mask; | ||
290 | 3 | CPU_ZERO(&thread_mask); | |
291 | 1/4 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. ✗ Branch 2 not taken. ✗ Branch 3 not taken. | 3 | if(cpuid >= 0 && (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != OWN && |
292 | ✗ | DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) != LENT)){ | |
293 | //Bind the thread to its pre-assigned CPU, and return the CPU afterwards if necessary | ||
294 | ✗ | cpu_data[cpuid].fa = true; | |
295 | ✗ | cpu_by_id[global_tid] = cpuid; | |
296 | ✗ | CPU_SET(cpuid, &thread_mask); | |
297 | ✗ | pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask); | |
298 | ✗ | verbose(VB_OMPT, "Binding a free agent to CPU %d", cpuid); | |
299 | instrument_event(REBIND_EVENT, cpuid+1, EVENT_BEGIN); | ||
300 | ✗ | if (lewi) { | |
301 | ✗ | if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { | |
302 | ✗ | if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { | |
303 | ✗ | cb_disable_cpu(cpuid, NULL); | |
304 | } | ||
305 | } | ||
306 | ✗ | else if (DLB_ATOMIC_LD(&pending_tasks) == 0) { | |
307 | ✗ | cb_disable_cpu(cpuid, NULL); | |
308 | /* TODO: only lend free agents not part of the process mask */ | ||
309 | /* or, depending on the ompt dlb policy */ | ||
310 | ✗ | if (!CPU_ISSET(cpuid, &process_mask)) { | |
311 | ✗ | DLB_LendCpu(cpuid); | |
312 | } | ||
313 | } | ||
314 | } | ||
315 | } | ||
316 | else{ | ||
317 | 3 | pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &thread_mask); | |
318 | 3 | cpuid = mu_get_single_cpu(&thread_mask); | |
319 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | if(cpuid != -1){ |
320 | ✗ | cpu_by_id[global_tid] = cpuid; | |
321 | ✗ | cpu_data[cpuid].fa = true; | |
322 | } | ||
323 | else | ||
324 | 3 | warning("Started a free agent with multiple CPUs in the affinity mask"); | |
325 | } | ||
326 | } | ||
327 | 3 | } | |
328 | |||
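
A free agent that starts on a borrowed CPU is pinned with a single-CPU affinity mask before anything else runs on it, as thread_begin does above. The binding step in isolation:

```c
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

/* Pin the calling thread to exactly one CPU, as done for free agents.
 * Returns 0 on success, an errno value otherwise. */
static int bind_self_to_cpu(int cpuid) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpuid, &mask);
    return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask);
}
```
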
329 | 2 | void omptm_role_shift__thread_role_shift(ompt_data_t *thread_data, | |
330 | ompt_role_t prior_role, | ||
331 | ompt_role_t next_role){ | ||
332 | 2 | int cpuid = cpu_by_id[global_tid]; | |
333 | 2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 1 times. | 2 | if(prior_role == OMP_ROLE_FREE_AGENT){ |
334 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | if(next_role == OMP_ROLE_COMMUNICATOR) return; //Not supported for now |
335 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | verbose(VB_OMPT, "Free agent %d changing the role to NONE", global_tid); |
336 | } | ||
337 | 1/2 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. | 1 | else if(prior_role == OMP_ROLE_NONE){ |
338 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 1 times. | 1 | if(next_role == OMP_ROLE_COMMUNICATOR) return; //Not supported for now |
339 | 3/6 ✓ Branch 0 taken 1 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 1 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 1 times. ✗ Branch 5 not taken. | 1 | if(CPU_ISSET(cpuid, &process_mask)) //One of the initial/worker threads. Don't need to check for own CPUs. |
340 | 1 | return; | |
341 | ✗ | if (lewi) { | |
342 | ✗ | if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { | |
343 | ✗ | if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { | |
344 | ✗ | cb_disable_cpu(cpuid, NULL); | |
345 | } | ||
346 | } | ||
347 | ✗ | else if (DLB_ATOMIC_LD(&pending_tasks) == 0 | |
348 | ✗ | && (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) == BORROWED | |
349 | ✗ | || omptool_opts & OMPTOOL_OPTS_LEND)) { | |
350 | ✗ | cb_disable_cpu(cpuid, NULL); | |
351 | ✗ | DLB_LendCpu(cpuid); | |
352 | } | ||
353 | } | ||
354 | } | ||
355 | } | ||
356 | |||
357 | |||
358 | 3 | void omptm_role_shift__parallel_begin(omptool_parallel_data_t *parallel_data) { | |
359 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 1 times. | 3 | if (parallel_data->level == 1) { |
360 | 2 | DLB_ATOMIC_ST(&in_parallel, true); | |
361 | 2 | DLB_ATOMIC_ST(¤t_parallel_size, parallel_data->requested_parallelism); | |
362 | } | ||
363 | 3 | } | |
364 | |||
365 | 3 | void omptm_role_shift__parallel_end(omptool_parallel_data_t *parallel_data) { | |
366 | 2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 1 times. | 3 | if (parallel_data->level == 1) { |
367 | 2 | DLB_ATOMIC_ST(&in_parallel, false); | |
368 | } | ||
369 | 3 | } | |
370 | |||
371 | 6 | void omptm_role_shift__task_create(void) { | |
372 | /* Increment the amount of pending tasks */ | ||
373 | 6 | DLB_ATOMIC_ADD(&pending_tasks, 1); | |
374 | |||
375 | /* For now, let's assume that we always want to increase the number | ||
376 | * of active threads whenever a task is created | ||
377 | */ | ||
378 | 1/2 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken. | 6 | if (lewi) { |
379 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 6 times. | 6 | if(omptool_opts == OMPTOOL_OPTS_LEND) { |
380 | ✗ | DLB_BorrowCpus(1); | |
381 | } | ||
382 | else { | ||
383 | 6 | DLB_AcquireCpus(1); | |
384 | } | ||
385 | } | ||
386 | 6 | } | |
387 | |||
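
Every created task increments the pending-task counter and requests one extra CPU from DLB; under the LEND policy only idle lent CPUs are borrowed, otherwise owned CPUs may also be reclaimed. A condensed sketch of that pattern (the names here are illustrative):

```c
#include <stdatomic.h>
#include "apis/dlb.h"   /* DLB_BorrowCpus / DLB_AcquireCpus, as used above */

static atomic_uint pending_demo = 0;

/* Mirrors task_create above: one CPU request per new task. */
static void on_task_created(int lend_policy) {
    atomic_fetch_add(&pending_demo, 1);
    if (lend_policy)
        DLB_BorrowCpus(1);   /* take only CPUs that others have lent */
    else
        DLB_AcquireCpus(1);  /* may also reclaim CPUs we lent out    */
}
```
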
388 | 9 | void omptm_role_shift__task_complete(void) { | |
389 | 9 | int cpuid = cpu_by_id[global_tid]; | |
390 | 4/6 ✓ Branch 0 taken 9 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 9 times. ✗ Branch 3 not taken. ✓ Branch 4 taken 8 times. ✓ Branch 5 taken 1 times. | 9 | if (lewi && cpuid >= 0 && cpu_data[cpuid].fa) { |
391 | /* Return CPU if reclaimed */ | ||
392 | 2/2 ✓ Branch 1 taken 3 times. ✓ Branch 2 taken 5 times. | 8 | if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) { |
393 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | if(DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) == UNKNOWN) { |
394 | /* We have previously returned the CPU, but the free agent | ||
395 | * didn't emit a role-shift event to be rescheduled. This can | ||
396 | * happen when the thread receives a change from NONE to FA | ||
397 | * just after an FA to NONE change. In that case, the second | ||
398 | * shift cancels the first one and the thread doesn't emit a | ||
399 | * callback. Just deactivate the thread. */ | ||
400 | ✗ | DLB_ATOMIC_SUB(&num_free_agents, 1); | |
401 | ✗ | __kmp_set_thread_roles2(global_tid, OMP_ROLE_NONE); | |
402 | } | ||
403 | 1/2 ✗ Branch 1 not taken. ✓ Branch 2 taken 3 times. | 3 | else if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM) { |
404 | ✗ | cb_disable_cpu(cpuid, NULL); | |
405 | } | ||
406 | } | ||
407 | /* Lend CPU if no more tasks and CPU is borrowed, or policy is LEND */ | ||
408 | 2/2 ✓ Branch 0 taken 4 times. ✓ Branch 1 taken 1 times. | 5 | else if (DLB_ATOMIC_LD(&pending_tasks) == 0 |
409 | 2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 1 times. | 4 | && (DLB_ATOMIC_LD_RLX(&cpu_data[cpuid].ownership) == BORROWED |
410 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 3 times. | 3 | || omptool_opts & OMPTOOL_OPTS_LEND)) { |
411 | 1 | cb_disable_cpu(cpuid, NULL); | |
412 | 1 | DLB_LendCpu(cpuid); | |
413 | } | ||
414 | } | ||
415 | 9 | } | |
416 | |||
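
task_complete follows a return-if-reclaimed protocol on the free agent's CPU: if DLB reports the CPU is no longer available, return it (falling back to a purely local disable when even the return is refused); if there is no work left and the CPU was borrowed, lend it back. A schematic of that decision chain, where `local_disable` is a stand-in for `cb_disable_cpu`:

```c
#include "apis/dlb.h"   /* DLB_CheckCpuAvailability / DLB_ReturnCpu / DLB_LendCpu */

static void local_disable(int cpuid) { (void)cpuid; /* cb_disable_cpu stand-in */ }

/* Schematic condensation of the task_complete checks above. */
static void maybe_release_cpu(int cpuid, int no_more_tasks) {
    if (DLB_CheckCpuAvailability(cpuid) == DLB_ERR_PERM) {
        /* The owner reclaimed this CPU: give it back, or stop using it. */
        if (DLB_ReturnCpu(cpuid) == DLB_ERR_PERM)
            local_disable(cpuid);
    } else if (no_more_tasks) {
        local_disable(cpuid);
        DLB_LendCpu(cpuid);   /* borrowed and idle: lend it back */
    }
}
```
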
417 | 6 | void omptm_role_shift__task_switch(void) { | |
418 | 3/4 ✓ Branch 0 taken 6 times. ✗ Branch 1 not taken. ✓ Branch 2 taken 5 times. ✓ Branch 3 taken 1 times. | 6 | if (lewi && DLB_ATOMIC_SUB(&pending_tasks, 1) > 1) { |
419 | 1/2 ✗ Branch 0 not taken. ✓ Branch 1 taken 5 times. | 5 | if(omptool_opts == OMPTOOL_OPTS_LEND) { |
420 | ✗ | DLB_BorrowCpus(1); | |
421 | } else { | ||
422 | 5 | DLB_AcquireCpus(1); | |
423 | } | ||
424 | } | ||
425 | 6 | } | |
426 | |||
427 | |||
428 | /*********************************************************************************/ | ||
429 | /* Functions for testing purposes */ | ||
430 | /*********************************************************************************/ | ||
431 | |||
432 | 3 | int omptm_role_shift_testing__get_num_free_agents(void) { | |
433 | 3 | return num_free_agents; | |
434 | } | ||
435 | |||
436 | 3 | int omptm_role_shift_testing__get_num_registered_threads(void) { | |
437 | 3 | return registered_threads; | |
438 | } | ||
439 | |||
440 | 3 | int omptm_role_shift_testing__get_current_parallel_size(void) { | |
441 | 3 | return current_parallel_size; | |
442 | } | ||
443 | |||
444 | 7 | void omptm_role_shift_testing__set_pending_tasks(unsigned int num_tasks) { | |
445 | 7 | pending_tasks = num_tasks; | |
446 | 7 | } | |
447 | |||
448 | 6 | unsigned int omptm_role_shift_testing__get_pending_tasks(void) { | |
449 | 6 | return pending_tasks; | |
450 | } | ||
451 | |||
452 | 10 | void omptm_role_shift_testing__set_global_tid(int tid) { | |
453 | 10 | global_tid = tid; | |
454 | 10 | } | |
455 | |||
456 | 6 | bool omptm_role_shift_testing__in_parallel(void) { | |
457 | 6 | return in_parallel; | |
458 | } | ||
459 | |||
460 | 32 | int omptm_role_shift_testing__get_id_from_cpu(int cpuid) { | |
461 | 32 | return get_id_from_cpu(cpuid); | |
462 | } | ||
463 | |||
464 | 1 | int* omptm_role_shift_testing__get_cpu_by_id_ptr(void) { | |
465 | 1 | return cpu_by_id; | |
466 | } | ||
467 | |||
468 | 1 | cpu_data_t* omptm_role_shift_testing__get_cpu_data_ptr(void) { | |
469 | 1 | return cpu_data; | |
470 | } | ||
471 |