Line | Branch | Exec | Source |
---|---|---|---|
1 | /*********************************************************************************/ | ||
2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
3 | /* */ | ||
4 | /* This file is part of the DLB library. */ | ||
5 | /* */ | ||
6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
9 | /* (at your option) any later version. */ | ||
10 | /* */ | ||
11 | /* DLB is distributed in the hope that it will be useful, */ | ||
12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
14 | /* GNU Lesser General Public License for more details. */ | ||
15 | /* */ | ||
16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
18 | /*********************************************************************************/ | ||
19 | |||
20 | /* Configure second tool with: | ||
21 | * DLB_TOOL_LIBRARIES | ||
22 | * DLB_TOOL_VERBOSE_INIT | ||
23 | */ | ||
24 | #define OMPT_MULTIPLEX_TOOL_NAME "DLB" | ||
25 | #include "LB_numThreads/ompt-multiplex.h" | ||
26 | |||
27 | #include "LB_numThreads/omptool.h" | ||
28 | |||
29 | #include "LB_numThreads/omp-tools.h" | ||
30 | #include "LB_numThreads/omptm_omp5.h" | ||
31 | #include "LB_numThreads/omptm_free_agents.h" | ||
32 | #include "LB_numThreads/omptm_role_shift.h" | ||
33 | #include "LB_core/DLB_talp.h" | ||
34 | #include "LB_core/spd.h" | ||
35 | #include "apis/dlb.h" | ||
36 | #include "support/debug.h" | ||
37 | #include "support/mask_utils.h" | ||
38 | #include "support/tracing.h" | ||
39 | #include "apis/dlb_errors.h" | ||
40 | |||
41 | #include <inttypes.h> | ||
42 | #include <unistd.h> | ||
43 | #include <string.h> | ||
44 | #include <stdbool.h> | ||
45 | |||
46 | |||
47 | static omptool_callback_funcs_t omptool_funcs = {0}; | ||
48 | static omptool_event_funcs_t talp_funcs = {0}; | ||
49 | static omptool_event_funcs_t omptm_funcs = {0}; | ||
50 | static ompt_set_callback_t set_callback_fn = NULL; | ||
51 | |||
52 | |||
53 | /*********************************************************************************/ | ||
54 | /* Init & Finalize */ | ||
55 | /*********************************************************************************/ | ||
56 | |||
57 | 4 | static void omptool__init(pid_t process_id, const options_t *options) { | |
58 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.init) { |
59 | 2 | talp_funcs.init(process_id, options); | |
60 | } | ||
61 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.init) { |
62 | 2 | omptm_funcs.init(process_id, options); | |
63 | } | ||
64 | 4 | } | |
65 | |||
66 | 4 | static void omptool__finalize(void) { | |
67 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.finalize) { |
68 | 2 | talp_funcs.finalize(); | |
69 | } | ||
70 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.finalize) { |
71 | 2 | omptm_funcs.finalize(); | |
72 | } | ||
73 | 4 | } | |
74 | |||
75 | |||
76 | /*********************************************************************************/ | ||
77 | /* "Callbacks" from MPI and DLB API */ | ||
78 | /*********************************************************************************/ | ||
79 | |||
80 | 18 | void omptool__into_blocking_call(void) { | |
81 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
|
18 | if (talp_funcs.into_mpi) { |
82 | 2 | talp_funcs.into_mpi(); | |
83 | } | ||
84 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
|
18 | if (omptm_funcs.into_mpi) { |
85 | 2 | omptm_funcs.into_mpi(); | |
86 | } | ||
87 | 18 | } | |
88 | |||
89 | 18 | void omptool__outof_blocking_call(void) { | |
90 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
|
18 | if (omptm_funcs.outof_mpi) { |
91 | 2 | omptm_funcs.outof_mpi(); | |
92 | } | ||
93 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
|
18 | if (talp_funcs.outof_mpi) { |
94 | 2 | talp_funcs.outof_mpi(); | |
95 | } | ||
96 | 18 | } | |
97 | |||
98 | 3 | void omptool__lend_from_api(void) { | |
99 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.lend_from_api) { |
100 | 2 | omptm_funcs.lend_from_api(); | |
101 | } | ||
102 | 3 | } | |
103 | |||
104 | |||
105 | /*********************************************************************************/ | ||
106 | /* OpenMP callbacks */ | ||
107 | /*********************************************************************************/ | ||
108 | |||
109 | 3 | static void omptool_callback__thread_begin( | |
110 | ompt_thread_t thread_type, | ||
111 | ompt_data_t *thread_data) { | ||
112 | |||
113 | 3 | spd_enter_dlb(thread_spd); | |
114 | |||
115 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (talp_funcs.thread_begin) { |
116 | 2 | talp_funcs.thread_begin(thread_type); | |
117 | } | ||
118 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.thread_begin) { |
119 | 2 | omptm_funcs.thread_begin(thread_type); | |
120 | } | ||
121 | 3 | } | |
122 | |||
123 | 3 | static void omptool_callback__thread_end( | |
124 | ompt_data_t *thread_data) { | ||
125 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (talp_funcs.thread_end) { |
126 | 2 | talp_funcs.thread_end(); | |
127 | } | ||
128 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.thread_end) { |
129 | 2 | omptm_funcs.thread_end(); | |
130 | } | ||
131 | 3 | } | |
132 | |||
133 | |||
134 | /* Parallel data storage from level 1 will be reused, other levels will be | ||
135 | * allocated on demand */ | ||
136 | static omptool_parallel_data_t omptool_parallel_data_level1 = {.level = 1}; | ||
137 | |||
138 | 9 | static void omptool_callback__parallel_begin( | |
139 | ompt_data_t *encountering_task_data, | ||
140 | const ompt_frame_t *encountering_task_frame, | ||
141 | ompt_data_t *parallel_data, | ||
142 | unsigned int requested_parallelism, | ||
143 | int flags, | ||
144 | const void *codeptr_ra) { | ||
145 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
|
9 | if (flags & ompt_parallel_team) { |
146 | /* Obtain the nesting level of the generating parallel region and save | ||
147 | * some info in parallel_data */ | ||
148 | 6 | omptool_parallel_data_t *omptool_parallel_data = NULL; | |
149 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (encountering_task_frame->exit_frame.ptr == NULL) { |
150 | /* No exit frame means initial task, so this parallel is level 1 */ | ||
151 | 3 | omptool_parallel_data = &omptool_parallel_data_level1; | |
152 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | } else if (encountering_task_data->value > 0) { |
153 | /* Allocate new data */ | ||
154 | 3 | omptool_parallel_data = malloc(sizeof(omptool_parallel_data_t)); | |
155 | 3 | omptool_parallel_data->level = encountering_task_data->value + 1; | |
156 | } | ||
157 | |||
158 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
|
6 | ensure(omptool_parallel_data != NULL, "Unhandled case in %s",__func__); |
159 | |||
160 | 6 | omptool_parallel_data->codeptr_ra = codeptr_ra; | |
161 | 6 | omptool_parallel_data->requested_parallelism = requested_parallelism; | |
162 | 6 | parallel_data->ptr = omptool_parallel_data; | |
163 | |||
164 | /* Finally, invoke TALP or OMPTM if needed */ | ||
165 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (talp_funcs.parallel_begin) { |
166 | 4 | talp_funcs.parallel_begin(omptool_parallel_data); | |
167 | } | ||
168 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (omptm_funcs.parallel_begin) { |
169 | 4 | omptm_funcs.parallel_begin(omptool_parallel_data); | |
170 | } | ||
171 | } else { | ||
172 | /* ompt_parallel_league ? not supported */ | ||
173 | } | ||
174 | 9 | } | |
175 | |||
176 | 6 | static void omptool_callback__parallel_end( | |
177 | ompt_data_t *parallel_data, | ||
178 | ompt_data_t *encountering_task_data, | ||
179 | int flags, | ||
180 | const void *codeptr_ra) { | ||
181 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (flags & ompt_parallel_team) { |
182 | 6 | omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr; | |
183 | |||
184 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (talp_funcs.parallel_end) { |
185 | 4 | talp_funcs.parallel_end(omptool_parallel_data); | |
186 | } | ||
187 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (omptm_funcs.parallel_end) { |
188 | 4 | omptm_funcs.parallel_end(omptool_parallel_data); | |
189 | } | ||
190 | |||
191 | /* Deallocate if needed */ | ||
192 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (omptool_parallel_data->level > 1) { |
193 | 3 | free(parallel_data->ptr); | |
194 | } | ||
195 | } | ||
196 | 6 | } | |
197 | |||
198 | 3 | static void omptool_callback__task_create( | |
199 | ompt_data_t *encountering_task_data, | ||
200 | const ompt_frame_t *encountering_task_frame, | ||
201 | ompt_data_t *new_task_data, | ||
202 | int flags, | ||
203 | int has_dependences, | ||
204 | const void *codeptr_ra) { | ||
205 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (flags & ompt_task_explicit) { |
206 | /* Pass nesting level */ | ||
207 | 3 | new_task_data->value = encountering_task_data->value; | |
208 | |||
209 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (talp_funcs.task_create) { |
210 | 2 | talp_funcs.task_create(); | |
211 | } | ||
212 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.task_create) { |
213 | 2 | omptm_funcs.task_create(); | |
214 | } | ||
215 | } | ||
216 | 3 | } | |
217 | |||
218 | 6 | static void omptool_callback__task_schedule( | |
219 | ompt_data_t *prior_task_data, | ||
220 | ompt_task_status_t prior_task_status, | ||
221 | ompt_data_t *next_task_data) { | ||
222 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
|
6 | if (prior_task_status == ompt_task_complete) { |
223 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (talp_funcs.task_complete) { |
224 | 2 | talp_funcs.task_complete(); | |
225 | } | ||
226 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.task_complete) { |
227 | 2 | omptm_funcs.task_complete(); | |
228 | } | ||
229 | instrument_event(BINDINGS_EVENT, 0, EVENT_END); | ||
230 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | } else if (prior_task_status == ompt_task_switch) { |
231 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (talp_funcs.task_switch) { |
232 | 2 | talp_funcs.task_switch(); | |
233 | } | ||
234 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.task_switch) { |
235 | 2 | omptm_funcs.task_switch(); | |
236 | } | ||
237 | instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN); | ||
238 | } | ||
239 | 6 | } | |
240 | |||
241 | 6 | static void omptool_callback__implicit_task( | |
242 | ompt_scope_endpoint_t endpoint, | ||
243 | ompt_data_t *parallel_data, | ||
244 | ompt_data_t *task_data, | ||
245 | unsigned int actual_parallelism, | ||
246 | unsigned int index, | ||
247 | int flags) { | ||
248 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (endpoint == ompt_scope_begin |
249 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | && parallel_data != NULL |
250 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | && parallel_data->ptr != NULL |
251 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | && flags & ompt_task_implicit) { |
252 | |||
253 | 6 | omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr; | |
254 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (index == 0) { |
255 | 6 | omptool_parallel_data->actual_parallelism = actual_parallelism; | |
256 | } | ||
257 | |||
258 | /* Pass nesting level to implicit task */ | ||
259 | 6 | task_data->value = omptool_parallel_data->level; | |
260 | |||
261 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (talp_funcs.into_parallel_function) { |
262 | 4 | talp_funcs.into_parallel_function(omptool_parallel_data, index); | |
263 | } | ||
264 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (omptm_funcs.into_parallel_function) { |
265 | 4 | omptm_funcs.into_parallel_function(omptool_parallel_data, index); | |
266 | } | ||
267 | |||
268 | instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN); | ||
269 | } | ||
270 | 6 | } | |
271 | |||
272 | /* Warning: at the time of writing (Sep 2023), LLVM upstream still uses the | ||
273 | * deprecated kind ompt_sync_region_barrier_implicit for the | ||
274 | * sync-region-implicit-parallel event, so we cannot yet remove the deprecated | ||
275 | * enum. This enum value is also used for implicit barriers in a **single** | ||
276 | * region, but we can still identify the implicit barrier of a parallel region | ||
277 | * by comparing the codeptr_ra, which will be NULL for all team-worker threads, | ||
278 | * and equal to the codeptr_ra from parallel_begin for the primary thread. | ||
279 | */ | ||
280 | 9 | static void omptool_callback__sync_region( | |
281 | ompt_sync_region_t kind, | ||
282 | ompt_scope_endpoint_t endpoint, | ||
283 | ompt_data_t *parallel_data, | ||
284 | ompt_data_t *task_data, | ||
285 | const void *codeptr_ra) { | ||
286 |
2/4✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
|
9 | if (parallel_data != NULL && parallel_data->ptr != NULL) { |
287 | 9 | omptool_parallel_data_t *data = parallel_data->ptr; | |
288 |
2/4✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
|
9 | switch (kind) { |
289 | 6 | case ompt_sync_region_barrier_implicit: | |
290 | /* deprecated enum, includes implicit barriers from parallel, | ||
291 | * single, workshare, etc. */ | ||
292 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (endpoint == ompt_scope_begin |
293 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | && (codeptr_ra == NULL |
294 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | || codeptr_ra == data->codeptr_ra)) { |
295 | /* barrier of implicit parallel only if codeptr_ra is NULL | ||
296 | * or equal to parallel region's */ | ||
297 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (talp_funcs.into_parallel_implicit_barrier) { |
298 | 4 | talp_funcs.into_parallel_implicit_barrier(data); | |
299 | } | ||
300 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
|
6 | if (omptm_funcs.into_parallel_implicit_barrier) { |
301 | 4 | omptm_funcs.into_parallel_implicit_barrier(data); | |
302 | } | ||
303 | instrument_event(BINDINGS_EVENT, 0, EVENT_END); | ||
304 | ✗ | } else if (endpoint == ompt_scope_begin) { | |
305 | ✗ | if (talp_funcs.into_parallel_sync) { | |
306 | ✗ | talp_funcs.into_parallel_sync(data); | |
307 | } | ||
308 | ✗ | if (omptm_funcs.into_parallel_sync) { | |
309 | ✗ | omptm_funcs.into_parallel_sync(data); | |
310 | } | ||
311 | ✗ | } else if (endpoint == ompt_scope_end) { | |
312 | ✗ | if (talp_funcs.outof_parallel_sync) { | |
313 | ✗ | talp_funcs.outof_parallel_sync(data); | |
314 | } | ||
315 | ✗ | if (omptm_funcs.outof_parallel_sync) { | |
316 | ✗ | omptm_funcs.outof_parallel_sync(data); | |
317 | } | ||
318 | } | ||
319 | 6 | break; | |
320 | ✗ | case ompt_sync_region_barrier_explicit: | |
321 | DLB_FALLTHROUGH; | ||
322 | case ompt_sync_region_taskwait: | ||
323 | DLB_FALLTHROUGH; | ||
324 | case ompt_sync_region_taskgroup: | ||
325 | ✗ | if (endpoint == ompt_scope_begin) { | |
326 | ✗ | if (talp_funcs.into_parallel_sync) { | |
327 | ✗ | talp_funcs.into_parallel_sync(data); | |
328 | } | ||
329 | ✗ | if (omptm_funcs.into_parallel_sync) { | |
330 | ✗ | omptm_funcs.into_parallel_sync(data); | |
331 | } | ||
332 | ✗ | } else if (endpoint == ompt_scope_end) { | |
333 | ✗ | if (talp_funcs.outof_parallel_sync) { | |
334 | ✗ | talp_funcs.outof_parallel_sync(data); | |
335 | } | ||
336 | ✗ | if (omptm_funcs.outof_parallel_sync) { | |
337 | ✗ | omptm_funcs.outof_parallel_sync(data); | |
338 | } | ||
339 | } | ||
340 | ✗ | break; | |
341 | 3 | case ompt_sync_region_barrier_implicit_parallel: | |
342 | /* new enum in OMP 5.1 not yet implemented in any known runtime */ | ||
343 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (endpoint == ompt_scope_begin) { |
344 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (talp_funcs.into_parallel_implicit_barrier) { |
345 | 2 | talp_funcs.into_parallel_implicit_barrier(data); | |
346 | } | ||
347 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (omptm_funcs.into_parallel_implicit_barrier) { |
348 | 2 | omptm_funcs.into_parallel_implicit_barrier(data); | |
349 | } | ||
350 | instrument_event(BINDINGS_EVENT, 0, EVENT_END); | ||
351 | } | ||
352 | 3 | break; | |
353 | ✗ | default: | |
354 | ✗ | break; | |
355 | } | ||
356 | } | ||
357 | 9 | } | |
358 | |||
359 | |||
360 | /*********************************************************************************/ | ||
361 | /* Function pointers setup */ | ||
362 | /*********************************************************************************/ | ||
363 | |||
364 | /* For testing purposes only, predefine function pointers */ | ||
365 | static bool test_funcs_init = false; | ||
366 | 3 | void omptool_testing__setup_event_fn_ptrs( | |
367 | const omptool_event_funcs_t *talp_test_funcs, | ||
368 | const omptool_event_funcs_t *omptm_test_funcs) { | ||
369 | 3 | test_funcs_init = true; | |
370 | 3 | talp_funcs = *talp_test_funcs; | |
371 | 3 | omptm_funcs = *omptm_test_funcs; | |
372 | 3 | } | |
373 | |||
374 | 4 | static void setup_omp_fn_ptrs(omptm_version_t omptm_version, bool talp_openmp) { | |
375 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
|
4 | if (!test_funcs_init) { |
376 | /* talp_funcs */ | ||
377 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (talp_openmp) { |
378 | ✗ | verbose(VB_OMPT, "Enabling OMPT support for TALP"); | |
379 | ✗ | talp_funcs = (const omptool_event_funcs_t) { | |
380 | .init = talp_openmp_init, | ||
381 | .finalize = talp_openmp_finalize, | ||
382 | .into_mpi = NULL, | ||
383 | .outof_mpi = NULL, | ||
384 | .lend_from_api = NULL, | ||
385 | .thread_begin = talp_openmp_thread_begin, | ||
386 | .thread_end = talp_openmp_thread_end, | ||
387 | .thread_role_shift = NULL, | ||
388 | .parallel_begin = talp_openmp_parallel_begin, | ||
389 | .parallel_end = talp_openmp_parallel_end, | ||
390 | .into_parallel_function | ||
391 | = talp_openmp_into_parallel_function, | ||
392 | .into_parallel_implicit_barrier | ||
393 | = talp_openmp_into_parallel_implicit_barrier, | ||
394 | .into_parallel_sync = talp_openmp_into_parallel_sync, | ||
395 | .outof_parallel_sync = talp_openmp_outof_parallel_sync, | ||
396 | .task_create = talp_openmp_task_create, | ||
397 | .task_complete = talp_openmp_task_complete, | ||
398 | .task_switch = talp_openmp_task_switch, | ||
399 | }; | ||
400 | } | ||
401 | |||
402 | /* omptm_funcs */ | ||
403 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (omptm_version == OMPTM_OMP5) { |
404 | ✗ | omptm_funcs = (const omptool_event_funcs_t) { | |
405 | .init = omptm_omp5__init, | ||
406 | .finalize = omptm_omp5__finalize, | ||
407 | .into_mpi = omptm_omp5__IntoBlockingCall, | ||
408 | .outof_mpi = omptm_omp5__OutOfBlockingCall, | ||
409 | .lend_from_api = omptm_omp5__lend_from_api, | ||
410 | .thread_begin = NULL, | ||
411 | .thread_end = NULL, | ||
412 | .thread_role_shift = NULL, | ||
413 | .parallel_begin = omptm_omp5__parallel_begin, | ||
414 | .parallel_end = omptm_omp5__parallel_end, | ||
415 | .into_parallel_function | ||
416 | = omptm_omp5__into_parallel_function, | ||
417 | .into_parallel_implicit_barrier | ||
418 | = omptm_omp5__into_parallel_implicit_barrier, | ||
419 | .task_create = NULL, | ||
420 | .task_complete = NULL, | ||
421 | .task_switch = NULL, | ||
422 | }; | ||
423 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | } else if (omptm_version == OMPTM_FREE_AGENTS) { |
424 | ✗ | verbose(VB_OMPT, "Enabling experimental support with free agent threads"); | |
425 | ✗ | omptm_funcs = (const omptool_event_funcs_t) { | |
426 | .init = omptm_free_agents__init, | ||
427 | .finalize = omptm_free_agents__finalize, | ||
428 | .into_mpi = omptm_free_agents__IntoBlockingCall, | ||
429 | .outof_mpi = omptm_free_agents__OutOfBlockingCall, | ||
430 | .lend_from_api = NULL, | ||
431 | .thread_begin = omptm_free_agents__thread_begin, | ||
432 | .thread_end = NULL, | ||
433 | .thread_role_shift = NULL, | ||
434 | .parallel_begin = omptm_free_agents__parallel_begin, | ||
435 | .parallel_end = omptm_free_agents__parallel_end, | ||
436 | .into_parallel_function | ||
437 | = omptm_free_agents__into_parallel_function, | ||
438 | .into_parallel_implicit_barrier = NULL, | ||
439 | .task_create = omptm_free_agents__task_create, | ||
440 | .task_complete = omptm_free_agents__task_complete, | ||
441 | .task_switch = omptm_free_agents__task_switch, | ||
442 | }; | ||
443 | } | ||
444 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | else if (omptm_version == OMPTM_ROLE_SHIFT) { |
445 | ✗ | verbose(VB_OMPT, "Enabling experimental support with role shift threads"); | |
446 | ✗ | omptm_funcs = (const omptool_event_funcs_t) { | |
447 | .init = omptm_role_shift__init, | ||
448 | .finalize = omptm_role_shift__finalize, | ||
449 | .into_mpi = omptm_role_shift__IntoBlockingCall, | ||
450 | .outof_mpi = omptm_role_shift__OutOfBlockingCall, | ||
451 | .lend_from_api = NULL, | ||
452 | .thread_begin = omptm_role_shift__thread_begin, | ||
453 | .thread_end = NULL, | ||
454 | .thread_role_shift = omptm_role_shift__thread_role_shift, | ||
455 | .parallel_begin = omptm_role_shift__parallel_begin, | ||
456 | .parallel_end = omptm_role_shift__parallel_end, | ||
457 | .into_parallel_function = NULL, | ||
458 | .into_parallel_implicit_barrier = NULL, | ||
459 | .task_create = omptm_role_shift__task_create, | ||
460 | .task_complete = omptm_role_shift__task_complete, | ||
461 | .task_switch = omptm_role_shift__task_switch, | ||
462 | }; | ||
463 | } | ||
464 | } | ||
465 | |||
466 | 4 | omptool_funcs = (const omptool_callback_funcs_t) {}; | |
467 | |||
468 | /* The following callbacks use thread_data, task_data, or parallel_data and need | ||
469 | * to be centralized, whether we use only one module or both */ | ||
470 |
4/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
|
4 | if (talp_openmp || omptm_version != OMPTM_NONE) { |
471 | 3 | omptool_funcs.thread_begin = omptool_callback__thread_begin; | |
472 | 3 | omptool_funcs.thread_end = omptool_callback__thread_end; | |
473 | 3 | omptool_funcs.parallel_begin = omptool_callback__parallel_begin; | |
474 | 3 | omptool_funcs.parallel_end = omptool_callback__parallel_end; | |
475 | 3 | omptool_funcs.task_create = omptool_callback__task_create; | |
476 | 3 | omptool_funcs.task_schedule = omptool_callback__task_schedule; | |
477 | 3 | omptool_funcs.implicit_task = omptool_callback__implicit_task; | |
478 | 3 | omptool_funcs.sync_region = omptool_callback__sync_region; | |
479 | } | ||
480 | |||
481 | /* The following function is a custom callback and it's only used in the | ||
482 | * experimental role-shift thread manager */ | ||
483 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (omptm_version == OMPTM_ROLE_SHIFT) { |
484 | ✗ | omptool_funcs.thread_role_shift = omptm_role_shift__thread_role_shift; | |
485 | } | ||
486 | 4 | } | |
487 | |||
488 | 24 | static inline int set_ompt_callback(ompt_callbacks_t event, ompt_callback_t callback) { | |
489 | 24 | int error = 1; | |
490 |
1/7✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 24 times.
✗ Branch 7 not taken.
|
24 | switch (set_callback_fn(event, callback)) { |
491 | ✗ | case ompt_set_error: | |
492 | ✗ | verbose(VB_OMPT, "OMPT set callback %d failed.", event); | |
493 | ✗ | break; | |
494 | ✗ | case ompt_set_never: | |
495 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'never'. The event was " | |
496 | "registered but it will never occur or the callback will never " | ||
497 | "be invoked at runtime.", event); | ||
498 | ✗ | break; | |
499 | ✗ | case ompt_set_impossible: | |
500 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'impossible'. The event " | |
501 | "may occur but the tracing of it is not possible.", event); | ||
502 | ✗ | break; | |
503 | ✗ | case ompt_set_sometimes: | |
504 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes'. The event " | |
505 | "may occur and the callback will be invoked at runtime, but " | ||
506 | "only for an implementation-defined subset of associated event " | ||
507 | "occurrences.", event); | ||
508 | ✗ | error = 0; | |
509 | ✗ | break; | |
510 | ✗ | case ompt_set_sometimes_paired: | |
511 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes paired'. " | |
512 | "The event may occur and the callback will be invoked at " | ||
513 | "runtime, but only for an implementation-defined subset of " | ||
514 | "associated event occurrences. If any callback is invoked " | ||
515 | "with a begin_scope endpoint, it will be invoked also later " | ||
516 | "with and end_scope endpoint.", event); | ||
517 | ✗ | error = 0; | |
518 | ✗ | break; | |
519 | 24 | case ompt_set_always: | |
520 | 24 | error = 0; | |
521 | 24 | break; | |
522 | ✗ | default: | |
523 | ✗ | fatal("Unsupported return code at set_ompt_callback, " | |
524 | "please file a bug report."); | ||
525 | } | ||
526 | 24 | return error; | |
527 | } | ||
528 | |||
529 | 4 | static int set_ompt_callbacks(ompt_function_lookup_t lookup, omptm_version_t omptm_version, | |
530 | bool talp_openmp) { | ||
531 | |||
532 | /* Populate global structs */ | ||
533 | 4 | setup_omp_fn_ptrs(omptm_version, talp_openmp); | |
534 | |||
535 | 4 | int error = 0; | |
536 | 4 | set_callback_fn = (ompt_set_callback_t)lookup("ompt_set_callback"); | |
537 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (set_callback_fn) { |
538 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.thread_begin) { |
539 | 3 | error += set_ompt_callback( | |
540 | ompt_callback_thread_begin, | ||
541 | 3 | (ompt_callback_t)omptool_funcs.thread_begin); | |
542 | } | ||
543 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.thread_end) { |
544 | 3 | error += set_ompt_callback( | |
545 | ompt_callback_thread_end, | ||
546 | 3 | (ompt_callback_t)omptool_funcs.thread_end); | |
547 | } | ||
548 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (omptool_funcs.thread_role_shift) { |
549 | ✗ | error += set_ompt_callback( | |
550 | ompt_callback_thread_role_shift, | ||
551 | ✗ | (ompt_callback_t)omptool_funcs.thread_role_shift); | |
552 | } | ||
553 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.parallel_begin) { |
554 | 3 | error += set_ompt_callback( | |
555 | ompt_callback_parallel_begin, | ||
556 | 3 | (ompt_callback_t)omptool_funcs.parallel_begin); | |
557 | } | ||
558 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.parallel_end) { |
559 | 3 | error += set_ompt_callback( | |
560 | ompt_callback_parallel_end, | ||
561 | 3 | (ompt_callback_t)omptool_funcs.parallel_end); | |
562 | } | ||
563 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.task_create) { |
564 | 3 | error += set_ompt_callback( | |
565 | ompt_callback_task_create, | ||
566 | 3 | (ompt_callback_t)omptool_funcs.task_create); | |
567 | } | ||
568 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.task_schedule) { |
569 | 3 | error += set_ompt_callback( | |
570 | ompt_callback_task_schedule, | ||
571 | 3 | (ompt_callback_t)omptool_funcs.task_schedule); | |
572 | } | ||
573 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.implicit_task) { |
574 | 3 | error += set_ompt_callback( | |
575 | ompt_callback_implicit_task, | ||
576 | 3 | (ompt_callback_t)omptool_funcs.implicit_task); | |
577 | } | ||
578 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.sync_region) { |
579 | 3 | error += set_ompt_callback( | |
580 | ompt_callback_sync_region, | ||
581 | 3 | (ompt_callback_t)omptool_funcs.sync_region); | |
582 | } | ||
583 | } else { | ||
584 | ✗ | error = 1; | |
585 | ✗ | verbose(VB_OMPT, "Could not look up function \"ompt_set_callback\""); | |
586 | } | ||
587 | |||
588 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (!error) { |
589 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "OMPT callbacks successfully registered"); |
590 | } | ||
591 | |||
592 | 4 | return error; | |
593 | } | ||
594 | |||
595 | 4 | static void unset_ompt_callbacks(void) { | |
596 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (set_callback_fn) { |
597 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.thread_begin) { |
598 | 3 | set_callback_fn(ompt_callback_thread_begin, NULL); | |
599 | } | ||
600 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.thread_end) { |
601 | 3 | set_callback_fn(ompt_callback_thread_end, NULL); | |
602 | } | ||
603 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (omptool_funcs.thread_role_shift) { |
604 | ✗ | set_callback_fn(ompt_callback_thread_role_shift, NULL); | |
605 | } | ||
606 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.parallel_begin) { |
607 | 3 | set_callback_fn(ompt_callback_parallel_begin, NULL); | |
608 | } | ||
609 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.parallel_end) { |
610 | 3 | set_callback_fn(ompt_callback_parallel_end, NULL); | |
611 | } | ||
612 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.task_create) { |
613 | 3 | set_callback_fn(ompt_callback_task_create, NULL); | |
614 | } | ||
615 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.task_schedule) { |
616 | 3 | set_callback_fn(ompt_callback_task_schedule, NULL); | |
617 | } | ||
618 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.implicit_task) { |
619 | 3 | set_callback_fn(ompt_callback_implicit_task, NULL); | |
620 | } | ||
621 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
|
4 | if (omptool_funcs.sync_region) { |
622 | 3 | set_callback_fn(ompt_callback_sync_region, NULL); | |
623 | } | ||
624 | } | ||
625 | 4 | } | |
626 | |||
627 | |||
628 | /*********************************************************************************/ | ||
629 | /* OMPT start tool */ | ||
630 | /*********************************************************************************/ | ||
631 | |||
632 | static bool dlb_initialized_through_ompt = false; | ||
633 | static const char *openmp_runtime_version; | ||
634 | |||
635 | 4 | static int omptool_initialize(ompt_function_lookup_t lookup, int initial_device_num, | |
636 | ompt_data_t *tool_data) { | ||
637 | /* Parse options and get the required fields */ | ||
638 | options_t options; | ||
639 | 4 | options_init(&options, NULL); | |
640 | 4 | debug_init(&options); | |
641 | |||
642 | /* Print OMPT version and variables */ | ||
643 | 4 | const char *omp_policy_str = getenv("OMP_WAIT_POLICY"); | |
644 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Detected OpenMP runtime: %s", openmp_runtime_version); |
645 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Environment variables of interest:"); |
646 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, " OMP_WAIT_POLICY: %s", omp_policy_str); |
647 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (strncmp(openmp_runtime_version, "Intel", 5) == 0 || |
648 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | strncmp(openmp_runtime_version, "LLVM", 4) == 0) { |
649 | ✗ | verbose(VB_OMPT, " KMP_LIBRARY: %s", getenv("KMP_LIBRARY")); | |
650 | ✗ | verbose(VB_OMPT, " KMP_BLOCKTIME: %s", getenv("KMP_BLOCKTIME")); | |
651 | } | ||
652 | /* when GCC implements OMPT: else if "gomp", print GOMP_SPINCOUNT */ | ||
653 | |||
654 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
4 | verbose(VB_OMPT, "DLB with OMPT support is %s", options.ompt ? "ENABLED" : "DISABLED"); |
655 | |||
656 | /* Enable OMPT only if requested */ | ||
657 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (options.ompt) { |
658 | /* Emit warning if OMP_WAIT_POLICY is not "passive" */ | ||
659 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (options.lewi && |
660 | ✗ | options.omptm_version == OMPTM_OMP5 && | |
661 | ✗ | (!omp_policy_str || strcasecmp(omp_policy_str, "passive") != 0)) { | |
662 | ✗ | warning("OMP_WAIT_POLICY value it not \"passive\". Even though the default " | |
663 | "value may be \"passive\", setting it explicitly is recommended " | ||
664 | "since it modifies other runtime related environment variables"); | ||
665 | } | ||
666 | |||
667 | 4 | int err = 0; | |
668 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (options.preinit_pid == 0) { |
669 | /* Force init */ | ||
670 | cpu_set_t process_mask; | ||
671 | 4 | sched_getaffinity(0, sizeof(process_mask), &process_mask); | |
672 | 4 | err = DLB_Init(0, &process_mask, NULL); | |
673 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (err == DLB_SUCCESS) { |
674 | 4 | dlb_initialized_through_ompt = true; | |
675 | ✗ | } else if (err != DLB_ERR_INIT) { | |
676 | ✗ | warning("DLB_Init failed: %s", DLB_Strerror(err)); | |
677 | } | ||
678 | } else { | ||
679 | /* Initialize DLB only if ompt is enabled, and | ||
680 | * remember if succeeded to finalize it when ompt_finalize is invoked. */ | ||
681 | ✗ | err = DLB_Init(0, NULL, NULL); | |
682 | ✗ | if (err == DLB_SUCCESS) { | |
683 | ✗ | dlb_initialized_through_ompt = true; | |
684 | } else { | ||
685 | ✗ | verbose(VB_OMPT, "DLB_Init: %s", DLB_Strerror(err)); | |
686 | } | ||
687 | } | ||
688 | |||
689 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Initializing OMPT module"); |
690 | |||
691 | /* Register OMPT callbacks */ | ||
692 | 4 | err = set_ompt_callbacks(lookup, options.omptm_version, options.talp_openmp); | |
693 | |||
694 | /* If callbacks are successfully registered, initialize modules | ||
695 | * and return a non-zero value to activate the tool */ | ||
696 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (!err) { |
697 | 4 | omptool__init(thread_spd->id, &options); | |
698 | 4 | options_finalize(&options); | |
699 | 4 | return 1; | |
700 | } | ||
701 | |||
702 | /* Otherwise, finalize DLB if init succeeded */ | ||
703 | ✗ | if (dlb_initialized_through_ompt) { | |
704 | ✗ | DLB_Finalize(); | |
705 | } | ||
706 | |||
707 | ✗ | warning("DLB could not register itself as OpenMP tool"); | |
708 | } | ||
709 | |||
710 | ✗ | options_finalize(&options); | |
711 | |||
712 | ✗ | return 0; | |
713 | } | ||
714 | |||
715 | 4 | static void omptool_finalize(ompt_data_t *tool_data) { | |
716 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Finalizing OMPT module"); |
717 | |||
718 | /* Finalize DLB if needed */ | ||
719 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (dlb_initialized_through_ompt) { |
720 | 4 | DLB_Finalize(); | |
721 | } | ||
722 | |||
723 | /* Disable OMPT callbacks */ | ||
724 | 4 | unset_ompt_callbacks(); | |
725 | |||
726 | /* Finalize modules */ | ||
727 | 4 | omptool__finalize(); | |
728 | 4 | } | |
729 | |||
730 | |||
731 | #pragma GCC visibility push(default) | ||
732 | 1 | ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, const char *runtime_version) { | |
733 | 1 | openmp_runtime_version = runtime_version; | |
734 | static ompt_start_tool_result_t tool = { | ||
735 | .initialize = omptool_initialize, | ||
736 | .finalize = omptool_finalize, | ||
737 | .tool_data = {0} | ||
738 | }; | ||
739 | 1 | return &tool; | |
740 | } | ||
741 | #pragma GCC visibility pop | ||
742 |