GCC Code Coverage Report


Directory: src/
File: src/LB_numThreads/omptool.c
Date: 2025-11-21 10:34:40
Exec Total Coverage
Lines: 262 399 65.7%
Functions: 22 24 91.7%
Branches: 150 366 41.0%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 /* Configure second tool with:
21 * DLB_TOOL_LIBRARIES
22 * DLB_TOOL_VERBOSE_INIT
23 */
24 #define OMPT_MULTIPLEX_TOOL_NAME "DLB"
25 #include "LB_numThreads/ompt-multiplex.h"
26
27 #include "LB_numThreads/omptool.h"
28
29 #include "LB_numThreads/omp-tools.h"
30 #include "LB_numThreads/omptm_omp5.h"
31 #include "LB_numThreads/omptm_free_agents.h"
32 #include "LB_numThreads/omptm_role_shift.h"
33 #include "LB_core/spd.h"
34 #include "apis/dlb.h"
35 #include "apis/dlb_errors.h"
36 #include "support/debug.h"
37 #include "support/dlb_common.h"
38 #include "support/mask_utils.h"
39 #include "support/tracing.h"
40 #include "talp/talp_openmp.h"
41
42 #include <inttypes.h>
43 #include <unistd.h>
44 #include <string.h>
45 #include <stdbool.h>
46 #include <sys/syscall.h>
47
48
49 static omptool_callback_funcs_t omptool_funcs = {0};
50 static omptool_event_funcs_t talp_funcs = {0};
51 static omptool_event_funcs_t omptm_funcs = {0};
52 static ompt_set_callback_t set_callback_fn = NULL;
53
54
55 /*********************************************************************************/
56 /* Init & Finalize */
57 /*********************************************************************************/
58
59 4 static void omptool__init(pid_t process_id, const options_t *options) {
60
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.init) {
61 2 talp_funcs.init(process_id, options);
62 }
63
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.init) {
64 2 omptm_funcs.init(process_id, options);
65 }
66 4 }
67
68 4 static void omptool__finalize(void) {
69
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.finalize) {
70 2 talp_funcs.finalize();
71 }
72
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.finalize) {
73 2 omptm_funcs.finalize();
74 }
75 4 }
76
77
78 /*********************************************************************************/
79 /* "Callbacks" from MPI and DLB API */
80 /*********************************************************************************/
81
82 20 void omptool__into_blocking_call(void) {
83
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
20 if (talp_funcs.into_mpi) {
84 2 talp_funcs.into_mpi();
85 }
86
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
20 if (omptm_funcs.into_mpi) {
87 2 omptm_funcs.into_mpi();
88 }
89 20 }
90
91 20 void omptool__outof_blocking_call(void) {
92
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
20 if (omptm_funcs.outof_mpi) {
93 2 omptm_funcs.outof_mpi();
94 }
95
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
20 if (talp_funcs.outof_mpi) {
96 2 talp_funcs.outof_mpi();
97 }
98 20 }
99
100 4 void omptool__lend_from_api(void) {
101
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.lend_from_api) {
102 2 omptm_funcs.lend_from_api();
103 }
104 4 }
105
106
107 /*********************************************************************************/
108 /* OpenMP callbacks */
109 /*********************************************************************************/
110
111 4 static void omptool_callback__thread_begin(
112 ompt_thread_t thread_type,
113 ompt_data_t *thread_data) {
114
115 4 spd_enter_dlb(thread_spd);
116
117
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "native-thread-begin event");
118
119
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.thread_begin) {
120 2 talp_funcs.thread_begin(thread_type);
121 }
122
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.thread_begin) {
123 2 omptm_funcs.thread_begin(thread_type);
124 }
125 4 }
126
127 4 static void omptool_callback__thread_end(
128 ompt_data_t *thread_data) {
129
130
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "native-thread-end event");
131
132
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.thread_end) {
133 2 talp_funcs.thread_end();
134 }
135
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.thread_end) {
136 2 omptm_funcs.thread_end();
137 }
138 4 }
139
140
141 /* Parallel data storage from level 1 will be reused, other levels will be
142 * allocated on demand */
143 static omptool_parallel_data_t omptool_parallel_data_level1 = {.level = 1};
144
145 12 static void omptool_callback__parallel_begin(
146 ompt_data_t *encountering_task_data,
147 const ompt_frame_t *encountering_task_frame,
148 ompt_data_t *parallel_data,
149 unsigned int requested_parallelism,
150 int flags,
151 const void *codeptr_ra) {
152
2/2
✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
12 if (flags & ompt_parallel_team) {
153 /* Obtain the nesting level of the generating parallel region and save
154 * some info in parallel_data */
155 8 omptool_parallel_data_t *omptool_parallel_data = NULL;
156
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (encountering_task_frame->exit_frame.ptr == NULL) {
157 /* No exit frame means initial task, so this parallel is level 1 */
158 4 omptool_parallel_data = &omptool_parallel_data_level1;
159
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 } else if (encountering_task_data->value > 0) {
160 /* Allocate new data */
161 4 omptool_parallel_data = malloc(sizeof(omptool_parallel_data_t));
162 4 omptool_parallel_data->level = encountering_task_data->value + 1;
163 }
164
165
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 ensure(omptool_parallel_data != NULL, "Unhandled case in %s",__func__);
166
167 8 omptool_parallel_data->codeptr_ra = codeptr_ra;
168 8 omptool_parallel_data->requested_parallelism = requested_parallelism;
169 8 parallel_data->ptr = omptool_parallel_data;
170
171
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (omptool_parallel_data->level == 1) {
172
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "parallel-begin event");
173 } else {
174
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "parallel-begin event: nesting level %u",
175 omptool_parallel_data->level);
176 }
177
178 /* Finally, invoke TALP or OMPTM if needed */
179
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_funcs.parallel_begin) {
180 4 talp_funcs.parallel_begin(omptool_parallel_data);
181 }
182
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (omptm_funcs.parallel_begin) {
183 4 omptm_funcs.parallel_begin(omptool_parallel_data);
184 }
185 } else {
186 /* ompt_parallel_league ? not supported */
187 }
188 12 }
189
190 8 static void omptool_callback__parallel_end(
191 ompt_data_t *parallel_data,
192 ompt_data_t *encountering_task_data,
193 int flags,
194 const void *codeptr_ra) {
195
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (flags & ompt_parallel_team) {
196 8 omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr;
197
198
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (omptool_parallel_data->level == 1) {
199
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "parallel-end event");
200 } else {
201
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "parallel-end event: nesting level %u",
202 omptool_parallel_data->level);
203 }
204
205
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_funcs.parallel_end) {
206 4 talp_funcs.parallel_end(omptool_parallel_data);
207 }
208
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (omptm_funcs.parallel_end) {
209 4 omptm_funcs.parallel_end(omptool_parallel_data);
210 }
211
212 /* Deallocate if needed */
213
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (omptool_parallel_data->level > 1) {
214 4 free(parallel_data->ptr);
215 }
216
217 instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN);
218 }
219 8 }
220
221 4 static void omptool_callback__task_create(
222 ompt_data_t *encountering_task_data,
223 const ompt_frame_t *encountering_task_frame,
224 ompt_data_t *new_task_data,
225 int flags,
226 int has_dependences,
227 const void *codeptr_ra) {
228
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (flags & ompt_task_explicit) {
229 /* Pass nesting level */
230 4 new_task_data->value = encountering_task_data->value;
231
232
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "task-create event");
233
234
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.task_create) {
235 2 talp_funcs.task_create();
236 }
237
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.task_create) {
238 2 omptm_funcs.task_create();
239 }
240 }
241 4 }
242
243 8 static void omptool_callback__task_schedule(
244 ompt_data_t *prior_task_data,
245 ompt_task_status_t prior_task_status,
246 ompt_data_t *next_task_data) {
247
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (prior_task_status == ompt_task_complete) {
248
249
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "task-schedule event: task complete");
250
251
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.task_complete) {
252 2 talp_funcs.task_complete();
253 }
254
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.task_complete) {
255 2 omptm_funcs.task_complete();
256 }
257
258 instrument_event(BINDINGS_EVENT, 0, EVENT_END);
259
260
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 } else if (prior_task_status == ompt_task_switch) {
261
262
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "task-schedule event: task switch");
263
264
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.task_switch) {
265 2 talp_funcs.task_switch();
266 }
267
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.task_switch) {
268 2 omptm_funcs.task_switch();
269 }
270
271 instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN);
272 }
273 8 }
274
275 8 static void omptool_callback__implicit_task(
276 ompt_scope_endpoint_t endpoint,
277 ompt_data_t *parallel_data,
278 ompt_data_t *task_data,
279 unsigned int actual_parallelism,
280 unsigned int index,
281 int flags) {
282
283
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (flags & ompt_task_implicit) {
284
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (endpoint == ompt_scope_begin
285
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 && parallel_data != NULL
286
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
16 && parallel_data->ptr != NULL) {
287
288 8 omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr;
289
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (index == 0) {
290 8 omptool_parallel_data->actual_parallelism = actual_parallelism;
291 }
292
293 /* Pass nesting level to implicit task */
294 8 task_data->value = omptool_parallel_data->level;
295
296
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 verbose(VB_OMPT, "implicit-task-begin event: into parallel function");
297
298
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (talp_funcs.into_parallel_function) {
299 4 talp_funcs.into_parallel_function(omptool_parallel_data, index);
300 }
301
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
8 if (omptm_funcs.into_parallel_function) {
302 4 omptm_funcs.into_parallel_function(omptool_parallel_data, index);
303 }
304
305 instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN);
306 }
307 else if (endpoint == ompt_scope_end) {
308 /* Note: parallel_data is NULL */
309
310 verbose(VB_OMPT, "implicit-task-end event");
311
312 if (index == 0) {
313 /* The primary thread state is already controlled in the parallel-end event */
314 } else {
315 /* This event is emitted differently across OpenMP implementations.
316 *
317 * In LLVM (tested with earlier versions until 19), team-worker threads are
318 * typically suspended during the implicit barrier at the end of a parallel
319 * region, and this event is not triggered until they resume at the start of
320 * the next parallel region.
321 *
322 * Cray PE (tested with versions 8.3 - 8.5, based on LLVM 17) invokes this
323 * event for each team-worker thread, but sometimes after the primary thread
324 * has already triggered the parallel-end event. This can introduce race
325 * conditions that cause the timing metrics to be attributed to different states,
326 * potentially leading to inconsistent efficiency results.
327 */
328 if (talp_funcs.outof_parallel_function) {
329 talp_funcs.outof_parallel_function();
330 }
331 if (omptm_funcs.outof_parallel_function) {
332 omptm_funcs.outof_parallel_function();
333 }
334
335 instrument_event(BINDINGS_EVENT, 0, EVENT_END);
336 }
337 }
338 }
339 else if (flags & ompt_task_initial) {
340 if (endpoint == ompt_scope_begin) {
341 verbose(VB_OMPT, "initial-task-begin event");
342 } else if (endpoint == ompt_scope_end) {
343 verbose(VB_OMPT, "initial-task-end event");
344 }
345 }
346 8 }
347
348
349 12 static inline void into_parallel_implicit_barrier(omptool_parallel_data_t *data) {
350
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (talp_funcs.into_parallel_implicit_barrier) {
351 6 talp_funcs.into_parallel_implicit_barrier(data);
352 }
353
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
12 if (omptm_funcs.into_parallel_implicit_barrier) {
354 6 omptm_funcs.into_parallel_implicit_barrier(data);
355 }
356
357 instrument_event(BINDINGS_EVENT, 0, EVENT_END);
358 12 }
359
360 static inline void into_parallel_sync(omptool_parallel_data_t *data) {
361 if (talp_funcs.into_parallel_sync) {
362 talp_funcs.into_parallel_sync(data);
363 }
364 if (omptm_funcs.into_parallel_sync) {
365 omptm_funcs.into_parallel_sync(data);
366 }
367 }
368
369 static inline void outof_parallel_sync(omptool_parallel_data_t *data) {
370 if (talp_funcs.outof_parallel_sync) {
371 talp_funcs.outof_parallel_sync(data);
372 }
373 if (omptm_funcs.outof_parallel_sync) {
374 omptm_funcs.outof_parallel_sync(data);
375 }
376 }
377
378 /* Warning: Newer LLVM versions already use the new
379 * `ompt_sync_region_barrier_implicit_parallel` enum, but older versions and
380 * Cray runtimes still use the deprecated `ompt_sync_region_barrier_implicit`.
381 * This enum value is also used for implicit barriers in a **single** region,
382 * but we can still identify the implicit barrier of a parallel region
383 * by comparing the codeptr_ra, which will be NULL for all team-worker threads,
384 * and equal to the codeptr_ra from parallel_begin for the primary thread.
385 */
386 12 static void omptool_callback__sync_region(
387 ompt_sync_region_t kind,
388 ompt_scope_endpoint_t endpoint,
389 ompt_data_t *parallel_data,
390 ompt_data_t *task_data,
391 const void *codeptr_ra) {
392
2/4
✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
12 if (parallel_data != NULL && parallel_data->ptr != NULL) {
393 12 omptool_parallel_data_t *data = parallel_data->ptr;
394
2/10
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 4 times.
✗ Branch 9 not taken.
12 switch (kind) {
395 8 case ompt_sync_region_barrier_implicit:
396 /* deprecated enum, includes implicit barriers from parallel,
397 * single, workshare, etc. */
398
2/4
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
8 if (codeptr_ra == NULL || codeptr_ra == data->codeptr_ra) {
399
400 /* Note: Cray OpenMP still uses this enum, but apparently
401 * only for parallel regions with no tasks */
402
403 /* implicit barrier in parallel region only if codeptr_ra is NULL
404 * or equal to parallel region's */
405
406
1/2
✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
8 if (endpoint == ompt_scope_begin) {
407
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
8 verbose(VB_OMPT, "implicit-barrier-begin event: "
408 "end of parallel function (deprecated enum)");
409 8 into_parallel_implicit_barrier(data);
410 }
411 else if (endpoint == ompt_scope_end) {
412 verbose(VB_OMPT, "implicit-barrier-end event: "
413 "end of parallel function (deprecated enum)");
414 }
415
416 } else {
417
418 /* other implicit barriers */
419
420 if (endpoint == ompt_scope_begin) {
421 verbose(VB_OMPT, "implicit-barrier-begin event: sync region");
422 into_parallel_sync(data);
423 } else if (endpoint == ompt_scope_end) {
424 verbose(VB_OMPT, "implicit-barrier-end event: sync region");
425 outof_parallel_sync(data);
426 }
427 }
428 8 break;
429
430 case ompt_sync_region_barrier_explicit:
431 if (endpoint == ompt_scope_begin) {
432 verbose(VB_OMPT, "explicit-barrier-begin event");
433 into_parallel_sync(data);
434 } else if (endpoint == ompt_scope_end) {
435 verbose(VB_OMPT, "explicit-barrier-end event");
436 outof_parallel_sync(data);
437 }
438 break;
439
440 case ompt_sync_region_barrier_implementation:
441 verbose(VB_OMPT, "Implementation-Specific Barrier event");
442 if (endpoint == ompt_scope_begin) {
443 into_parallel_sync(data);
444 } else if (endpoint == ompt_scope_end) {
445 outof_parallel_sync(data);
446 }
447 break;
448
449 case ompt_sync_region_taskwait:
450 if (endpoint == ompt_scope_begin) {
451 verbose(VB_OMPT, "taskwait-begin event");
452 into_parallel_sync(data);
453 } else if (endpoint == ompt_scope_end) {
454 verbose(VB_OMPT, "taskwait-end event");
455 outof_parallel_sync(data);
456 }
457 break;
458
459 case ompt_sync_region_taskgroup:
460 if (endpoint == ompt_scope_begin) {
461 verbose(VB_OMPT, "taskgroup-begin event");
462 into_parallel_sync(data);
463 } else if (endpoint == ompt_scope_end) {
464 verbose(VB_OMPT, "taskgroup-end event");
465 outof_parallel_sync(data);
466 }
467 break;
468
469 case ompt_sync_region_barrier_implicit_workshare:
470 if (endpoint == ompt_scope_begin) {
471 verbose(VB_OMPT, "implicit-barrier-begin event: workshare");
472 into_parallel_sync(data);
473 } else if (endpoint == ompt_scope_end) {
474 verbose(VB_OMPT, "implicit-barrier-end event: workshare");
475 outof_parallel_sync(data);
476 }
477 break;
478
479 case ompt_sync_region_barrier_teams:
480 if (endpoint == ompt_scope_begin) {
481 verbose(VB_OMPT, "implicit-barrier-begin event: teams");
482 into_parallel_sync(data);
483 } else if (endpoint == ompt_scope_end) {
484 verbose(VB_OMPT, "implicit-barrier-end event: teams");
485 outof_parallel_sync(data);
486 }
487 break;
488
489 case ompt_sync_region_reduction:
490 /* For now we don't do anything with reductions */
491 break;
492
493 4 case ompt_sync_region_barrier_implicit_parallel:
494 /* new enum in OMP 5.1 */
495
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (endpoint == ompt_scope_begin) {
496
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "implicit-barrier-begin event: end of parallel function");
497 4 into_parallel_implicit_barrier(data);
498 }
499 else if (endpoint == ompt_scope_end) {
500 verbose(VB_OMPT, "implicit-barrier-end event: end of parallel function");
501 }
502 4 break;
503
504 default:
505 break;
506 }
507 }
508 12 }
509
510
511 /*********************************************************************************/
512 /* Function pointers setup */
513 /*********************************************************************************/
514
515 /* For testing purposes only, override function pointers */
516 static const omptool_event_funcs_t *test_talp_funcs = NULL;
517 static const omptool_event_funcs_t *test_omptm_funcs = NULL;
518 1 void omptool_testing__setup_event_fn_ptrs(
519 const omptool_event_funcs_t *talp_fns,
520 const omptool_event_funcs_t *omptm_fns) {
521 1 test_talp_funcs = talp_fns;
522 1 test_omptm_funcs = omptm_fns;
523 1 }
524
525 4 static void setup_omp_fn_ptrs(omptm_version_t omptm_version, bool talp_openmp) {
526
527 /* talp_funcs */
528
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_openmp) {
529
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 verbose(VB_OMPT, "Enabling OMPT support for TALP");
530
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (test_talp_funcs) {
531 2 talp_funcs = *test_talp_funcs;
532 } else {
533 talp_funcs = talp_events_vtable;
534 }
535 } else {
536 2 talp_funcs = (const omptool_event_funcs_t) {};
537 }
538
539 /* omptm_funcs */
540
2/5
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
4 switch(omptm_version) {
541 2 case OMPTM_NONE:
542 2 omptm_funcs = (const omptool_event_funcs_t) {};
543 2 break;
544 2 case OMPTM_OMP5:
545
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 verbose(VB_OMPT, "Enabling OMPT support for OpenMP 5.0");
546
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (test_omptm_funcs) {
547 2 omptm_funcs = *test_omptm_funcs;
548 } else {
549 omptm_funcs = omptm_omp5_events_vtable;
550 }
551 2 break;
552 case OMPTM_FREE_AGENTS:
553 verbose(VB_OMPT, "Enabling experimental support with free agent threads");
554 if (test_omptm_funcs) {
555 omptm_funcs = *test_omptm_funcs;
556 } else {
557 omptm_funcs = omptm_free_agents_events_vtable;
558 }
559 break;
560 case OMPTM_ROLE_SHIFT:
561 verbose(VB_OMPT, "Enabling experimental support with role shift threads");
562 if (test_omptm_funcs) {
563 omptm_funcs = *test_omptm_funcs;
564 } else {
565 omptm_funcs = omptm_role_shift_events_vtable;
566 }
567 break;
568 }
569
570 /* The following callbacks are centralized and always registered, even if both
571 * omptm and talp are disabled. This is deliberate: it allows event tracing and
572 * debugging of event entry via verbose messages. */
573 4 omptool_funcs = (const omptool_callback_funcs_t) {
574 .thread_begin = omptool_callback__thread_begin,
575 .thread_end = omptool_callback__thread_end,
576 .parallel_begin = omptool_callback__parallel_begin,
577 .parallel_end = omptool_callback__parallel_end,
578 .task_create = omptool_callback__task_create,
579 .task_schedule = omptool_callback__task_schedule,
580 .implicit_task = omptool_callback__implicit_task,
581 .sync_region = omptool_callback__sync_region,
582 };
583
584 /* The following function is a custom callback and it's only used in the
585 * experimental role-shift thread manager */
586
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (omptm_version == OMPTM_ROLE_SHIFT) {
587 omptool_funcs.thread_role_shift = omptm_role_shift__thread_role_shift;
588 }
589 4 }
590
591 32 static inline int set_ompt_callback(ompt_callbacks_t event, ompt_callback_t callback) {
592 32 int error = 1;
593
1/7
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 32 times.
✗ Branch 7 not taken.
32 switch (set_callback_fn(event, callback)) {
594 case ompt_set_error:
595 verbose(VB_OMPT, "OMPT set callback %d failed.", event);
596 break;
597 case ompt_set_never:
598 verbose(VB_OMPT, "OMPT set callback %d returned 'never'. The event was "
599 "registered but it will never occur or the callback will never "
600 "be invoked at runtime.", event);
601 break;
602 case ompt_set_impossible:
603 verbose(VB_OMPT, "OMPT set callback %d returned 'impossible'. The event "
604 "may occur but the tracing of it is not possible.", event);
605 break;
606 case ompt_set_sometimes:
607 verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes'. The event "
608 "may occur and the callback will be invoked at runtime, but "
609 "only for an implementation-defined subset of associated event "
610 "occurrences.", event);
611 error = 0;
612 break;
613 case ompt_set_sometimes_paired:
614 verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes paired'. "
615 "The event may occur and the callback will be invoked at "
616 "runtime, but only for an implementation-defined subset of "
617 "associated event occurrences. If any callback is invoked "
618 "with a begin_scope endpoint, it will be invoked also later "
619 "with and end_scope endpoint.", event);
620 error = 0;
621 break;
622 32 case ompt_set_always:
623 32 error = 0;
624 32 break;
625 default:
626 fatal("Unsupported return code at set_ompt_callback, "
627 "please file a bug report.");
628 }
629 32 return error;
630 }
631
632 4 static int set_ompt_callbacks(ompt_function_lookup_t lookup, omptm_version_t omptm_version,
633 bool talp_openmp) {
634
635 /* Populate global structs */
636 4 setup_omp_fn_ptrs(omptm_version, talp_openmp);
637
638 4 int error = 0;
639 4 set_callback_fn = (ompt_set_callback_t)lookup("ompt_set_callback");
640
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (set_callback_fn) {
641
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.thread_begin) {
642 4 error += set_ompt_callback(
643 ompt_callback_thread_begin,
644 4 (ompt_callback_t)omptool_funcs.thread_begin);
645 }
646
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.thread_end) {
647 4 error += set_ompt_callback(
648 ompt_callback_thread_end,
649 4 (ompt_callback_t)omptool_funcs.thread_end);
650 }
651
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (omptool_funcs.thread_role_shift) {
652 error += set_ompt_callback(
653 ompt_callback_thread_role_shift,
654 (ompt_callback_t)omptool_funcs.thread_role_shift);
655 }
656
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.parallel_begin) {
657 4 error += set_ompt_callback(
658 ompt_callback_parallel_begin,
659 4 (ompt_callback_t)omptool_funcs.parallel_begin);
660 }
661
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.parallel_end) {
662 4 error += set_ompt_callback(
663 ompt_callback_parallel_end,
664 4 (ompt_callback_t)omptool_funcs.parallel_end);
665 }
666
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.task_create) {
667 4 error += set_ompt_callback(
668 ompt_callback_task_create,
669 4 (ompt_callback_t)omptool_funcs.task_create);
670 }
671
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.task_schedule) {
672 4 error += set_ompt_callback(
673 ompt_callback_task_schedule,
674 4 (ompt_callback_t)omptool_funcs.task_schedule);
675 }
676
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.implicit_task) {
677 4 error += set_ompt_callback(
678 ompt_callback_implicit_task,
679 4 (ompt_callback_t)omptool_funcs.implicit_task);
680 }
681
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.sync_region) {
682 4 error += set_ompt_callback(
683 ompt_callback_sync_region,
684 4 (ompt_callback_t)omptool_funcs.sync_region);
685 }
686 } else {
687 error = 1;
688 verbose(VB_OMPT, "Could not look up function \"ompt_set_callback\"");
689 }
690
691
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (!error) {
692
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "OMPT callbacks successfully registered");
693 }
694
695 4 return error;
696 }
697
698 4 static void unset_ompt_callbacks(void) {
699
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (set_callback_fn) {
700
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.thread_begin) {
701 4 set_callback_fn(ompt_callback_thread_begin, NULL);
702 }
703
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.thread_end) {
704 4 set_callback_fn(ompt_callback_thread_end, NULL);
705 }
706
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (omptool_funcs.thread_role_shift) {
707 set_callback_fn(ompt_callback_thread_role_shift, NULL);
708 }
709
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.parallel_begin) {
710 4 set_callback_fn(ompt_callback_parallel_begin, NULL);
711 }
712
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.parallel_end) {
713 4 set_callback_fn(ompt_callback_parallel_end, NULL);
714 }
715
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.task_create) {
716 4 set_callback_fn(ompt_callback_task_create, NULL);
717 }
718
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.task_schedule) {
719 4 set_callback_fn(ompt_callback_task_schedule, NULL);
720 }
721
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.implicit_task) {
722 4 set_callback_fn(ompt_callback_implicit_task, NULL);
723 }
724
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (omptool_funcs.sync_region) {
725 4 set_callback_fn(ompt_callback_sync_region, NULL);
726 }
727 }
728 4 }
729
730
731 /*********************************************************************************/
732 /* OMPT start tool */
733 /*********************************************************************************/
734
735 static bool dlb_initialized_through_ompt = false;
736 static bool omptool_initialized = false;
737 static const char *openmp_runtime_version;
738
739 4 static int omptool_initialize(ompt_function_lookup_t lookup, int initial_device_num,
740 ompt_data_t *tool_data) {
741 /* Parse options and get the required fields */
742 options_t options;
743 4 options_init(&options, NULL);
744 4 debug_init(&options);
745
746 /* Print OMPT version and variables */
747 4 const char *omp_policy_str = getenv("OMP_WAIT_POLICY");
748
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Detected OpenMP runtime: %s", openmp_runtime_version);
749
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Environment variables of interest:");
750
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, " OMP_WAIT_POLICY: %s", omp_policy_str);
751
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (strncmp(openmp_runtime_version, "Intel", 5) == 0 ||
752
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 strncmp(openmp_runtime_version, "LLVM", 4) == 0) {
753 verbose(VB_OMPT, " KMP_LIBRARY: %s", getenv("KMP_LIBRARY"));
754 verbose(VB_OMPT, " KMP_BLOCKTIME: %s", getenv("KMP_BLOCKTIME"));
755 }
756 /* when GCC implements OMPT: else if "gomp", print GOMP_SPINCOUNT */
757
758
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 verbose(VB_OMPT, "DLB with OMPT support is %s", options.ompt ? "ENABLED" : "DISABLED");
759
760 /* Enable OMPT only if requested */
761
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (options.ompt) {
762
763 /* Disable --ompt-thread-manager if no LeWI or DROM */
764
3/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
4 if (!(options.lewi || options.drom)) {
765 2 options.omptm_version = OMPTM_NONE;
766 }
767
768 /* Emit warning if OMP_WAIT_POLICY is not "passive" */
769
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (options.lewi &&
770
2/4
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
2 options.omptm_version == OMPTM_OMP5 &&
771 (!omp_policy_str || strcasecmp(omp_policy_str, "passive") != 0)) {
772 2 warning("OMP_WAIT_POLICY value it not \"passive\". Even though the default "
773 "value may be \"passive\", setting it explicitly is recommended "
774 "since it modifies other runtime related environment variables");
775 }
776
777 4 int err = 0;
778
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (options.preinit_pid == 0) {
779 /* Force init */
780 cpu_set_t process_mask;
781 4 sched_getaffinity(0, sizeof(process_mask), &process_mask);
782 4 err = DLB_Init(0, &process_mask, NULL);
783
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (err == DLB_SUCCESS) {
784 4 dlb_initialized_through_ompt = true;
785 } else if (err != DLB_ERR_INIT) {
786 warning("DLB_Init failed: %s", DLB_Strerror(err));
787 }
788 } else {
789 /* Initialize DLB only if ompt is enabled, and
790 * remember if succeeded to finalize it when ompt_finalize is invoked. */
791 err = DLB_Init(0, NULL, NULL);
792 if (err == DLB_SUCCESS) {
793 dlb_initialized_through_ompt = true;
794 } else {
795 verbose(VB_OMPT, "DLB_Init: %s", DLB_Strerror(err));
796 }
797 }
798
799
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Initializing OMPT module");
800
801 /* Register OMPT callbacks */
802 4 err = set_ompt_callbacks(lookup, options.omptm_version, options.talp_openmp);
803
804 /* If callbacks are successfully registered, initialize modules
805 * and return a non-zero value to activate the tool */
806
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (!err) {
807 4 omptool__init(thread_spd->id, &options);
808 4 options_finalize(&options);
809 /* we save the thread ID that initialized OMPT */
810 4 tool_data->value = syscall(SYS_gettid);
811 4 omptool_initialized = true;
812 4 return 1;
813 }
814
815 /* Otherwise, finalize DLB if init succeeded */
816 if (dlb_initialized_through_ompt) {
817 DLB_Finalize();
818 dlb_initialized_through_ompt = false;
819 }
820
821 warning("DLB could not register itself as OpenMP tool");
822 }
823
824 options_finalize(&options);
825
826 return 0;
827 }
828
829 4 static void omptool_finalize(ompt_data_t *tool_data) {
830
831 /* Protect finalization against forks and other double calls */
832
1/2
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
4 if (tool_data->value == (uint64_t)syscall(SYS_gettid)
833
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 && omptool_initialized) {
834
835
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Finalizing OMPT module");
836
837 /* Finalize DLB if needed */
838
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (dlb_initialized_through_ompt) {
839 4 DLB_Finalize();
840 }
841
842 /* Disable OMPT callbacks */
843 4 unset_ompt_callbacks();
844
845 /* Finalize modules */
846 4 omptool__finalize();
847
848 4 omptool_initialized = false;
849 4 dlb_initialized_through_ompt = false;
850 }
851 4 }
852
853
854 #ifndef OMPT_MULTIPLEX_H
855 DLB_EXPORT_SYMBOL
856 #endif
857 1 ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
858 1 openmp_runtime_version = runtime_version;
859 static ompt_start_tool_result_t tool = {
860 .initialize = omptool_initialize,
861 .finalize = omptool_finalize,
862 .tool_data = {0}
863 };
864 1 return &tool;
865 }
866