GCC Code Coverage Report


Directory: src/
File: src/LB_numThreads/omptool.c
Date: 2024-11-22 17:07:10
Exec Total Coverage
Lines: 240 312 76.9%
Functions: 21 21 100.0%
Branches: 148 259 57.1%

Line Branch Exec Source
1 /*********************************************************************************/
2 /* Copyright 2009-2024 Barcelona Supercomputing Center */
3 /* */
4 /* This file is part of the DLB library. */
5 /* */
6 /* DLB is free software: you can redistribute it and/or modify */
7 /* it under the terms of the GNU Lesser General Public License as published by */
8 /* the Free Software Foundation, either version 3 of the License, or */
9 /* (at your option) any later version. */
10 /* */
11 /* DLB is distributed in the hope that it will be useful, */
12 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14 /* GNU Lesser General Public License for more details. */
15 /* */
16 /* You should have received a copy of the GNU Lesser General Public License */
17 /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18 /*********************************************************************************/
19
20 /* Configure second tool with:
21 * DLB_TOOL_LIBRARIES
22 * DLB_TOOL_VERBOSE_INIT
23 */
24 #define OMPT_MULTIPLEX_TOOL_NAME "DLB"
25 #include "LB_numThreads/ompt-multiplex.h"
26
27 #include "LB_numThreads/omptool.h"
28
29 #include "LB_numThreads/omp-tools.h"
30 #include "LB_numThreads/omptm_omp5.h"
31 #include "LB_numThreads/omptm_free_agents.h"
32 #include "LB_numThreads/omptm_role_shift.h"
33 #include "LB_core/DLB_talp.h"
34 #include "LB_core/spd.h"
35 #include "apis/dlb.h"
36 #include "support/debug.h"
37 #include "support/mask_utils.h"
38 #include "support/tracing.h"
39 #include "apis/dlb_errors.h"
40
41 #include <inttypes.h>
42 #include <unistd.h>
43 #include <string.h>
44 #include <stdbool.h>
45
46
47 static omptool_callback_funcs_t omptool_funcs = {0};
48 static omptool_event_funcs_t talp_funcs = {0};
49 static omptool_event_funcs_t omptm_funcs = {0};
50 static ompt_set_callback_t set_callback_fn = NULL;
51
52
53 /*********************************************************************************/
54 /* Init & Finalize */
55 /*********************************************************************************/
56
57 4 static void omptool__init(pid_t process_id, const options_t *options) {
58
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.init) {
59 2 talp_funcs.init(process_id, options);
60 }
61
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.init) {
62 2 omptm_funcs.init(process_id, options);
63 }
64 4 }
65
66 4 static void omptool__finalize(void) {
67
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (talp_funcs.finalize) {
68 2 talp_funcs.finalize();
69 }
70
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 if (omptm_funcs.finalize) {
71 2 omptm_funcs.finalize();
72 }
73 4 }
74
75
76 /*********************************************************************************/
77 /* "Callbacks" from MPI and DLB API */
78 /*********************************************************************************/
79
80 18 void omptool__into_blocking_call(void) {
81
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (talp_funcs.into_mpi) {
82 2 talp_funcs.into_mpi();
83 }
84
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (omptm_funcs.into_mpi) {
85 2 omptm_funcs.into_mpi();
86 }
87 18 }
88
89 18 void omptool__outof_blocking_call(void) {
90
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (omptm_funcs.outof_mpi) {
91 2 omptm_funcs.outof_mpi();
92 }
93
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 16 times.
18 if (talp_funcs.outof_mpi) {
94 2 talp_funcs.outof_mpi();
95 }
96 18 }
97
98 3 void omptool__lend_from_api(void) {
99
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.lend_from_api) {
100 2 omptm_funcs.lend_from_api();
101 }
102 3 }
103
104
105 /*********************************************************************************/
106 /* OpenMP callbacks */
107 /*********************************************************************************/
108
109 3 static void omptool_callback__thread_begin(
110 ompt_thread_t thread_type,
111 ompt_data_t *thread_data) {
112
113 3 spd_enter_dlb(thread_spd);
114
115
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (talp_funcs.thread_begin) {
116 2 talp_funcs.thread_begin(thread_type);
117 }
118
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.thread_begin) {
119 2 omptm_funcs.thread_begin(thread_type);
120 }
121 3 }
122
123 3 static void omptool_callback__thread_end(
124 ompt_data_t *thread_data) {
125
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (talp_funcs.thread_end) {
126 2 talp_funcs.thread_end();
127 }
128
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.thread_end) {
129 2 omptm_funcs.thread_end();
130 }
131 3 }
132
133
134 /* Parallel data storage from level 1 will be reused, other levels will be
135 * allocated on demand */
136 static omptool_parallel_data_t omptool_parallel_data_level1 = {.level = 1};
137
138 9 static void omptool_callback__parallel_begin(
139 ompt_data_t *encountering_task_data,
140 const ompt_frame_t *encountering_task_frame,
141 ompt_data_t *parallel_data,
142 unsigned int requested_parallelism,
143 int flags,
144 const void *codeptr_ra) {
145
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
9 if (flags & ompt_parallel_team) {
146 /* Obtain the nesting level of the generating parallel region and save
147 * some info in parallel_data */
148 6 omptool_parallel_data_t *omptool_parallel_data = NULL;
149
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (encountering_task_frame->exit_frame.ptr == NULL) {
150 /* No exit frame means initial task, so this parallel is level 1 */
151 3 omptool_parallel_data = &omptool_parallel_data_level1;
152
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 } else if (encountering_task_data->value > 0) {
153 /* Allocate new data */
154 3 omptool_parallel_data = malloc(sizeof(omptool_parallel_data_t));
155 3 omptool_parallel_data->level = encountering_task_data->value + 1;
156 }
157
158
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 ensure(omptool_parallel_data != NULL, "Unhandled case in %s",__func__);
159
160 6 omptool_parallel_data->codeptr_ra = codeptr_ra;
161 6 omptool_parallel_data->requested_parallelism = requested_parallelism;
162 6 parallel_data->ptr = omptool_parallel_data;
163
164 /* Finally, invoke TALP or OMPTM if needed */
165
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (talp_funcs.parallel_begin) {
166 4 talp_funcs.parallel_begin(omptool_parallel_data);
167 }
168
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (omptm_funcs.parallel_begin) {
169 4 omptm_funcs.parallel_begin(omptool_parallel_data);
170 }
171 } else {
172 /* ompt_parallel_league ? not supported */
173 }
174 9 }
175
176 6 static void omptool_callback__parallel_end(
177 ompt_data_t *parallel_data,
178 ompt_data_t *encountering_task_data,
179 int flags,
180 const void *codeptr_ra) {
181
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (flags & ompt_parallel_team) {
182 6 omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr;
183
184
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (talp_funcs.parallel_end) {
185 4 talp_funcs.parallel_end(omptool_parallel_data);
186 }
187
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (omptm_funcs.parallel_end) {
188 4 omptm_funcs.parallel_end(omptool_parallel_data);
189 }
190
191 /* Deallocate if needed */
192
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (omptool_parallel_data->level > 1) {
193 3 free(parallel_data->ptr);
194 }
195 }
196 6 }
197
198 3 static void omptool_callback__task_create(
199 ompt_data_t *encountering_task_data,
200 const ompt_frame_t *encountering_task_frame,
201 ompt_data_t *new_task_data,
202 int flags,
203 int has_dependences,
204 const void *codeptr_ra) {
205
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (flags & ompt_task_explicit) {
206 /* Pass nesting level */
207 3 new_task_data->value = encountering_task_data->value;
208
209
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (talp_funcs.task_create) {
210 2 talp_funcs.task_create();
211 }
212
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.task_create) {
213 2 omptm_funcs.task_create();
214 }
215 }
216 3 }
217
218 6 static void omptool_callback__task_schedule(
219 ompt_data_t *prior_task_data,
220 ompt_task_status_t prior_task_status,
221 ompt_data_t *next_task_data) {
222
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 if (prior_task_status == ompt_task_complete) {
223
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (talp_funcs.task_complete) {
224 2 talp_funcs.task_complete();
225 }
226
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.task_complete) {
227 2 omptm_funcs.task_complete();
228 }
229 instrument_event(BINDINGS_EVENT, 0, EVENT_END);
230
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 } else if (prior_task_status == ompt_task_switch) {
231
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (talp_funcs.task_switch) {
232 2 talp_funcs.task_switch();
233 }
234
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.task_switch) {
235 2 omptm_funcs.task_switch();
236 }
237 instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN);
238 }
239 6 }
240
241 6 static void omptool_callback__implicit_task(
242 ompt_scope_endpoint_t endpoint,
243 ompt_data_t *parallel_data,
244 ompt_data_t *task_data,
245 unsigned int actual_parallelism,
246 unsigned int index,
247 int flags) {
248
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (endpoint == ompt_scope_begin
249
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 && parallel_data != NULL
250
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 && parallel_data->ptr != NULL
251
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 && flags & ompt_task_implicit) {
252
253 6 omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr;
254
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (index == 0) {
255 6 omptool_parallel_data->actual_parallelism = actual_parallelism;
256 }
257
258 /* Pass nesting level to implicit task */
259 6 task_data->value = omptool_parallel_data->level;
260
261
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (talp_funcs.into_parallel_function) {
262 4 talp_funcs.into_parallel_function(omptool_parallel_data, index);
263 }
264
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (omptm_funcs.into_parallel_function) {
265 4 omptm_funcs.into_parallel_function(omptool_parallel_data, index);
266 }
267
268 instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN);
269 }
270 6 }
271
272 /* Warning: at the time of writing (Sep 2023), LLVM upstream still uses the
273 * deprecated kind ompt_sync_region_barrier_implicit for the
274 * sync-region-implicit-parallel event, so we cannot yet remove the deprecated
275 * enum. This enum value is also used for implicit barriers in a **single**
276 * region, but we can still identify the implicit barrier of a parallel region
277 * by comparing the codeptr_ra, which will be NULL for all team-worker threads,
278 * and equal to the codeptr_ra from parallel_begin for the primary thread.
279 */
280 9 static void omptool_callback__sync_region(
281 ompt_sync_region_t kind,
282 ompt_scope_endpoint_t endpoint,
283 ompt_data_t *parallel_data,
284 ompt_data_t *task_data,
285 const void *codeptr_ra) {
286
2/4
✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 9 times.
✗ Branch 3 not taken.
9 if (parallel_data != NULL && parallel_data->ptr != NULL) {
287 9 omptool_parallel_data_t *data = parallel_data->ptr;
288
2/4
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
9 switch (kind) {
289 6 case ompt_sync_region_barrier_implicit:
290 /* deprecated enum, includes implicit barriers from parallel,
291 * single, workshare, etc. */
292
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 if (endpoint == ompt_scope_begin
293
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 && (codeptr_ra == NULL
294
1/2
✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
6 || codeptr_ra == data->codeptr_ra)) {
295 /* barrier of implicit parallel only if codeptr_ra is NULL
296 * or equal to parallel region's */
297
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (talp_funcs.into_parallel_implicit_barrier) {
298 4 talp_funcs.into_parallel_implicit_barrier(data);
299 }
300
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 2 times.
6 if (omptm_funcs.into_parallel_implicit_barrier) {
301 4 omptm_funcs.into_parallel_implicit_barrier(data);
302 }
303 instrument_event(BINDINGS_EVENT, 0, EVENT_END);
304 } else if (endpoint == ompt_scope_begin) {
305 if (talp_funcs.into_parallel_sync) {
306 talp_funcs.into_parallel_sync(data);
307 }
308 if (omptm_funcs.into_parallel_sync) {
309 omptm_funcs.into_parallel_sync(data);
310 }
311 } else if (endpoint == ompt_scope_end) {
312 if (talp_funcs.outof_parallel_sync) {
313 talp_funcs.outof_parallel_sync(data);
314 }
315 if (omptm_funcs.outof_parallel_sync) {
316 omptm_funcs.outof_parallel_sync(data);
317 }
318 }
319 6 break;
320 case ompt_sync_region_barrier_explicit:
321 DLB_FALLTHROUGH;
322 case ompt_sync_region_taskwait:
323 DLB_FALLTHROUGH;
324 case ompt_sync_region_taskgroup:
325 if (endpoint == ompt_scope_begin) {
326 if (talp_funcs.into_parallel_sync) {
327 talp_funcs.into_parallel_sync(data);
328 }
329 if (omptm_funcs.into_parallel_sync) {
330 omptm_funcs.into_parallel_sync(data);
331 }
332 } else if (endpoint == ompt_scope_end) {
333 if (talp_funcs.outof_parallel_sync) {
334 talp_funcs.outof_parallel_sync(data);
335 }
336 if (omptm_funcs.outof_parallel_sync) {
337 omptm_funcs.outof_parallel_sync(data);
338 }
339 }
340 break;
341 3 case ompt_sync_region_barrier_implicit_parallel:
342 /* new enum in OMP 5.1 not yet implemented in any known runtime */
343
1/2
✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
3 if (endpoint == ompt_scope_begin) {
344
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (talp_funcs.into_parallel_implicit_barrier) {
345 2 talp_funcs.into_parallel_implicit_barrier(data);
346 }
347
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
3 if (omptm_funcs.into_parallel_implicit_barrier) {
348 2 omptm_funcs.into_parallel_implicit_barrier(data);
349 }
350 instrument_event(BINDINGS_EVENT, 0, EVENT_END);
351 }
352 3 break;
353 default:
354 break;
355 }
356 }
357 9 }
358
359
360 /*********************************************************************************/
361 /* Function pointers setup */
362 /*********************************************************************************/
363
364 /* For testing purposes only, predefine function pointers */
365 static bool test_funcs_init = false;
366 3 void omptool_testing__setup_event_fn_ptrs(
367 const omptool_event_funcs_t *talp_test_funcs,
368 const omptool_event_funcs_t *omptm_test_funcs) {
369 3 test_funcs_init = true;
370 3 talp_funcs = *talp_test_funcs;
371 3 omptm_funcs = *omptm_test_funcs;
372 3 }
373
374 4 static void setup_omp_fn_ptrs(omptm_version_t omptm_version, bool talp_openmp) {
375
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
4 if (!test_funcs_init) {
376 /* talp_funcs */
377
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (talp_openmp) {
378 verbose(VB_OMPT, "Enabling OMPT support for TALP");
379 talp_funcs = (const omptool_event_funcs_t) {
380 .init = talp_openmp_init,
381 .finalize = talp_openmp_finalize,
382 .into_mpi = NULL,
383 .outof_mpi = NULL,
384 .lend_from_api = NULL,
385 .thread_begin = talp_openmp_thread_begin,
386 .thread_end = talp_openmp_thread_end,
387 .thread_role_shift = NULL,
388 .parallel_begin = talp_openmp_parallel_begin,
389 .parallel_end = talp_openmp_parallel_end,
390 .into_parallel_function
391 = talp_openmp_into_parallel_function,
392 .into_parallel_implicit_barrier
393 = talp_openmp_into_parallel_implicit_barrier,
394 .into_parallel_sync = talp_openmp_into_parallel_sync,
395 .outof_parallel_sync = talp_openmp_outof_parallel_sync,
396 .task_create = talp_openmp_task_create,
397 .task_complete = talp_openmp_task_complete,
398 .task_switch = talp_openmp_task_switch,
399 };
400 }
401
402 /* omptm_funcs */
403
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 if (omptm_version == OMPTM_OMP5) {
404 omptm_funcs = (const omptool_event_funcs_t) {
405 .init = omptm_omp5__init,
406 .finalize = omptm_omp5__finalize,
407 .into_mpi = omptm_omp5__IntoBlockingCall,
408 .outof_mpi = omptm_omp5__OutOfBlockingCall,
409 .lend_from_api = omptm_omp5__lend_from_api,
410 .thread_begin = NULL,
411 .thread_end = NULL,
412 .thread_role_shift = NULL,
413 .parallel_begin = omptm_omp5__parallel_begin,
414 .parallel_end = omptm_omp5__parallel_end,
415 .into_parallel_function
416 = omptm_omp5__into_parallel_function,
417 .into_parallel_implicit_barrier
418 = omptm_omp5__into_parallel_implicit_barrier,
419 .task_create = NULL,
420 .task_complete = NULL,
421 .task_switch = NULL,
422 };
423
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 } else if (omptm_version == OMPTM_FREE_AGENTS) {
424 verbose(VB_OMPT, "Enabling experimental support with free agent threads");
425 omptm_funcs = (const omptool_event_funcs_t) {
426 .init = omptm_free_agents__init,
427 .finalize = omptm_free_agents__finalize,
428 .into_mpi = omptm_free_agents__IntoBlockingCall,
429 .outof_mpi = omptm_free_agents__OutOfBlockingCall,
430 .lend_from_api = NULL,
431 .thread_begin = omptm_free_agents__thread_begin,
432 .thread_end = NULL,
433 .thread_role_shift = NULL,
434 .parallel_begin = omptm_free_agents__parallel_begin,
435 .parallel_end = omptm_free_agents__parallel_end,
436 .into_parallel_function
437 = omptm_free_agents__into_parallel_function,
438 .into_parallel_implicit_barrier = NULL,
439 .task_create = omptm_free_agents__task_create,
440 .task_complete = omptm_free_agents__task_complete,
441 .task_switch = omptm_free_agents__task_switch,
442 };
443 }
444
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
1 else if (omptm_version == OMPTM_ROLE_SHIFT) {
445 verbose(VB_OMPT, "Enabling experimental support with role shift threads");
446 omptm_funcs = (const omptool_event_funcs_t) {
447 .init = omptm_role_shift__init,
448 .finalize = omptm_role_shift__finalize,
449 .into_mpi = omptm_role_shift__IntoBlockingCall,
450 .outof_mpi = omptm_role_shift__OutOfBlockingCall,
451 .lend_from_api = NULL,
452 .thread_begin = omptm_role_shift__thread_begin,
453 .thread_end = NULL,
454 .thread_role_shift = omptm_role_shift__thread_role_shift,
455 .parallel_begin = omptm_role_shift__parallel_begin,
456 .parallel_end = omptm_role_shift__parallel_end,
457 .into_parallel_function = NULL,
458 .into_parallel_implicit_barrier = NULL,
459 .task_create = omptm_role_shift__task_create,
460 .task_complete = omptm_role_shift__task_complete,
461 .task_switch = omptm_role_shift__task_switch,
462 };
463 }
464 }
465
466 4 omptool_funcs = (const omptool_callback_funcs_t) {};
467
468 /* The following callbacks use thread_data, task_data, or parallel_data and need
469 * to be centralized, whether we use only one module or both */
470
4/4
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 1 times.
✓ Branch 3 taken 1 times.
4 if (talp_openmp || omptm_version != OMPTM_NONE) {
471 3 omptool_funcs.thread_begin = omptool_callback__thread_begin;
472 3 omptool_funcs.thread_end = omptool_callback__thread_end;
473 3 omptool_funcs.parallel_begin = omptool_callback__parallel_begin;
474 3 omptool_funcs.parallel_end = omptool_callback__parallel_end;
475 3 omptool_funcs.task_create = omptool_callback__task_create;
476 3 omptool_funcs.task_schedule = omptool_callback__task_schedule;
477 3 omptool_funcs.implicit_task = omptool_callback__implicit_task;
478 3 omptool_funcs.sync_region = omptool_callback__sync_region;
479 }
480
481 /* The following function is a custom callback and it's only used in the
482 * experimental role-shift thread manager */
483
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (omptm_version == OMPTM_ROLE_SHIFT) {
484 omptool_funcs.thread_role_shift = omptm_role_shift__thread_role_shift;
485 }
486 4 }
487
488 24 static inline int set_ompt_callback(ompt_callbacks_t event, ompt_callback_t callback) {
489 24 int error = 1;
490
1/7
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 24 times.
✗ Branch 7 not taken.
24 switch (set_callback_fn(event, callback)) {
491 case ompt_set_error:
492 verbose(VB_OMPT, "OMPT set callback %d failed.", event);
493 break;
494 case ompt_set_never:
495 verbose(VB_OMPT, "OMPT set callback %d returned 'never'. The event was "
496 "registered but it will never occur or the callback will never "
497 "be invoked at runtime.", event);
498 break;
499 case ompt_set_impossible:
500 verbose(VB_OMPT, "OMPT set callback %d returned 'impossible'. The event "
501 "may occur but the tracing of it is not possible.", event);
502 break;
503 case ompt_set_sometimes:
504 verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes'. The event "
505 "may occur and the callback will be invoked at runtime, but "
506 "only for an implementation-defined subset of associated event "
507 "occurrences.", event);
508 error = 0;
509 break;
510 case ompt_set_sometimes_paired:
511 verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes paired'. "
512 "The event may occur and the callback will be invoked at "
513 "runtime, but only for an implementation-defined subset of "
514 "associated event occurrences. If any callback is invoked "
515 "with a begin_scope endpoint, it will be invoked also later "
516 "with and end_scope endpoint.", event);
517 error = 0;
518 break;
519 24 case ompt_set_always:
520 24 error = 0;
521 24 break;
522 default:
523 fatal("Unsupported return code at set_ompt_callback, "
524 "please file a bug report.");
525 }
526 24 return error;
527 }
528
529 4 static int set_ompt_callbacks(ompt_function_lookup_t lookup, omptm_version_t omptm_version,
530 bool talp_openmp) {
531
532 /* Populate global structs */
533 4 setup_omp_fn_ptrs(omptm_version, talp_openmp);
534
535 4 int error = 0;
536 4 set_callback_fn = (ompt_set_callback_t)lookup("ompt_set_callback");
537
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (set_callback_fn) {
538
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.thread_begin) {
539 3 error += set_ompt_callback(
540 ompt_callback_thread_begin,
541 3 (ompt_callback_t)omptool_funcs.thread_begin);
542 }
543
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.thread_end) {
544 3 error += set_ompt_callback(
545 ompt_callback_thread_end,
546 3 (ompt_callback_t)omptool_funcs.thread_end);
547 }
548
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (omptool_funcs.thread_role_shift) {
549 error += set_ompt_callback(
550 ompt_callback_thread_role_shift,
551 (ompt_callback_t)omptool_funcs.thread_role_shift);
552 }
553
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.parallel_begin) {
554 3 error += set_ompt_callback(
555 ompt_callback_parallel_begin,
556 3 (ompt_callback_t)omptool_funcs.parallel_begin);
557 }
558
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.parallel_end) {
559 3 error += set_ompt_callback(
560 ompt_callback_parallel_end,
561 3 (ompt_callback_t)omptool_funcs.parallel_end);
562 }
563
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.task_create) {
564 3 error += set_ompt_callback(
565 ompt_callback_task_create,
566 3 (ompt_callback_t)omptool_funcs.task_create);
567 }
568
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.task_schedule) {
569 3 error += set_ompt_callback(
570 ompt_callback_task_schedule,
571 3 (ompt_callback_t)omptool_funcs.task_schedule);
572 }
573
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.implicit_task) {
574 3 error += set_ompt_callback(
575 ompt_callback_implicit_task,
576 3 (ompt_callback_t)omptool_funcs.implicit_task);
577 }
578
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.sync_region) {
579 3 error += set_ompt_callback(
580 ompt_callback_sync_region,
581 3 (ompt_callback_t)omptool_funcs.sync_region);
582 }
583 } else {
584 error = 1;
585 verbose(VB_OMPT, "Could not look up function \"ompt_set_callback\"");
586 }
587
588
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (!error) {
589
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "OMPT callbacks successfully registered");
590 }
591
592 4 return error;
593 }
594
595 4 static void unset_ompt_callbacks(void) {
596
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (set_callback_fn) {
597
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.thread_begin) {
598 3 set_callback_fn(ompt_callback_thread_begin, NULL);
599 }
600
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.thread_end) {
601 3 set_callback_fn(ompt_callback_thread_end, NULL);
602 }
603
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (omptool_funcs.thread_role_shift) {
604 set_callback_fn(ompt_callback_thread_role_shift, NULL);
605 }
606
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.parallel_begin) {
607 3 set_callback_fn(ompt_callback_parallel_begin, NULL);
608 }
609
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.parallel_end) {
610 3 set_callback_fn(ompt_callback_parallel_end, NULL);
611 }
612
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.task_create) {
613 3 set_callback_fn(ompt_callback_task_create, NULL);
614 }
615
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.task_schedule) {
616 3 set_callback_fn(ompt_callback_task_schedule, NULL);
617 }
618
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.implicit_task) {
619 3 set_callback_fn(ompt_callback_implicit_task, NULL);
620 }
621
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 1 times.
4 if (omptool_funcs.sync_region) {
622 3 set_callback_fn(ompt_callback_sync_region, NULL);
623 }
624 }
625 4 }
626
627
628 /*********************************************************************************/
629 /* OMPT start tool */
630 /*********************************************************************************/
631
632 static bool dlb_initialized_through_ompt = false;
633 static const char *openmp_runtime_version;
634
635 4 static int omptool_initialize(ompt_function_lookup_t lookup, int initial_device_num,
636 ompt_data_t *tool_data) {
637 /* Parse options and get the required fields */
638 options_t options;
639 4 options_init(&options, NULL);
640 4 debug_init(&options);
641
642 /* Print OMPT version and variables*/
643 4 const char *omp_policy_str = getenv("OMP_WAIT_POLICY");
644
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Detected OpenMP runtime: %s", openmp_runtime_version);
645
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Environment variables of interest:");
646
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, " OMP_WAIT_POLICY: %s", omp_policy_str);
647
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (strncmp(openmp_runtime_version, "Intel", 5) == 0 ||
648
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 strncmp(openmp_runtime_version, "LLVM", 4) == 0) {
649 verbose(VB_OMPT, " KMP_LIBRARY: %s", getenv("KMP_LIBRARY"));
650 verbose(VB_OMPT, " KMP_BLOCKTIME: %s", getenv("KMP_BLOCKTIME"));
651 }
652 /* when GCC implements OMPT: else if "gomp", print GOMP_SPINCOUNT */
653
654
1/4
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
4 verbose(VB_OMPT, "DLB with OMPT support is %s", options.ompt ? "ENABLED" : "DISABLED");
655
656 /* Enable OMPT only if requested */
657
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (options.ompt) {
658 /* Emit warning if OMP_WAIT_POLICY is not "passive" */
659
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 if (options.lewi &&
660 options.omptm_version == OMPTM_OMP5 &&
661 (!omp_policy_str || strcasecmp(omp_policy_str, "passive") != 0)) {
662 warning("OMP_WAIT_POLICY value it not \"passive\". Even though the default "
663 "value may be \"passive\", setting it explicitly is recommended "
664 "since it modifies other runtime related environment variables");
665 }
666
667 4 int err = 0;
668
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (options.preinit_pid == 0) {
669 /* Force init */
670 cpu_set_t process_mask;
671 4 sched_getaffinity(0, sizeof(process_mask), &process_mask);
672 4 err = DLB_Init(0, &process_mask, NULL);
673
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (err == DLB_SUCCESS) {
674 4 dlb_initialized_through_ompt = true;
675 } else if (err != DLB_ERR_INIT) {
676 warning("DLB_Init failed: %s", DLB_Strerror(err));
677 }
678 } else {
679 /* Initialize DLB only if ompt is enabled, and
680 * remember whether it succeeded, so it can be finalized when ompt_finalize is invoked. */
681 err = DLB_Init(0, NULL, NULL);
682 if (err == DLB_SUCCESS) {
683 dlb_initialized_through_ompt = true;
684 } else {
685 verbose(VB_OMPT, "DLB_Init: %s", DLB_Strerror(err));
686 }
687 }
688
689
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Initializing OMPT module");
690
691 /* Register OMPT callbacks */
692 4 err = set_ompt_callbacks(lookup, options.omptm_version, options.talp_openmp);
693
694 /* If callbacks are successfully registered, initialize modules
695 * and return a non-zero value to activate the tool */
696
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (!err) {
697 4 omptool__init(thread_spd->id, &options);
698 4 options_finalize(&options);
699 4 return 1;
700 }
701
702 /* Otherwise, finalize DLB if init succeeded */
703 if (dlb_initialized_through_ompt) {
704 DLB_Finalize();
705 }
706
707 warning("DLB could not register itself as OpenMP tool");
708 }
709
710 options_finalize(&options);
711
712 return 0;
713 }
714
715 4 static void omptool_finalize(ompt_data_t *tool_data) {
716
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
4 verbose(VB_OMPT, "Finalizing OMPT module");
717
718 /* Finalize DLB if needed */
719
1/2
✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
4 if (dlb_initialized_through_ompt) {
720 4 DLB_Finalize();
721 }
722
723 /* Disable OMPT callbacks */
724 4 unset_ompt_callbacks();
725
726 /* Finalize modules */
727 4 omptool__finalize();
728 4 }
729
730
731 #pragma GCC visibility push(default)
732 1 ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
733 1 openmp_runtime_version = runtime_version;
734 static ompt_start_tool_result_t tool = {
735 .initialize = omptool_initialize,
736 .finalize = omptool_finalize,
737 .tool_data = {0}
738 };
739 1 return &tool;
740 }
741 #pragma GCC visibility pop
742