| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | /* Configure second tool with: | ||
| 21 | * DLB_TOOL_LIBRARIES | ||
| 22 | * DLB_TOOL_VERBOSE_INIT | ||
| 23 | */ | ||
| 24 | #define OMPT_MULTIPLEX_TOOL_NAME "DLB" | ||
| 25 | #include "LB_numThreads/ompt-multiplex.h" | ||
| 26 | |||
| 27 | #include "LB_numThreads/omptool.h" | ||
| 28 | |||
| 29 | #include "LB_numThreads/omp-tools.h" | ||
| 30 | #include "LB_numThreads/omptm_omp5.h" | ||
| 31 | #include "LB_numThreads/omptm_free_agents.h" | ||
| 32 | #include "LB_numThreads/omptm_role_shift.h" | ||
| 33 | #include "LB_core/spd.h" | ||
| 34 | #include "apis/dlb.h" | ||
| 35 | #include "apis/dlb_errors.h" | ||
| 36 | #include "support/debug.h" | ||
| 37 | #include "support/dlb_common.h" | ||
| 38 | #include "support/mask_utils.h" | ||
| 39 | #include "support/tracing.h" | ||
| 40 | #include "talp/talp_openmp.h" | ||
| 41 | |||
| 42 | #include <inttypes.h> | ||
| 43 | #include <unistd.h> | ||
| 44 | #include <string.h> | ||
| 45 | #include <stdbool.h> | ||
| 46 | #include <sys/syscall.h> | ||
| 47 | |||
| 48 | |||
| 49 | static omptool_callback_funcs_t omptool_funcs = {0}; | ||
| 50 | static omptool_event_funcs_t talp_funcs = {0}; | ||
| 51 | static omptool_event_funcs_t omptm_funcs = {0}; | ||
| 52 | static ompt_set_callback_t set_callback_fn = NULL; | ||
| 53 | |||
| 54 | |||
| 55 | /*********************************************************************************/ | ||
| 56 | /* Init & Finalize */ | ||
| 57 | /*********************************************************************************/ | ||
| 58 | |||
| 59 | 4 | static void omptool__init(pid_t process_id, const options_t *options) { | |
| 60 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.init) { |
| 61 | 2 | talp_funcs.init(process_id, options); | |
| 62 | } | ||
| 63 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.init) { |
| 64 | 2 | omptm_funcs.init(process_id, options); | |
| 65 | } | ||
| 66 | 4 | } | |
| 67 | |||
| 68 | 4 | static void omptool__finalize(void) { | |
| 69 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.finalize) { |
| 70 | 2 | talp_funcs.finalize(); | |
| 71 | } | ||
| 72 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.finalize) { |
| 73 | 2 | omptm_funcs.finalize(); | |
| 74 | } | ||
| 75 | 4 | } | |
| 76 | |||
| 77 | |||
| 78 | /*********************************************************************************/ | ||
| 79 | /* "Callbacks" from MPI and DLB API */ | ||
| 80 | /*********************************************************************************/ | ||
| 81 | |||
| 82 | 20 | void omptool__into_blocking_call(void) { | |
| 83 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
|
20 | if (talp_funcs.into_mpi) { |
| 84 | 2 | talp_funcs.into_mpi(); | |
| 85 | } | ||
| 86 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
|
20 | if (omptm_funcs.into_mpi) { |
| 87 | 2 | omptm_funcs.into_mpi(); | |
| 88 | } | ||
| 89 | 20 | } | |
| 90 | |||
| 91 | 20 | void omptool__outof_blocking_call(void) { | |
| 92 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
|
20 | if (omptm_funcs.outof_mpi) { |
| 93 | 2 | omptm_funcs.outof_mpi(); | |
| 94 | } | ||
| 95 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 18 times.
|
20 | if (talp_funcs.outof_mpi) { |
| 96 | 2 | talp_funcs.outof_mpi(); | |
| 97 | } | ||
| 98 | 20 | } | |
| 99 | |||
| 100 | 4 | void omptool__lend_from_api(void) { | |
| 101 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.lend_from_api) { |
| 102 | 2 | omptm_funcs.lend_from_api(); | |
| 103 | } | ||
| 104 | 4 | } | |
| 105 | |||
| 106 | |||
| 107 | /*********************************************************************************/ | ||
| 108 | /* OpenMP callbacks */ | ||
| 109 | /*********************************************************************************/ | ||
| 110 | |||
| 111 | 4 | static void omptool_callback__thread_begin( | |
| 112 | ompt_thread_t thread_type, | ||
| 113 | ompt_data_t *thread_data) { | ||
| 114 | |||
| 115 | 4 | spd_enter_dlb(thread_spd); | |
| 116 | |||
| 117 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "native-thread-begin event"); |
| 118 | |||
| 119 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.thread_begin) { |
| 120 | 2 | talp_funcs.thread_begin(thread_type); | |
| 121 | } | ||
| 122 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.thread_begin) { |
| 123 | 2 | omptm_funcs.thread_begin(thread_type); | |
| 124 | } | ||
| 125 | 4 | } | |
| 126 | |||
| 127 | 4 | static void omptool_callback__thread_end( | |
| 128 | ompt_data_t *thread_data) { | ||
| 129 | |||
| 130 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "native-thread-end event"); |
| 131 | |||
| 132 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.thread_end) { |
| 133 | 2 | talp_funcs.thread_end(); | |
| 134 | } | ||
| 135 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.thread_end) { |
| 136 | 2 | omptm_funcs.thread_end(); | |
| 137 | } | ||
| 138 | 4 | } | |
| 139 | |||
| 140 | |||
| 141 | /* Parallel data storage from level 1 will be reused, other levels will be | ||
| 142 | * allocated on demand */ | ||
| 143 | static omptool_parallel_data_t omptool_parallel_data_level1 = {.level = 1}; | ||
| 144 | |||
| 145 | 12 | static void omptool_callback__parallel_begin( | |
| 146 | ompt_data_t *encountering_task_data, | ||
| 147 | const ompt_frame_t *encountering_task_frame, | ||
| 148 | ompt_data_t *parallel_data, | ||
| 149 | unsigned int requested_parallelism, | ||
| 150 | int flags, | ||
| 151 | const void *codeptr_ra) { | ||
| 152 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 4 times.
|
12 | if (flags & ompt_parallel_team) { |
| 153 | /* Obtain the nesting level of the generating parallel region and save | ||
| 154 | * some info in parallel_data */ | ||
| 155 | 8 | omptool_parallel_data_t *omptool_parallel_data = NULL; | |
| 156 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (encountering_task_frame->exit_frame.ptr == NULL) { |
| 157 | /* No exit frame means inital task, so this parallel is level 1 */ | ||
| 158 | 4 | omptool_parallel_data = &omptool_parallel_data_level1; | |
| 159 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | } else if (encountering_task_data->value > 0) { |
| 160 | /* Allocate new data */ | ||
| 161 | 4 | omptool_parallel_data = malloc(sizeof(omptool_parallel_data_t)); | |
| 162 | 4 | omptool_parallel_data->level = encountering_task_data->value + 1; | |
| 163 | } | ||
| 164 | |||
| 165 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | ensure(omptool_parallel_data != NULL, "Unhandled case in %s",__func__); |
| 166 | |||
| 167 | 8 | omptool_parallel_data->codeptr_ra = codeptr_ra; | |
| 168 | 8 | omptool_parallel_data->requested_parallelism = requested_parallelism; | |
| 169 | 8 | parallel_data->ptr = omptool_parallel_data; | |
| 170 | |||
| 171 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (omptool_parallel_data->level == 1) { |
| 172 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "parallel-begin event"); |
| 173 | } else { | ||
| 174 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "parallel-begin event: nesting level %u", |
| 175 | omptool_parallel_data->level); | ||
| 176 | } | ||
| 177 | |||
| 178 | /* Finally, invoke TALP or OMPTM if needed */ | ||
| 179 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (talp_funcs.parallel_begin) { |
| 180 | 4 | talp_funcs.parallel_begin(omptool_parallel_data); | |
| 181 | } | ||
| 182 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (omptm_funcs.parallel_begin) { |
| 183 | 4 | omptm_funcs.parallel_begin(omptool_parallel_data); | |
| 184 | } | ||
| 185 | } else { | ||
| 186 | /* ompt_parallel_league ? not supported */ | ||
| 187 | } | ||
| 188 | 12 | } | |
| 189 | |||
| 190 | 8 | static void omptool_callback__parallel_end( | |
| 191 | ompt_data_t *parallel_data, | ||
| 192 | ompt_data_t *encountering_task_data, | ||
| 193 | int flags, | ||
| 194 | const void *codeptr_ra) { | ||
| 195 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (flags & ompt_parallel_team) { |
| 196 | 8 | omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr; | |
| 197 | |||
| 198 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (omptool_parallel_data->level == 1) { |
| 199 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "parallel-end event"); |
| 200 | } else { | ||
| 201 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "parallel-end event: nesting level %u", |
| 202 | omptool_parallel_data->level); | ||
| 203 | } | ||
| 204 | |||
| 205 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (talp_funcs.parallel_end) { |
| 206 | 4 | talp_funcs.parallel_end(omptool_parallel_data); | |
| 207 | } | ||
| 208 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (omptm_funcs.parallel_end) { |
| 209 | 4 | omptm_funcs.parallel_end(omptool_parallel_data); | |
| 210 | } | ||
| 211 | |||
| 212 | /* Deallocate if needed */ | ||
| 213 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (omptool_parallel_data->level > 1) { |
| 214 | 4 | free(parallel_data->ptr); | |
| 215 | } | ||
| 216 | |||
| 217 | instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN); | ||
| 218 | } | ||
| 219 | 8 | } | |
| 220 | |||
| 221 | 4 | static void omptool_callback__task_create( | |
| 222 | ompt_data_t *encountering_task_data, | ||
| 223 | const ompt_frame_t *encountering_task_frame, | ||
| 224 | ompt_data_t *new_task_data, | ||
| 225 | int flags, | ||
| 226 | int has_dependences, | ||
| 227 | const void *codeptr_ra) { | ||
| 228 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (flags & ompt_task_explicit) { |
| 229 | /* Pass nesting level */ | ||
| 230 | 4 | new_task_data->value = encountering_task_data->value; | |
| 231 | |||
| 232 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "task-create event"); |
| 233 | |||
| 234 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.task_create) { |
| 235 | 2 | talp_funcs.task_create(); | |
| 236 | } | ||
| 237 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.task_create) { |
| 238 | 2 | omptm_funcs.task_create(); | |
| 239 | } | ||
| 240 | } | ||
| 241 | 4 | } | |
| 242 | |||
| 243 | 8 | static void omptool_callback__task_schedule( | |
| 244 | ompt_data_t *prior_task_data, | ||
| 245 | ompt_task_status_t prior_task_status, | ||
| 246 | ompt_data_t *next_task_data) { | ||
| 247 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (prior_task_status == ompt_task_complete) { |
| 248 | |||
| 249 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "task-schedule event: task complete"); |
| 250 | |||
| 251 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.task_complete) { |
| 252 | 2 | talp_funcs.task_complete(); | |
| 253 | } | ||
| 254 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.task_complete) { |
| 255 | 2 | omptm_funcs.task_complete(); | |
| 256 | } | ||
| 257 | |||
| 258 | instrument_event(BINDINGS_EVENT, 0, EVENT_END); | ||
| 259 | |||
| 260 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | } else if (prior_task_status == ompt_task_switch) { |
| 261 | |||
| 262 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "task-schedule event: task switch"); |
| 263 | |||
| 264 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_funcs.task_switch) { |
| 265 | 2 | talp_funcs.task_switch(); | |
| 266 | } | ||
| 267 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (omptm_funcs.task_switch) { |
| 268 | 2 | omptm_funcs.task_switch(); | |
| 269 | } | ||
| 270 | |||
| 271 | instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN); | ||
| 272 | } | ||
| 273 | 8 | } | |
| 274 | |||
| 275 | 8 | static void omptool_callback__implicit_task( | |
| 276 | ompt_scope_endpoint_t endpoint, | ||
| 277 | ompt_data_t *parallel_data, | ||
| 278 | ompt_data_t *task_data, | ||
| 279 | unsigned int actual_parallelism, | ||
| 280 | unsigned int index, | ||
| 281 | int flags) { | ||
| 282 | |||
| 283 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (flags & ompt_task_implicit) { |
| 284 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (endpoint == ompt_scope_begin |
| 285 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | && parallel_data != NULL |
| 286 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
16 | && parallel_data->ptr != NULL) { |
| 287 | |||
| 288 | 8 | omptool_parallel_data_t *omptool_parallel_data = parallel_data->ptr; | |
| 289 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (index == 0) { |
| 290 | 8 | omptool_parallel_data->actual_parallelism = actual_parallelism; | |
| 291 | } | ||
| 292 | |||
| 293 | /* Pass nesting level to implicit task */ | ||
| 294 | 8 | task_data->value = omptool_parallel_data->level; | |
| 295 | |||
| 296 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | verbose(VB_OMPT, "implicit-task-begin event: into parallel function"); |
| 297 | |||
| 298 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (talp_funcs.into_parallel_function) { |
| 299 | 4 | talp_funcs.into_parallel_function(omptool_parallel_data, index); | |
| 300 | } | ||
| 301 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 4 times.
|
8 | if (omptm_funcs.into_parallel_function) { |
| 302 | 4 | omptm_funcs.into_parallel_function(omptool_parallel_data, index); | |
| 303 | } | ||
| 304 | |||
| 305 | instrument_event(BINDINGS_EVENT, sched_getcpu()+1, EVENT_BEGIN); | ||
| 306 | } | ||
| 307 | ✗ | else if (endpoint == ompt_scope_end) { | |
| 308 | /* Note: parallel_data is NULL */ | ||
| 309 | |||
| 310 | ✗ | verbose(VB_OMPT, "implicit-task-end event"); | |
| 311 | |||
| 312 | ✗ | if (index == 0) { | |
| 313 | /* The primary thread state is already controlled in the parallel-end event */ | ||
| 314 | } else { | ||
| 315 | /* This event is emitted differently across OpenMP implementations. | ||
| 316 | * | ||
| 317 | * In LLVM (tested with earlier versions until 19), team-worker threads are | ||
| 318 | * typically suspended during the implicit barrier at the end of a parallel | ||
| 319 | * region, and this event is not triggered until they resume at the start of | ||
| 320 | * the next parallel region. | ||
| 321 | * | ||
| 322 | * Cray PE (testing with versions 8.3 - 8.5, based on LLVM 17) invokes this | ||
| 323 | * event for each team-worker thread, but sometimes after the primary thread | ||
| 324 | * has already triggered the parallel-end event. This can introduce race | ||
| 325 | * conditions that cause the timing metrics to be attributed to different states, | ||
| 326 | * potentially leading to inconsistent efficiency results. | ||
| 327 | */ | ||
| 328 | ✗ | if (talp_funcs.outof_parallel_function) { | |
| 329 | ✗ | talp_funcs.outof_parallel_function(); | |
| 330 | } | ||
| 331 | ✗ | if (omptm_funcs.outof_parallel_function) { | |
| 332 | ✗ | omptm_funcs.outof_parallel_function(); | |
| 333 | } | ||
| 334 | |||
| 335 | instrument_event(BINDINGS_EVENT, 0, EVENT_END); | ||
| 336 | } | ||
| 337 | } | ||
| 338 | } | ||
| 339 | ✗ | else if (flags & ompt_task_initial) { | |
| 340 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 341 | ✗ | verbose(VB_OMPT, "initial-task-begin event"); | |
| 342 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 343 | ✗ | verbose(VB_OMPT, "initial-task-end event"); | |
| 344 | } | ||
| 345 | } | ||
| 346 | 8 | } | |
| 347 | |||
| 348 | |||
| 349 | 12 | static inline void into_parallel_implicit_barrier(omptool_parallel_data_t *data) { | |
| 350 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (talp_funcs.into_parallel_implicit_barrier) { |
| 351 | 6 | talp_funcs.into_parallel_implicit_barrier(data); | |
| 352 | } | ||
| 353 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 6 times.
|
12 | if (omptm_funcs.into_parallel_implicit_barrier) { |
| 354 | 6 | omptm_funcs.into_parallel_implicit_barrier(data); | |
| 355 | } | ||
| 356 | |||
| 357 | instrument_event(BINDINGS_EVENT, 0, EVENT_END); | ||
| 358 | 12 | } | |
| 359 | |||
| 360 | ✗ | static inline void into_parallel_sync(omptool_parallel_data_t *data) { | |
| 361 | ✗ | if (talp_funcs.into_parallel_sync) { | |
| 362 | ✗ | talp_funcs.into_parallel_sync(data); | |
| 363 | } | ||
| 364 | ✗ | if (omptm_funcs.into_parallel_sync) { | |
| 365 | ✗ | omptm_funcs.into_parallel_sync(data); | |
| 366 | } | ||
| 367 | } | ||
| 368 | |||
| 369 | ✗ | static inline void outof_parallel_sync(omptool_parallel_data_t *data) { | |
| 370 | ✗ | if (talp_funcs.outof_parallel_sync) { | |
| 371 | ✗ | talp_funcs.outof_parallel_sync(data); | |
| 372 | } | ||
| 373 | ✗ | if (omptm_funcs.outof_parallel_sync) { | |
| 374 | ✗ | omptm_funcs.outof_parallel_sync(data); | |
| 375 | } | ||
| 376 | } | ||
| 377 | |||
| 378 | /* Warning: Newer LLVM versions already use the new | ||
| 379 | * `ompt_sync_region_barrier_implicit_parallel` enum, but older versions and | ||
| 380 | * Cray runtimes still use the deprecated `ompt_sync_region_barrier_implicit`. | ||
| 381 | * This enum value is also used for implicit barriers in a **single** region, | ||
| 382 | * but we can still identify the implicit barrier of a parallel region | ||
| 383 | * comparing the codeptr_ra, which will be NULL for all team-worker threads, | ||
| 384 | * and equal to the codeptr_ra from parallel_begin for the primary thread. | ||
| 385 | */ | ||
| 386 | 12 | static void omptool_callback__sync_region( | |
| 387 | ompt_sync_region_t kind, | ||
| 388 | ompt_scope_endpoint_t endpoint, | ||
| 389 | ompt_data_t *parallel_data, | ||
| 390 | ompt_data_t *task_data, | ||
| 391 | const void *codeptr_ra) { | ||
| 392 |
2/4✓ Branch 0 taken 12 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 12 times.
✗ Branch 3 not taken.
|
12 | if (parallel_data != NULL && parallel_data->ptr != NULL) { |
| 393 | 12 | omptool_parallel_data_t *data = parallel_data->ptr; | |
| 394 |
2/10✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
✗ Branch 7 not taken.
✓ Branch 8 taken 4 times.
✗ Branch 9 not taken.
|
12 | switch (kind) { |
| 395 | 8 | case ompt_sync_region_barrier_implicit: | |
| 396 | /* deprecated enum, includes implicit barriers from parallel, | ||
| 397 | * single, workshare, etc. */ | ||
| 398 |
2/4✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 8 times.
✗ Branch 3 not taken.
|
8 | if (codeptr_ra == NULL || codeptr_ra == data->codeptr_ra) { |
| 399 | |||
| 400 | /* Note: Cray OpenMP still uses this enum, but apparently | ||
| 401 | * only for parallel region with no tasks */ | ||
| 402 | |||
| 403 | /* implicit barrier in parallel region only if codeptr_ra is NULL | ||
| 404 | * or equal to parallel region's */ | ||
| 405 | |||
| 406 |
1/2✓ Branch 0 taken 8 times.
✗ Branch 1 not taken.
|
8 | if (endpoint == ompt_scope_begin) { |
| 407 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | verbose(VB_OMPT, "implicit-barrier-begin event: " |
| 408 | "end of parallel function (deprecated enum)"); | ||
| 409 | 8 | into_parallel_implicit_barrier(data); | |
| 410 | } | ||
| 411 | ✗ | else if (endpoint == ompt_scope_end) { | |
| 412 | ✗ | verbose(VB_OMPT, "implicit-barrier-end event: " | |
| 413 | "end of parallel function (deprecated enum)"); | ||
| 414 | } | ||
| 415 | |||
| 416 | } else { | ||
| 417 | |||
| 418 | /* other implicit barriers */ | ||
| 419 | |||
| 420 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 421 | ✗ | verbose(VB_OMPT, "implicit-barrier-begin event: sync region"); | |
| 422 | ✗ | into_parallel_sync(data); | |
| 423 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 424 | ✗ | verbose(VB_OMPT, "implicit-barrier-end event: sync region"); | |
| 425 | ✗ | outof_parallel_sync(data); | |
| 426 | } | ||
| 427 | } | ||
| 428 | 8 | break; | |
| 429 | |||
| 430 | ✗ | case ompt_sync_region_barrier_explicit: | |
| 431 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 432 | ✗ | verbose(VB_OMPT, "explicit-barrier-begin event"); | |
| 433 | ✗ | into_parallel_sync(data); | |
| 434 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 435 | ✗ | verbose(VB_OMPT, "explicit-barrier-end event"); | |
| 436 | ✗ | outof_parallel_sync(data); | |
| 437 | } | ||
| 438 | ✗ | break; | |
| 439 | |||
| 440 | ✗ | case ompt_sync_region_barrier_implementation: | |
| 441 | ✗ | verbose(VB_OMPT, "Implementation-Specific Barrier event"); | |
| 442 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 443 | ✗ | into_parallel_sync(data); | |
| 444 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 445 | ✗ | outof_parallel_sync(data); | |
| 446 | } | ||
| 447 | ✗ | break; | |
| 448 | |||
| 449 | ✗ | case ompt_sync_region_taskwait: | |
| 450 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 451 | ✗ | verbose(VB_OMPT, "taskwait-begin event"); | |
| 452 | ✗ | into_parallel_sync(data); | |
| 453 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 454 | ✗ | verbose(VB_OMPT, "taskwait-end event"); | |
| 455 | ✗ | outof_parallel_sync(data); | |
| 456 | } | ||
| 457 | ✗ | break; | |
| 458 | |||
| 459 | ✗ | case ompt_sync_region_taskgroup: | |
| 460 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 461 | ✗ | verbose(VB_OMPT, "taskgroup-begin event"); | |
| 462 | ✗ | into_parallel_sync(data); | |
| 463 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 464 | ✗ | verbose(VB_OMPT, "taskgroup-end event"); | |
| 465 | ✗ | outof_parallel_sync(data); | |
| 466 | } | ||
| 467 | ✗ | break; | |
| 468 | |||
| 469 | ✗ | case ompt_sync_region_barrier_implicit_workshare: | |
| 470 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 471 | ✗ | verbose(VB_OMPT, "implicit-barrier-begin event: workshare"); | |
| 472 | ✗ | into_parallel_sync(data); | |
| 473 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 474 | ✗ | verbose(VB_OMPT, "implicit-barrier-end event: workshare"); | |
| 475 | ✗ | outof_parallel_sync(data); | |
| 476 | } | ||
| 477 | ✗ | break; | |
| 478 | |||
| 479 | ✗ | case ompt_sync_region_barrier_teams: | |
| 480 | ✗ | if (endpoint == ompt_scope_begin) { | |
| 481 | ✗ | verbose(VB_OMPT, "implicit-barrier-begin event: teams"); | |
| 482 | ✗ | into_parallel_sync(data); | |
| 483 | ✗ | } else if (endpoint == ompt_scope_end) { | |
| 484 | ✗ | verbose(VB_OMPT, "implicit-barrier-end event: teams"); | |
| 485 | ✗ | outof_parallel_sync(data); | |
| 486 | } | ||
| 487 | ✗ | break; | |
| 488 | |||
| 489 | ✗ | case ompt_sync_region_reduction: | |
| 490 | /* For now we don't do anything with reductions */ | ||
| 491 | ✗ | break; | |
| 492 | |||
| 493 | 4 | case ompt_sync_region_barrier_implicit_parallel: | |
| 494 | /* new enum in OMP 5.1 */ | ||
| 495 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (endpoint == ompt_scope_begin) { |
| 496 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "implicit-barrier-begin event: end of parallel function"); |
| 497 | 4 | into_parallel_implicit_barrier(data); | |
| 498 | } | ||
| 499 | ✗ | else if (endpoint == ompt_scope_end) { | |
| 500 | ✗ | verbose(VB_OMPT, "implicit-barrier-end event: end of parallel function"); | |
| 501 | } | ||
| 502 | 4 | break; | |
| 503 | |||
| 504 | ✗ | default: | |
| 505 | ✗ | break; | |
| 506 | } | ||
| 507 | } | ||
| 508 | 12 | } | |
| 509 | |||
| 510 | |||
| 511 | /*********************************************************************************/ | ||
| 512 | /* Function pointers setup */ | ||
| 513 | /*********************************************************************************/ | ||
| 514 | |||
| 515 | /* For testing purposes only, override function pointers */ | ||
| 516 | static const omptool_event_funcs_t *test_talp_funcs = NULL; | ||
| 517 | static const omptool_event_funcs_t *test_omptm_funcs = NULL; | ||
| 518 | 1 | void omptool_testing__setup_event_fn_ptrs( | |
| 519 | const omptool_event_funcs_t *talp_fns, | ||
| 520 | const omptool_event_funcs_t *omptm_fns) { | ||
| 521 | 1 | test_talp_funcs = talp_fns; | |
| 522 | 1 | test_omptm_funcs = omptm_fns; | |
| 523 | 1 | } | |
| 524 | |||
| 525 | 4 | static void setup_omp_fn_ptrs(omptm_version_t omptm_version, bool talp_openmp) { | |
| 526 | |||
| 527 | /* talp_funcs */ | ||
| 528 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (talp_openmp) { |
| 529 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | verbose(VB_OMPT, "Enabling OMPT support for TALP"); |
| 530 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (test_talp_funcs) { |
| 531 | 2 | talp_funcs = *test_talp_funcs; | |
| 532 | } else { | ||
| 533 | ✗ | talp_funcs = talp_events_vtable; | |
| 534 | } | ||
| 535 | } else { | ||
| 536 | 2 | talp_funcs = (const omptool_event_funcs_t) {}; | |
| 537 | } | ||
| 538 | |||
| 539 | /* omptm_funcs */ | ||
| 540 |
2/5✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
|
4 | switch(omptm_version) { |
| 541 | 2 | case OMPTM_NONE: | |
| 542 | 2 | omptm_funcs = (const omptool_event_funcs_t) {}; | |
| 543 | 2 | break; | |
| 544 | 2 | case OMPTM_OMP5: | |
| 545 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | verbose(VB_OMPT, "Enabling OMPT support for OpenMP 5.0"); |
| 546 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (test_omptm_funcs) { |
| 547 | 2 | omptm_funcs = *test_omptm_funcs; | |
| 548 | } else { | ||
| 549 | ✗ | omptm_funcs = omptm_omp5_events_vtable; | |
| 550 | } | ||
| 551 | 2 | break; | |
| 552 | ✗ | case OMPTM_FREE_AGENTS: | |
| 553 | ✗ | verbose(VB_OMPT, "Enabling experimental support with free agent threads"); | |
| 554 | ✗ | if (test_omptm_funcs) { | |
| 555 | ✗ | omptm_funcs = *test_omptm_funcs; | |
| 556 | } else { | ||
| 557 | ✗ | omptm_funcs = omptm_free_agents_events_vtable; | |
| 558 | } | ||
| 559 | ✗ | break; | |
| 560 | ✗ | case OMPTM_ROLE_SHIFT: | |
| 561 | ✗ | verbose(VB_OMPT, "Enabling experimental support with role shift threads"); | |
| 562 | ✗ | if (test_omptm_funcs) { | |
| 563 | ✗ | omptm_funcs = *test_omptm_funcs; | |
| 564 | } else { | ||
| 565 | ✗ | omptm_funcs = omptm_role_shift_events_vtable; | |
| 566 | } | ||
| 567 | ✗ | break; | |
| 568 | } | ||
| 569 | |||
| 570 | /* The following callbacks are centralized and always registered, even if both | ||
| 571 | * omptm and talp are disabled. It is deciced this way to allow event tracing and | ||
| 572 | * debugging events entry via verbose messages. */ | ||
| 573 | 4 | omptool_funcs = (const omptool_callback_funcs_t) { | |
| 574 | .thread_begin = omptool_callback__thread_begin, | ||
| 575 | .thread_end = omptool_callback__thread_end, | ||
| 576 | .parallel_begin = omptool_callback__parallel_begin, | ||
| 577 | .parallel_end = omptool_callback__parallel_end, | ||
| 578 | .task_create = omptool_callback__task_create, | ||
| 579 | .task_schedule = omptool_callback__task_schedule, | ||
| 580 | .implicit_task = omptool_callback__implicit_task, | ||
| 581 | .sync_region = omptool_callback__sync_region, | ||
| 582 | }; | ||
| 583 | |||
| 584 | /* The following function is a custom callback and it's only used in the | ||
| 585 | * experimental role-shift thread manager */ | ||
| 586 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (omptm_version == OMPTM_ROLE_SHIFT) { |
| 587 | ✗ | omptool_funcs.thread_role_shift = omptm_role_shift__thread_role_shift; | |
| 588 | } | ||
| 589 | 4 | } | |
| 590 | |||
| 591 | 32 | static inline int set_ompt_callback(ompt_callbacks_t event, ompt_callback_t callback) { | |
| 592 | 32 | int error = 1; | |
| 593 |
1/7✗ Branch 1 not taken.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 32 times.
✗ Branch 7 not taken.
|
32 | switch (set_callback_fn(event, callback)) { |
| 594 | ✗ | case ompt_set_error: | |
| 595 | ✗ | verbose(VB_OMPT, "OMPT set callback %d failed.", event); | |
| 596 | ✗ | break; | |
| 597 | ✗ | case ompt_set_never: | |
| 598 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'never'. The event was " | |
| 599 | "registered but it will never occur or the callback will never " | ||
| 600 | "be invoked at runtime.", event); | ||
| 601 | ✗ | break; | |
| 602 | ✗ | case ompt_set_impossible: | |
| 603 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'impossible'. The event " | |
| 604 | "may occur but the tracing of it is not possible.", event); | ||
| 605 | ✗ | break; | |
| 606 | ✗ | case ompt_set_sometimes: | |
| 607 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes'. The event " | |
| 608 | "may occur and the callback will be invoked at runtime, but " | ||
| 609 | "only for an implementation-defined subset of associated event " | ||
| 610 | "occurrences.", event); | ||
| 611 | ✗ | error = 0; | |
| 612 | ✗ | break; | |
| 613 | ✗ | case ompt_set_sometimes_paired: | |
| 614 | ✗ | verbose(VB_OMPT, "OMPT set callback %d returned 'sometimes paired'. " | |
| 615 | "The event may occur and the callback will be invoked at " | ||
| 616 | "runtime, but only for an implementation-defined subset of " | ||
| 617 | "associated event occurrences. If any callback is invoked " | ||
| 618 | "with a begin_scope endpoint, it will be invoked also later " | ||
| 619 | "with and end_scope endpoint.", event); | ||
| 620 | ✗ | error = 0; | |
| 621 | ✗ | break; | |
| 622 | 32 | case ompt_set_always: | |
| 623 | 32 | error = 0; | |
| 624 | 32 | break; | |
| 625 | ✗ | default: | |
| 626 | ✗ | fatal("Unsupported return code at set_ompt_callback, " | |
| 627 | "please file a bug report."); | ||
| 628 | } | ||
| 629 | 32 | return error; | |
| 630 | } | ||
| 631 | |||
| 632 | 4 | static int set_ompt_callbacks(ompt_function_lookup_t lookup, omptm_version_t omptm_version, | |
| 633 | bool talp_openmp) { | ||
| 634 | |||
| 635 | /* Populate global structs */ | ||
| 636 | 4 | setup_omp_fn_ptrs(omptm_version, talp_openmp); | |
| 637 | |||
| 638 | 4 | int error = 0; | |
| 639 | 4 | set_callback_fn = (ompt_set_callback_t)lookup("ompt_set_callback"); | |
| 640 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (set_callback_fn) { |
| 641 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.thread_begin) { |
| 642 | 4 | error += set_ompt_callback( | |
| 643 | ompt_callback_thread_begin, | ||
| 644 | 4 | (ompt_callback_t)omptool_funcs.thread_begin); | |
| 645 | } | ||
| 646 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.thread_end) { |
| 647 | 4 | error += set_ompt_callback( | |
| 648 | ompt_callback_thread_end, | ||
| 649 | 4 | (ompt_callback_t)omptool_funcs.thread_end); | |
| 650 | } | ||
| 651 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (omptool_funcs.thread_role_shift) { |
| 652 | ✗ | error += set_ompt_callback( | |
| 653 | ompt_callback_thread_role_shift, | ||
| 654 | ✗ | (ompt_callback_t)omptool_funcs.thread_role_shift); | |
| 655 | } | ||
| 656 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.parallel_begin) { |
| 657 | 4 | error += set_ompt_callback( | |
| 658 | ompt_callback_parallel_begin, | ||
| 659 | 4 | (ompt_callback_t)omptool_funcs.parallel_begin); | |
| 660 | } | ||
| 661 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.parallel_end) { |
| 662 | 4 | error += set_ompt_callback( | |
| 663 | ompt_callback_parallel_end, | ||
| 664 | 4 | (ompt_callback_t)omptool_funcs.parallel_end); | |
| 665 | } | ||
| 666 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.task_create) { |
| 667 | 4 | error += set_ompt_callback( | |
| 668 | ompt_callback_task_create, | ||
| 669 | 4 | (ompt_callback_t)omptool_funcs.task_create); | |
| 670 | } | ||
| 671 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.task_schedule) { |
| 672 | 4 | error += set_ompt_callback( | |
| 673 | ompt_callback_task_schedule, | ||
| 674 | 4 | (ompt_callback_t)omptool_funcs.task_schedule); | |
| 675 | } | ||
| 676 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.implicit_task) { |
| 677 | 4 | error += set_ompt_callback( | |
| 678 | ompt_callback_implicit_task, | ||
| 679 | 4 | (ompt_callback_t)omptool_funcs.implicit_task); | |
| 680 | } | ||
| 681 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.sync_region) { |
| 682 | 4 | error += set_ompt_callback( | |
| 683 | ompt_callback_sync_region, | ||
| 684 | 4 | (ompt_callback_t)omptool_funcs.sync_region); | |
| 685 | } | ||
| 686 | } else { | ||
| 687 | ✗ | error = 1; | |
| 688 | ✗ | verbose(VB_OMPT, "Could not look up function \"ompt_set_callback\""); | |
| 689 | } | ||
| 690 | |||
| 691 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (!error) { |
| 692 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "OMPT callbacks successfully registered"); |
| 693 | } | ||
| 694 | |||
| 695 | 4 | return error; | |
| 696 | } | ||
| 697 | |||
| 698 | 4 | static void unset_ompt_callbacks(void) { | |
| 699 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (set_callback_fn) { |
| 700 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.thread_begin) { |
| 701 | 4 | set_callback_fn(ompt_callback_thread_begin, NULL); | |
| 702 | } | ||
| 703 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.thread_end) { |
| 704 | 4 | set_callback_fn(ompt_callback_thread_end, NULL); | |
| 705 | } | ||
| 706 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | if (omptool_funcs.thread_role_shift) { |
| 707 | ✗ | set_callback_fn(ompt_callback_thread_role_shift, NULL); | |
| 708 | } | ||
| 709 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.parallel_begin) { |
| 710 | 4 | set_callback_fn(ompt_callback_parallel_begin, NULL); | |
| 711 | } | ||
| 712 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.parallel_end) { |
| 713 | 4 | set_callback_fn(ompt_callback_parallel_end, NULL); | |
| 714 | } | ||
| 715 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.task_create) { |
| 716 | 4 | set_callback_fn(ompt_callback_task_create, NULL); | |
| 717 | } | ||
| 718 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.task_schedule) { |
| 719 | 4 | set_callback_fn(ompt_callback_task_schedule, NULL); | |
| 720 | } | ||
| 721 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.implicit_task) { |
| 722 | 4 | set_callback_fn(ompt_callback_implicit_task, NULL); | |
| 723 | } | ||
| 724 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (omptool_funcs.sync_region) { |
| 725 | 4 | set_callback_fn(ompt_callback_sync_region, NULL); | |
| 726 | } | ||
| 727 | } | ||
| 728 | 4 | } | |
| 729 | |||
| 730 | |||
| 731 | /*********************************************************************************/ | ||
| 732 | /* OMPT start tool */ | ||
| 733 | /*********************************************************************************/ | ||
| 734 | |||
| 735 | static bool dlb_initialized_through_ompt = false; /* set when the DLB_Init call made by omptool_initialize succeeds; tells omptool_finalize to call DLB_Finalize */ | ||
| 736 | static bool omptool_initialized = false; /* set once omptool_initialize has registered the callbacks and initialized the modules; guards omptool_finalize against repeated calls */ | ||
| 737 | static const char *openmp_runtime_version; /* runtime version string stored by ompt_start_tool and read by omptool_initialize; the pointer is kept, the string is not copied */ | ||
| 738 | |||
| 739 | 4 | static int omptool_initialize(ompt_function_lookup_t lookup, int initial_device_num, | |
| 740 | ompt_data_t *tool_data) { | ||
| 741 | /* Parse options and get the required fields */ | ||
| 742 | options_t options; | ||
| 743 | 4 | options_init(&options, NULL); | |
| 744 | 4 | debug_init(&options); | |
| 745 | |||
| 746 | /* Print OMPT version and variables*/ | ||
| 747 | 4 | const char *omp_policy_str = getenv("OMP_WAIT_POLICY"); | |
| 748 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Detected OpenMP runtime: %s", openmp_runtime_version); |
| 749 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Environment variables of interest:"); |
| 750 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, " OMP_WAIT_POLICY: %s", omp_policy_str); |
| 751 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (strncmp(openmp_runtime_version, "Intel", 5) == 0 || |
| 752 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | strncmp(openmp_runtime_version, "LLVM", 4) == 0) { |
| 753 | ✗ | verbose(VB_OMPT, " KMP_LIBRARY: %s", getenv("KMP_LIBRARY")); | |
| 754 | ✗ | verbose(VB_OMPT, " KMP_BLOCKTIME: %s", getenv("KMP_BLOCKTIME")); | |
| 755 | } | ||
| 756 | /* when GCC implements OMPT: else if "gomp", print GOMP_SPINCOUNT */ | ||
| 757 | |||
| 758 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
4 | verbose(VB_OMPT, "DLB with OMPT support is %s", options.ompt ? "ENABLED" : "DISABLED"); |
| 759 | |||
| 760 | /* Enable OMPT only if requested */ | ||
| 761 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (options.ompt) { |
| 762 | |||
| 763 | /* Disable --ompt-thread-manager if no LeWI or DROM */ | ||
| 764 |
3/4✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 2 times.
✗ Branch 3 not taken.
|
4 | if (!(options.lewi || options.drom)) { |
| 765 | 2 | options.omptm_version = OMPTM_NONE; | |
| 766 | } | ||
| 767 | |||
| 768 | /* Emit warning if OMP_WAIT_POLICY is not "passive" */ | ||
| 769 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | if (options.lewi && |
| 770 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
2 | options.omptm_version == OMPTM_OMP5 && |
| 771 | ✗ | (!omp_policy_str || strcasecmp(omp_policy_str, "passive") != 0)) { | |
| 772 | 2 | warning("OMP_WAIT_POLICY value it not \"passive\". Even though the default " | |
| 773 | "value may be \"passive\", setting it explicitly is recommended " | ||
| 774 | "since it modifies other runtime related environment variables"); | ||
| 775 | } | ||
| 776 | |||
| 777 | 4 | int err = 0; | |
| 778 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (options.preinit_pid == 0) { |
| 779 | /* Force init */ | ||
| 780 | cpu_set_t process_mask; | ||
| 781 | 4 | sched_getaffinity(0, sizeof(process_mask), &process_mask); | |
| 782 | 4 | err = DLB_Init(0, &process_mask, NULL); | |
| 783 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (err == DLB_SUCCESS) { |
| 784 | 4 | dlb_initialized_through_ompt = true; | |
| 785 | ✗ | } else if (err != DLB_ERR_INIT) { | |
| 786 | ✗ | warning("DLB_Init failed: %s", DLB_Strerror(err)); | |
| 787 | } | ||
| 788 | } else { | ||
| 789 | /* Initialize DLB only if ompt is enabled, and | ||
| 790 | * remember if succeeded to finalize it when ompt_finalize is invoked. */ | ||
| 791 | ✗ | err = DLB_Init(0, NULL, NULL); | |
| 792 | ✗ | if (err == DLB_SUCCESS) { | |
| 793 | ✗ | dlb_initialized_through_ompt = true; | |
| 794 | } else { | ||
| 795 | ✗ | verbose(VB_OMPT, "DLB_Init: %s", DLB_Strerror(err)); | |
| 796 | } | ||
| 797 | } | ||
| 798 | |||
| 799 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Initializing OMPT module"); |
| 800 | |||
| 801 | /* Register OMPT callbacks */ | ||
| 802 | 4 | err = set_ompt_callbacks(lookup, options.omptm_version, options.talp_openmp); | |
| 803 | |||
| 804 | /* If callbacks are successfully registered, initialize modules | ||
| 805 | * and return a non-zero value to activate the tool */ | ||
| 806 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (!err) { |
| 807 | 4 | omptool__init(thread_spd->id, &options); | |
| 808 | 4 | options_finalize(&options); | |
| 809 | /* we save the thread ID that initialized OMPT */ | ||
| 810 | 4 | tool_data->value = syscall(SYS_gettid); | |
| 811 | 4 | omptool_initialized = true; | |
| 812 | 4 | return 1; | |
| 813 | } | ||
| 814 | |||
| 815 | /* Otherwise, finalize DLB if init succeeded */ | ||
| 816 | ✗ | if (dlb_initialized_through_ompt) { | |
| 817 | ✗ | DLB_Finalize(); | |
| 818 | ✗ | dlb_initialized_through_ompt = false; | |
| 819 | } | ||
| 820 | |||
| 821 | ✗ | warning("DLB could not register itself as OpenMP tool"); | |
| 822 | } | ||
| 823 | |||
| 824 | ✗ | options_finalize(&options); | |
| 825 | |||
| 826 | ✗ | return 0; | |
| 827 | } | ||
| 828 | |||
| 829 | 4 | static void omptool_finalize(ompt_data_t *tool_data) { | |
| 830 | |||
| 831 | /* Protect finalization against forks and other double calls */ | ||
| 832 |
1/2✓ Branch 1 taken 4 times.
✗ Branch 2 not taken.
|
4 | if (tool_data->value == (uint64_t)syscall(SYS_gettid) |
| 833 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | && omptool_initialized) { |
| 834 | |||
| 835 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 4 times.
|
4 | verbose(VB_OMPT, "Finalizing OMPT module"); |
| 836 | |||
| 837 | /* Finalize DLB if needed */ | ||
| 838 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (dlb_initialized_through_ompt) { |
| 839 | 4 | DLB_Finalize(); | |
| 840 | } | ||
| 841 | |||
| 842 | /* Disable OMPT callbacks */ | ||
| 843 | 4 | unset_ompt_callbacks(); | |
| 844 | |||
| 845 | /* Finalize modules */ | ||
| 846 | 4 | omptool__finalize(); | |
| 847 | |||
| 848 | 4 | omptool_initialized = false; | |
| 849 | 4 | dlb_initialized_through_ompt = false; | |
| 850 | } | ||
| 851 | 4 | } | |
| 852 | |||
| 853 | |||
| 854 | #ifndef OMPT_MULTIPLEX_H | ||
| 855 | DLB_EXPORT_SYMBOL | ||
| 856 | #endif | ||
| 857 | 1 | ompt_start_tool_result_t* ompt_start_tool(unsigned int omp_version, const char *runtime_version) { | |
| 858 | 1 | openmp_runtime_version = runtime_version; | |
| 859 | static ompt_start_tool_result_t tool = { | ||
| 860 | .initialize = omptool_initialize, | ||
| 861 | .finalize = omptool_finalize, | ||
| 862 | .tool_data = {0} | ||
| 863 | }; | ||
| 864 | 1 | return &tool; | |
| 865 | } | ||
| 866 |