| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2025 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #include "talp/talp_openmp.h" | ||
| 21 | |||
| 22 | #include "LB_numThreads/omptool.h" | ||
| 23 | #include "LB_comm/shmem_talp.h" | ||
| 24 | #include "LB_core/DLB_kernel.h" | ||
| 25 | #include "apis/dlb_talp.h" | ||
| 26 | #include "support/debug.h" | ||
| 27 | #include "talp/regions.h" | ||
| 28 | #include "talp/talp.h" | ||
| 29 | #include "talp/talp_hwc.h" | ||
| 30 | #include "talp/talp_types.h" | ||
| 31 | |||
| 32 | #include <unistd.h> | ||
| 33 | |||
| 34 | extern __thread bool thread_is_observer; | ||
| 35 | |||
| 36 | /* Update all open nested regions (so, excluding the innermost) and add the | ||
| 37 | * time from each region's start time until the sample's last timestamp (which is | ||
| 38 | * the time that has not yet been added to the regions) as omp_serialization_time */ | ||
| 39 | 1 | static void update_serialization_in_nested_regions(const subprocess_descriptor_t *spd, | |
| 40 | const talp_sample_t *sample) { | ||
| 41 | |||
| 42 | 1 | talp_info_t *talp_info = spd->talp_info; | |
| 43 | |||
| 44 | /* Update all open nested regions */ | ||
| 45 | 1 | pthread_mutex_lock(&talp_info->regions_mutex); | |
| 46 | { | ||
| 47 | 2 | GSList *nested_open_regions = talp_info->open_regions | |
| 48 | 1 | ? talp_info->open_regions->next | |
| 49 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | : NULL; |
| 50 | |||
| 51 | 1 | for (GSList *node = nested_open_regions; | |
| 52 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | node != NULL; |
| 53 | 1 | node = node->next) { | |
| 54 | |||
| 55 | 1 | dlb_monitor_t *monitor = node->data; | |
| 56 | 1 | monitor->omp_serialization_time += | |
| 57 | 1 | sample->last_updated_timestamp - monitor->start_time; | |
| 58 | } | ||
| 59 | } | ||
| 60 | 1 | pthread_mutex_unlock(&talp_info->regions_mutex); | |
| 61 | 1 | } | |
| 62 | |||
| 63 | |||
| 64 | /*********************************************************************************/ | ||
| 65 | /* TALP OpenMP functions */ | ||
| 66 | /*********************************************************************************/ | ||
| 67 | |||
| 68 | /* samples involved in parallel level 1 */ | ||
| 69 | static talp_sample_t** parallel_samples_l1 = NULL; | ||
| 70 | static unsigned int parallel_samples_l1_capacity = 0; | ||
| 71 | |||
| 72 | 1 | void talp_openmp_init(pid_t pid, const options_t* options) { | |
| 73 | |||
| 74 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | ensure(!thread_is_observer, "An observer thread cannot call talp_openmp_init"); |
| 75 | |||
| 76 | 1 | const subprocess_descriptor_t *spd = thread_spd; | |
| 77 | 1 | talp_info_t *talp_info = spd->talp_info; | |
| 78 | |||
| 79 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (talp_info) { |
| 80 | 1 | monitor_data_t *monitor_data = talp_info->monitor->_data; | |
| 81 | 1 | talp_info->flags.have_openmp = true; | |
| 82 | |||
| 83 | /* Fix up number of CPUs for the global region */ | ||
| 84 | 1 | float cpus = CPU_COUNT(&spd->process_mask); | |
| 85 | 1 | talp_info->monitor->avg_cpus = cpus; | |
| 86 | 1 | shmem_talp__set_avg_cpus(monitor_data->node_shared_id, cpus); | |
| 87 | |||
| 88 | /* Start global region (no-op if already started) */ | ||
| 89 | 1 | region_start(spd, talp_info->monitor); | |
| 90 | |||
| 91 | /* Set useful state */ | ||
| 92 | 1 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 93 | 1 | talp_set_sample_state(spd, sample, TALP_STATE_USEFUL); | |
| 94 | } | ||
| 95 | 1 | } | |
| 96 | |||
| 97 | 1 | void talp_openmp_finalize(void) { | |
| 98 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (parallel_samples_l1 != NULL) { |
| 99 | 1 | free(parallel_samples_l1); | |
| 100 | 1 | parallel_samples_l1 = NULL; | |
| 101 | 1 | parallel_samples_l1_capacity = 0; | |
| 102 | } | ||
| 103 | 1 | } | |
| 104 | |||
| 105 | 2 | void talp_openmp_thread_begin(ompt_thread_t thread_type) { | |
| 106 | |||
| 107 | 2 | const subprocess_descriptor_t *spd = thread_spd; | |
| 108 | 2 | talp_info_t *talp_info = spd->talp_info; | |
| 109 | |||
| 110 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
2 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 111 | |||
| 112 | /* Initial thread is already in useful state, set omp_out for others */ | ||
| 113 | 2 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 114 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (sample->state == TALP_STATE_DISABLED) { |
| 115 | /* Not initial thread: */ | ||
| 116 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (talp_info->flags.have_hwc) { |
| 117 | ✗ | talp_hwc_thread_init(); | |
| 118 | } | ||
| 119 | 1 | talp_set_sample_state(spd, sample, TALP_STATE_NOT_USEFUL_OMP_OUT); | |
| 120 | |||
| 121 | /* The initial time of the sample is set to match the start time of | ||
| 122 | * the innermost open region, but other nested open regions need to | ||
| 123 | * be fixed */ | ||
| 124 | 1 | update_serialization_in_nested_regions(spd, sample); | |
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | 2 | void talp_openmp_thread_end(void) { | |
| 129 | |||
| 130 | 2 | const subprocess_descriptor_t *spd = thread_spd; | |
| 131 | 2 | talp_info_t *talp_info = spd->talp_info; | |
| 132 | |||
| 133 |
3/4✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
|
2 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 134 | |||
| 135 | /* Update thread sample with the last microsample */ | ||
| 136 | 1 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 137 | 1 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 138 | |||
| 139 | /* Update state */ | ||
| 140 | 1 | talp_set_sample_state(spd, sample, TALP_STATE_DISABLED); | |
| 141 | |||
| 142 | /* Finalize PAPI per-thread state */ | ||
| 143 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (talp_info->flags.have_hwc) { |
| 144 | ✗ | talp_hwc_thread_finalize(); | |
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | 2 | void talp_openmp_parallel_begin(omptool_parallel_data_t *parallel_data) { | |
| 149 | |||
| 150 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | fatal_cond(parallel_data->requested_parallelism < 1, |
| 151 | "Requested parallel region of invalid size in %s. Please report bug at %s.", | ||
| 152 | __func__, PACKAGE_BUGREPORT); | ||
| 153 | |||
| 154 | 2 | const subprocess_descriptor_t *spd = thread_spd; | |
| 155 | 2 | talp_info_t *talp_info = spd->talp_info; | |
| 156 | |||
| 157 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
2 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 158 | |||
| 159 | 2 | int parallel_level = parallel_data->level; | |
| 160 | |||
| 161 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (parallel_level == 1) { |
| 162 | /* Resize samples of parallel 1 if needed */ | ||
| 163 | 2 | unsigned int requested_parallelism = parallel_data->requested_parallelism; | |
| 164 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (requested_parallelism > parallel_samples_l1_capacity) { |
| 165 | 2 | void *ptr = realloc(parallel_samples_l1, | |
| 166 | sizeof(talp_sample_t*)*requested_parallelism); | ||
| 167 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | fatal_cond(!ptr, "realloc failed in %s", __func__); |
| 168 | 2 | parallel_samples_l1 = ptr; | |
| 169 | 2 | parallel_samples_l1_capacity = requested_parallelism; | |
| 170 | } | ||
| 171 | |||
| 172 | /* Assign local data */ | ||
| 173 | 2 | parallel_data->talp_parallel_data = parallel_samples_l1; | |
| 174 | |||
| 175 | ✗ | } else if (parallel_level > 1) { | |
| 176 | /* Allocate parallel samples array */ | ||
| 177 | ✗ | unsigned int requested_parallelism = parallel_data->requested_parallelism; | |
| 178 | ✗ | void *ptr = malloc(sizeof(talp_sample_t*)*requested_parallelism); | |
| 179 | ✗ | fatal_cond(!ptr, "malloc failed in %s", __func__); | |
| 180 | |||
| 181 | /* Assign local data */ | ||
| 182 | ✗ | parallel_data->talp_parallel_data = ptr; | |
| 183 | } | ||
| 184 | |||
| 185 | /* Update stats */ | ||
| 186 | 2 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 187 | 2 | DLB_ATOMIC_ADD_RLX(&sample->stats.num_omp_parallels, 1); | |
| 188 | |||
| 189 | /* Update main thread serial mode if this is the outermost parallel region */ | ||
| 190 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (parallel_level == 1) { |
| 191 | 2 | talp_set_main_sample_in_serial_mode(false); | |
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | 2 | void talp_openmp_parallel_end(omptool_parallel_data_t *parallel_data) { | |
| 196 | |||
| 197 | 2 | const subprocess_descriptor_t *spd = thread_spd; | |
| 198 | 2 | talp_info_t *talp_info = spd->talp_info; | |
| 199 | |||
| 200 |
2/4✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 2 times.
|
2 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 201 | |||
| 202 | /* Update thread sample with the last microsample */ | ||
| 203 | 2 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 204 | 2 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 205 | |||
| 206 | 2 | int parallel_level = parallel_data->level; | |
| 207 | |||
| 208 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (parallel_level == 1) { |
| 209 | /* Flush and aggregate all samples of the parallel region */ | ||
| 210 | 2 | talp_flush_sample_subset_to_regions(spd, | |
| 211 | 2 | parallel_data->talp_parallel_data, | |
| 212 | parallel_data->actual_parallelism); | ||
| 213 | |||
| 214 | ✗ | } else if (parallel_level > 1) { | |
| 215 | /* Flush and aggregate all samples of this parallel except this | ||
| 216 | * thread's sample. The primary thread of a nested parallel region | ||
| 217 | * will keep its samples until it finishes as non-primary | ||
| 218 | * team-worker or reaches the level 1 parallel region */ | ||
| 219 | ✗ | talp_sample_t **parallel_samples = parallel_data->talp_parallel_data; | |
| 220 | ✗ | talp_flush_sample_subset_to_regions(spd, | |
| 221 | &parallel_samples[1], | ||
| 222 | ✗ | parallel_data->actual_parallelism-1); | |
| 223 | |||
| 224 | /* free local data */ | ||
| 225 | ✗ | free(parallel_data->talp_parallel_data); | |
| 226 | ✗ | parallel_data->talp_parallel_data = NULL; | |
| 227 | } | ||
| 228 | |||
| 229 | /* Update current thread's state */ | ||
| 230 | 2 | talp_set_sample_state(spd, sample, TALP_STATE_USEFUL); | |
| 231 | |||
| 232 | /* Update the state of the rest of team-worker threads | ||
| 233 | * (note that talp_set_sample_state cannot be used here because we are | ||
| 234 | * impersonating a worker thread) */ | ||
| 235 | 2 | talp_sample_t **parallel_samples = parallel_data->talp_parallel_data; | |
| 236 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | for (unsigned int i = 1; i < parallel_data->actual_parallelism; ++i) { |
| 237 | 1 | talp_sample_t *worker_sample = parallel_samples[i]; | |
| 238 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (worker_sample->state == TALP_STATE_NOT_USEFUL_OMP_IN) { |
| 239 | ✗ | worker_sample->state = TALP_STATE_NOT_USEFUL_OMP_OUT; | |
| 240 | } | ||
| 241 | } | ||
| 242 | |||
| 243 | /* Update main thread serial mode if this was the outermost parallel region */ | ||
| 244 |
1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
|
2 | if (parallel_level == 1) { |
| 245 | 2 | talp_set_main_sample_in_serial_mode(true); | |
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | 3 | void talp_openmp_into_parallel_function( | |
| 250 | omptool_parallel_data_t *parallel_data, unsigned int index) { | ||
| 251 | |||
| 252 | 3 | const subprocess_descriptor_t *spd = thread_spd; | |
| 253 | 3 | talp_info_t *talp_info = spd->talp_info; | |
| 254 | |||
| 255 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 256 | |||
| 257 | /* Assign thread sample as team-worker of this parallel */ | ||
| 258 | 3 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 259 | 3 | talp_sample_t **parallel_samples = parallel_data->talp_parallel_data; | |
| 260 | /* Probably optimized, but try to avoid invalidating | ||
| 261 | * the cache line on reused parallel data */ | ||
| 262 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (parallel_samples[index] != sample) { |
| 263 | 2 | parallel_samples[index] = sample; | |
| 264 | } | ||
| 265 | |||
| 266 | /* Update thread sample with the last microsample */ | ||
| 267 | 3 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 268 | |||
| 269 | /* Update state */ | ||
| 270 | 3 | talp_set_sample_state(spd, sample, TALP_STATE_USEFUL); | |
| 271 | } | ||
| 272 | |||
| 273 | 1 | void talp_openmp_outof_parallel_function(void) { | |
| 274 | |||
| 275 | 1 | const subprocess_descriptor_t *spd = thread_spd; | |
| 276 | 1 | talp_info_t *talp_info = spd->talp_info; | |
| 277 | |||
| 278 |
2/4✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 1 times.
|
1 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 279 | |||
| 280 | /* Update thread sample with the last microsample */ | ||
| 281 | 1 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 282 | 1 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 283 | |||
| 284 | /* Update state */ | ||
| 285 | 1 | talp_set_sample_state(spd, sample, TALP_STATE_NOT_USEFUL_OMP_OUT); | |
| 286 | } | ||
| 287 | |||
| 288 | 3 | void talp_openmp_into_parallel_implicit_barrier(omptool_parallel_data_t *parallel_data) { | |
| 289 | |||
| 290 | 3 | const subprocess_descriptor_t *spd = thread_spd; | |
| 291 | 3 | talp_info_t *talp_info = spd->talp_info; | |
| 292 | |||
| 293 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 294 | |||
| 295 | /* Update thread sample with the last microsample */ | ||
| 296 | 3 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 297 | 3 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 298 | |||
| 299 | /* Update state */ | ||
| 300 | 3 | talp_set_sample_state(spd, sample, TALP_STATE_NOT_USEFUL_OMP_IN); | |
| 301 | } | ||
| 302 | |||
| 303 | 3 | void talp_openmp_into_parallel_sync(omptool_parallel_data_t *parallel_data) { | |
| 304 | |||
| 305 | 3 | const subprocess_descriptor_t *spd = thread_spd; | |
| 306 | 3 | talp_info_t *talp_info = spd->talp_info; | |
| 307 | |||
| 308 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 309 | |||
| 310 | /* Update thread sample with the last microsample */ | ||
| 311 | 3 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 312 | 3 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 313 | |||
| 314 | /* Update state */ | ||
| 315 | 3 | talp_set_sample_state(spd, sample, TALP_STATE_NOT_USEFUL_OMP_IN); | |
| 316 | } | ||
| 317 | |||
| 318 | 3 | void talp_openmp_outof_parallel_sync(omptool_parallel_data_t *parallel_data) { | |
| 319 | |||
| 320 | 3 | const subprocess_descriptor_t *spd = thread_spd; | |
| 321 | 3 | talp_info_t *talp_info = spd->talp_info; | |
| 322 | |||
| 323 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 324 | |||
| 325 | /* Update thread sample with the last microsample */ | ||
| 326 | 3 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 327 | 3 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 328 | |||
| 329 | /* Update state */ | ||
| 330 | 3 | talp_set_sample_state(spd, sample, TALP_STATE_USEFUL); | |
| 331 | } | ||
| 332 | |||
| 333 | 3 | void talp_openmp_task_create(void) { | |
| 334 | |||
| 335 | 3 | const subprocess_descriptor_t *spd = thread_spd; | |
| 336 | 3 | talp_info_t *talp_info = spd->talp_info; | |
| 337 | |||
| 338 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 339 | |||
| 340 | /* Just update stats */ | ||
| 341 | 3 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 342 | 3 | DLB_ATOMIC_ADD_RLX(&sample->stats.num_omp_tasks, 1); | |
| 343 | } | ||
| 344 | |||
| 345 | 3 | void talp_openmp_task_complete(void) { | |
| 346 | |||
| 347 | 3 | const subprocess_descriptor_t *spd = thread_spd; | |
| 348 | 3 | talp_info_t *talp_info = spd->talp_info; | |
| 349 | |||
| 350 |
2/4✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 3 times.
|
3 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 351 | |||
| 352 | /* Update thread sample with the last microsample */ | ||
| 353 | 3 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 354 | 3 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 355 | |||
| 356 | /* Update state (FIXME: tasks outside of parallels?) */ | ||
| 357 | 3 | talp_set_sample_state(spd, sample, TALP_STATE_NOT_USEFUL_OMP_IN); | |
| 358 | } | ||
| 359 | |||
| 360 | 6 | void talp_openmp_task_switch(void) { | |
| 361 | |||
| 362 | 6 | const subprocess_descriptor_t *spd = thread_spd; | |
| 363 | 6 | talp_info_t *talp_info = spd->talp_info; | |
| 364 | |||
| 365 |
2/4✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 6 times.
|
6 | if (talp_info == NULL || !talp_info->flags.have_openmp) return; |
| 366 | |||
| 367 | /* Update thread sample with the last microsample */ | ||
| 368 | 6 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 369 | 6 | talp_update_sample(spd, sample, TALP_NO_TIMESTAMP); | |
| 370 | |||
| 371 | /* Update state */ | ||
| 372 | 6 | talp_set_sample_state(spd, sample, TALP_STATE_USEFUL); | |
| 373 | } | ||
| 374 | |||
| 375 | |||
| 376 | /*********************************************************************************/ | ||
| 377 | /* Vtable for handling omptool events */ | ||
| 378 | /*********************************************************************************/ | ||
| 379 | |||
| 380 | const omptool_event_funcs_t talp_events_vtable = (const omptool_event_funcs_t) { | ||
| 381 | .init = talp_openmp_init, | ||
| 382 | .finalize = talp_openmp_finalize, | ||
| 383 | .into_mpi = NULL, | ||
| 384 | .outof_mpi = NULL, | ||
| 385 | .lend_from_api = NULL, | ||
| 386 | .thread_begin = talp_openmp_thread_begin, | ||
| 387 | .thread_end = talp_openmp_thread_end, | ||
| 388 | .thread_role_shift = NULL, | ||
| 389 | .parallel_begin = talp_openmp_parallel_begin, | ||
| 390 | .parallel_end = talp_openmp_parallel_end, | ||
| 391 | .into_parallel_function = talp_openmp_into_parallel_function, | ||
| 392 | .outof_parallel_function = talp_openmp_outof_parallel_function, | ||
| 393 | .into_parallel_implicit_barrier = talp_openmp_into_parallel_implicit_barrier, | ||
| 394 | .into_parallel_sync = talp_openmp_into_parallel_sync, | ||
| 395 | .outof_parallel_sync = talp_openmp_outof_parallel_sync, | ||
| 396 | .task_create = talp_openmp_task_create, | ||
| 397 | .task_complete = talp_openmp_task_complete, | ||
| 398 | .task_switch = talp_openmp_task_switch, | ||
| 399 | }; | ||
| 400 |