| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2026 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #ifdef HAVE_CONFIG_H | ||
| 21 | #include <config.h> | ||
| 22 | #endif | ||
| 23 | |||
| 24 | #include "talp/talp.h" | ||
| 25 | |||
| 26 | #include "LB_core/node_barrier.h" | ||
| 27 | #include "LB_core/spd.h" | ||
| 28 | #include "LB_comm/shmem_talp.h" | ||
| 29 | #include "apis/dlb_errors.h" | ||
| 30 | #include "apis/dlb_talp.h" | ||
| 31 | #include "support/atomic.h" | ||
| 32 | #include "support/debug.h" | ||
| 33 | #include "support/error.h" | ||
| 34 | #include "support/gslist.h" | ||
| 35 | #include "support/gtree.h" | ||
| 36 | #include "support/mytime.h" | ||
| 37 | #include "support/tracing.h" | ||
| 38 | #include "support/options.h" | ||
| 39 | #include "support/mask_utils.h" | ||
| 40 | #include "talp/backend.h" | ||
| 41 | #include "talp/perf_metrics.h" | ||
| 42 | #include "talp/regions.h" | ||
| 43 | #include "talp/talp_gpu.h" | ||
| 44 | #include "talp/talp_hwc.h" | ||
| 45 | #include "talp/talp_output.h" | ||
| 46 | #include "talp/talp_record.h" | ||
| 47 | #include "talp/talp_types.h" | ||
| 48 | #ifdef MPI_LIB | ||
| 49 | #include "mpi/mpi_core.h" | ||
| 50 | #endif | ||
| 51 | |||
| 52 | #include <stdlib.h> | ||
| 53 | #include <pthread.h> | ||
| 54 | |||
| 55 | extern __thread bool thread_is_observer; | ||
| 56 | |||
| 57 | static void talp_dealloc_samples(const subprocess_descriptor_t *spd); | ||
| 58 | |||
| 59 | |||
| 60 | /* Update all open regions with the macrosample */ | ||
| 61 | 5348 | static void update_regions_with_macrosample(const subprocess_descriptor_t *spd, | |
| 62 | const talp_macrosample_t *macrosample, int num_cpus) { | ||
| 63 | 5348 | talp_info_t *talp_info = spd->talp_info; | |
| 64 | |||
| 65 | /* Update all open regions */ | ||
| 66 | 5348 | pthread_mutex_lock(&talp_info->regions_mutex); | |
| 67 | { | ||
| 68 | 5348 | for (GSList *node = talp_info->open_regions; | |
| 69 |
2/2✓ Branch 0 taken 5918 times.
✓ Branch 1 taken 5348 times.
|
11266 | node != NULL; |
| 70 | 5918 | node = node->next) { | |
| 71 | 5918 | dlb_monitor_t *monitor = node->data; | |
| 72 | 5918 | monitor_data_t *monitor_data = monitor->_data; | |
| 73 | |||
| 74 | /* Update number of CPUs if needed */ | ||
| 75 | 5918 | monitor->num_cpus = max_int(monitor->num_cpus, num_cpus); | |
| 76 | |||
| 77 | /* Timers */ | ||
| 78 | 5918 | monitor->useful_time += macrosample->timers.useful; | |
| 79 | 5918 | monitor->mpi_time += macrosample->timers.not_useful_mpi; | |
| 80 | 5918 | monitor->omp_load_imbalance_time += macrosample->timers.not_useful_omp_in_lb; | |
| 81 | 5918 | monitor->omp_scheduling_time += macrosample->timers.not_useful_omp_in_sched; | |
| 82 | 5918 | monitor->omp_serialization_time += macrosample->timers.not_useful_omp_out; | |
| 83 | 5918 | monitor->gpu_runtime_time += macrosample->timers.not_useful_gpu; | |
| 84 | |||
| 85 | /* GPU Timers */ | ||
| 86 | 5918 | monitor->gpu_useful_time += macrosample->gpu_timers.useful; | |
| 87 | 5918 | monitor->gpu_communication_time += macrosample->gpu_timers.communication; | |
| 88 | 5918 | monitor->gpu_inactive_time += macrosample->gpu_timers.inactive; | |
| 89 | |||
| 90 | /* Counters */ | ||
| 91 | 5918 | monitor->cycles += macrosample->counters.cycles; | |
| 92 | 5918 | monitor->instructions += macrosample->counters.instructions; | |
| 93 | |||
| 94 | /* Stats */ | ||
| 95 | 5918 | monitor->num_mpi_calls += macrosample->stats.num_mpi_calls; | |
| 96 | 5918 | monitor->num_omp_parallels += macrosample->stats.num_omp_parallels; | |
| 97 | 5918 | monitor->num_omp_tasks += macrosample->stats.num_omp_tasks; | |
| 98 | 5918 | monitor->num_gpu_runtime_calls += macrosample->stats.num_gpu_runtime_calls; | |
| 99 | |||
| 100 | /* Update shared memory only if requested */ | ||
| 101 |
2/2✓ Branch 0 taken 3610 times.
✓ Branch 1 taken 2308 times.
|
5918 | if (talp_info->flags.external_profiler) { |
| 102 | 3610 | shmem_talp__set_times(monitor_data->node_shared_id, | |
| 103 | monitor->mpi_time, | ||
| 104 | monitor->useful_time); | ||
| 105 | } | ||
| 106 | } | ||
| 107 | } | ||
| 108 | 5348 | pthread_mutex_unlock(&talp_info->regions_mutex); | |
| 109 | 5348 | } | |
| 110 | |||
| 111 | |||
| 112 | #ifdef MPI_LIB | ||
| 113 | /* Returns the number of MPI processes that have HWC enabled */ | ||
| 114 | static int get_hwc_init_across_world(const subprocess_descriptor_t *spd) { | ||
| 115 | |||
| 116 | talp_info_t *talp_info = spd->talp_info; | ||
| 117 | |||
| 118 | // status = 1 means HWC are enabled | ||
| 119 | int hwc_local_status = talp_info->flags.have_hwc ? 1 : 0; | ||
| 120 | |||
| 121 | int hwc_global_statuses = 0; | ||
| 122 | |||
| 123 | PMPI_Allreduce(&hwc_local_status, &hwc_global_statuses, 1, | ||
| 124 | MPI_INT, MPI_SUM, getWorldComm()); | ||
| 125 | |||
| 126 | return hwc_global_statuses; | ||
| 127 | } | ||
| 128 | #endif | ||
| 129 | |||
| 130 | |||
| 131 | /*********************************************************************************/ | ||
| 132 | /* Init / Finalize */ | ||
| 133 | /*********************************************************************************/ | ||
| 134 | |||
| 135 | 23 | void talp_init(subprocess_descriptor_t *spd) { | |
| 136 | |||
| 137 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | ensure(!spd->talp_info, "TALP already initialized"); |
| 138 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | ensure(!thread_is_observer, "An observer thread cannot call talp_init"); |
| 139 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 20 times.
|
23 | verbose(VB_TALP, "Initializing TALP module with worker mask: %s", |
| 140 | mu_to_str(&spd->process_mask)); | ||
| 141 | |||
| 142 | /* Initialize talp info */ | ||
| 143 | 23 | talp_info_t *talp_info = malloc(sizeof(talp_info_t)); | |
| 144 | 23 | *talp_info = (const talp_info_t) { | |
| 145 | .flags = { | ||
| 146 | 23 | .external_profiler = spd->options.talp_external_profiler, | |
| 147 | 23 | .have_shmem = spd->options.talp_external_profiler, | |
| 148 | 23 | .have_minimal_shmem = !spd->options.talp_external_profiler | |
| 149 |
3/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14 times.
|
23 | && spd->options.talp_summary & SUMMARY_NODE, |
| 150 | }, | ||
| 151 | 23 | .regions = g_tree_new_full( | |
| 152 | (GCompareDataFunc)region_compare_by_name, | ||
| 153 | NULL, NULL, region_dealloc), | ||
| 154 | .regions_mutex = PTHREAD_MUTEX_INITIALIZER, | ||
| 155 | .samples_mutex = PTHREAD_MUTEX_INITIALIZER, | ||
| 156 | }; | ||
| 157 | 23 | spd->talp_info = talp_info; | |
| 158 | |||
| 159 | /* Initialize shared memory */ | ||
| 160 |
3/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14 times.
|
23 | if (talp_info->flags.have_shmem || talp_info->flags.have_minimal_shmem) { |
| 161 | /* If we only need a minimal shmem, its size will be the user-provided | ||
| 162 | * multiplier times 'system_size' (usually, 1 region per process). | ||
| 163 | * Otherwise, we multiply it by DEFAULT_REGIONS_PER_PROC. | ||
| 164 | */ | ||
| 165 | enum { DEFAULT_REGIONS_PER_PROC = 100 }; | ||
| 166 | 18 | int shmem_size_multiplier = spd->options.shm_size_multiplier | |
| 167 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | * (talp_info->flags.have_shmem ? DEFAULT_REGIONS_PER_PROC : 1); |
| 168 | 9 | shmem_talp__init(spd->options.shm_key, shmem_size_multiplier); | |
| 169 | } | ||
| 170 | |||
| 171 | /* Initialize TALP components */ | ||
| 172 |
2/2✓ Branch 0 taken 15 times.
✓ Branch 1 taken 8 times.
|
23 | if (spd->options.talp & (TALP_COMPONENT_DEFAULT | TALP_COMPONENT_GPU)) { |
| 173 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
|
15 | if (talp_gpu_init(spd) == DLB_SUCCESS) { |
| 174 | ✗ | talp_info->flags.have_gpu = true; | |
| 175 | ✗ | verbose(VB_TALP, "GPU component enabled successfully"); | |
| 176 | } else { | ||
| 177 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 15 times.
|
15 | if (spd->options.talp & TALP_COMPONENT_GPU) { |
| 178 | /* component was explicit and failed, warn user */ | ||
| 179 | ✗ | warning("TALP: Failed to load GPU component"); | |
| 180 | } | ||
| 181 | } | ||
| 182 | } | ||
| 183 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | if (spd->options.talp & (TALP_COMPONENT_DEFAULT | TALP_COMPONENT_HWC)) { |
| 184 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 23 times.
|
23 | if (talp_hwc_init(spd) == DLB_SUCCESS) { |
| 185 | ✗ | talp_info->flags.have_hwc = true; | |
| 186 | ✗ | verbose(VB_TALP, "HWC component enabled successfully"); | |
| 187 | } else { | ||
| 188 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 15 times.
|
23 | if (spd->options.talp & TALP_COMPONENT_HWC) { |
| 189 | /* component was explicit and failed, warn user */ | ||
| 190 | 8 | warning("TALP: Failed to load HWC component"); | |
| 191 | } | ||
| 192 | } | ||
| 193 | } | ||
| 194 | |||
| 195 | #ifdef MPI_LIB | ||
| 196 | /* Check HWC status across all processes. Every process needs to do the check | ||
| 197 | * because it's a collective operation and some processes may have been started | ||
| 198 | * without the appropriate flag. */ | ||
| 199 | if (is_mpi_ready()) { | ||
| 200 | int num_procs_with_hwc = get_hwc_init_across_world(spd); | ||
| 201 | if (num_procs_with_hwc > 0 && num_procs_with_hwc < _mpi_size) { | ||
| 202 | warning0("Hardware Counters initialization has failed, disabling option."); | ||
| 203 | talp_hwc_finalize(); | ||
| 204 | talp_info->flags.have_hwc = false; | ||
| 205 | } | ||
| 206 | } | ||
| 207 | #endif | ||
| 208 | |||
| 209 | /* Initialize global region monitor | ||
| 210 | * (at this point we don't know how many CPUs, it will be fixed in talp_openmp_init) */ | ||
| 211 | 23 | talp_info->monitor = region_register(spd, region_get_global_name()); | |
| 212 | |||
| 213 | /* Start global region */ | ||
| 214 | 23 | region_start(spd, talp_info->monitor); | |
| 215 | 23 | } | |
| 216 | |||
| 217 | 23 | void talp_finalize(subprocess_descriptor_t *spd) { | |
| 218 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | ensure(spd->talp_info, "TALP is not initialized"); |
| 219 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | ensure(!thread_is_observer, "An observer thread cannot call talp_finalize"); |
| 220 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | verbose(VB_TALP, "Finalizing TALP module"); |
| 221 | |||
| 222 | 23 | talp_info_t *talp_info = spd->talp_info; | |
| 223 | |||
| 224 | /* Stop open regions | ||
| 225 | * (Note that region_stop needs to acquire the regions_mutex | ||
| 226 | * lock, so we need to iterate without it) */ | ||
| 227 |
2/2✓ Branch 0 taken 16 times.
✓ Branch 1 taken 23 times.
|
39 | while(talp_info->open_regions != NULL) { |
| 228 | 16 | dlb_monitor_t *monitor = talp_info->open_regions->data; | |
| 229 | 16 | region_stop(spd, monitor); | |
| 230 | } | ||
| 231 | |||
| 232 | /* Finalize TALP components */ | ||
| 233 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (talp_info->flags.have_gpu) { |
| 234 | ✗ | talp_gpu_finalize(); | |
| 235 | } | ||
| 236 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
|
23 | if (talp_info->flags.have_hwc) { |
| 237 | ✗ | talp_hwc_finalize(); | |
| 238 | } | ||
| 239 | |||
| 240 | /* Per-process output (no MPI or requested by user) */ | ||
| 241 |
2/2✓ Branch 0 taken 8 times.
✓ Branch 1 taken 15 times.
|
23 | if (!talp_info->flags.have_mpi |
| 242 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 8 times.
|
8 | || spd->options.talp_partial_output) { |
| 243 | |||
| 244 | 15 | pthread_mutex_lock(&talp_info->regions_mutex); | |
| 245 | { | ||
| 246 | /* Record all regions */ | ||
| 247 | 15 | for (GTreeNode *node = g_tree_node_first(talp_info->regions); | |
| 248 |
2/2✓ Branch 0 taken 1221 times.
✓ Branch 1 taken 15 times.
|
1236 | node != NULL; |
| 249 | 1221 | node = g_tree_node_next(node)) { | |
| 250 | 1221 | const dlb_monitor_t *monitor = g_tree_node_value(node); | |
| 251 | 1221 | talp_record_monitor(spd, monitor); | |
| 252 | } | ||
| 253 | } | ||
| 254 | 15 | pthread_mutex_unlock(&talp_info->regions_mutex); | |
| 255 | } | ||
| 256 | |||
| 257 | /* Print/write all collected summaries */ | ||
| 258 | 23 | talp_output_finalize(spd->options.talp_output_file, spd->options.talp_partial_output); | |
| 259 | |||
| 260 | /* Deallocate samples structure */ | ||
| 261 | 23 | talp_dealloc_samples(spd); | |
| 262 | |||
| 263 | /* Finalize shared memory */ | ||
| 264 |
3/4✓ Branch 0 taken 14 times.
✓ Branch 1 taken 9 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 14 times.
|
23 | if (talp_info->flags.have_shmem || talp_info->flags.have_minimal_shmem) { |
| 265 | 9 | shmem_talp__finalize(spd->id); | |
| 266 | } | ||
| 267 | |||
| 268 | /* Deallocate monitoring regions and talp_info */ | ||
| 269 | 23 | pthread_mutex_lock(&talp_info->regions_mutex); | |
| 270 | { | ||
| 271 | /* Destroy GTree, each node is deallocated with the function region_dealloc */ | ||
| 272 | 23 | g_tree_destroy(talp_info->regions); | |
| 273 | 23 | talp_info->regions = NULL; | |
| 274 | 23 | talp_info->monitor = NULL; | |
| 275 | |||
| 276 | /* Destroy list of open regions */ | ||
| 277 | 23 | g_slist_free(talp_info->open_regions); | |
| 278 | 23 | talp_info->open_regions = NULL; | |
| 279 | } | ||
| 280 | 23 | pthread_mutex_unlock(&talp_info->regions_mutex); | |
| 281 | 23 | free(talp_info); | |
| 282 | 23 | spd->talp_info = NULL; | |
| 283 | 23 | } | |
| 284 | |||
| 285 | |||
| 286 | /*********************************************************************************/ | ||
| 287 | /* Sample functions */ | ||
| 288 | /*********************************************************************************/ | ||
| 289 | |||
| 290 | static __thread talp_sample_t* _tls_sample = NULL; | ||
| 291 | static __thread bool _is_main_sample = false; | ||
| 292 | static __thread bool _is_main_sample_in_serial_mode = false; | ||
| 293 | |||
| 294 | /* Quick test, without locking and without generating a new sample */ | ||
| 295 | ✗ | static inline bool is_talp_sample_mine(const talp_sample_t *sample) { | |
| 296 | ✗ | return sample != NULL && sample == _tls_sample; | |
| 297 | } | ||
| 298 | |||
| 299 | 23 | static void talp_dealloc_samples(const subprocess_descriptor_t *spd) { | |
| 300 | |||
| 301 | /* Warning about _tls_sample in worker threads: | ||
| 302 | * worker threads do not currently deallocate their sample. | ||
| 303 | * In some cases, it might happen that a worker thread exits without | ||
| 304 | * the main thread reducing its sample, so in these cases the sample | ||
| 305 | * needs to outlive the thread. | ||
| 306 | * The main thread could deallocate it at this point, but then the | ||
| 307 | * TLS variable would be broken if TALP is reinitialized again. | ||
| 308 | * For now we will keep it like this and will revisit if needed. */ | ||
| 309 | |||
| 310 | /* Deallocate main thread sample */ | ||
| 311 | 23 | free(_tls_sample); | |
| 312 | 23 | _tls_sample = NULL; | |
| 313 | |||
| 314 | /* Deallocate samples list */ | ||
| 315 | 23 | talp_info_t *talp_info = spd->talp_info; | |
| 316 | 23 | pthread_mutex_lock(&talp_info->samples_mutex); | |
| 317 | { | ||
| 318 | 23 | free(talp_info->samples); | |
| 319 | 23 | talp_info->samples = NULL; | |
| 320 | 23 | talp_info->ncpus = 0; | |
| 321 | } | ||
| 322 | 23 | pthread_mutex_unlock(&talp_info->samples_mutex); | |
| 323 | 23 | } | |
| 324 | |||
| 325 | /* Get the TLS associated sample */ | ||
| 326 | 5399 | talp_sample_t* talp_get_thread_sample(const subprocess_descriptor_t *spd) { | |
| 327 | |||
| 328 | /* Thread already has an allocated sample, return it */ | ||
| 329 |
2/2✓ Branch 0 taken 5375 times.
✓ Branch 1 taken 24 times.
|
5399 | if (likely(_tls_sample != NULL)) return _tls_sample; |
| 330 | |||
| 331 | /* Observer threads don't have a valid sample */ | ||
| 332 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | if (unlikely(thread_is_observer)) return NULL; |
| 333 | |||
| 334 | /* Otherwise, allocate */ | ||
| 335 | 24 | talp_info_t *talp_info = spd->talp_info; | |
| 336 | 24 | pthread_mutex_lock(&talp_info->samples_mutex); | |
| 337 | { | ||
| 338 | 24 | int ncpus = ++talp_info->ncpus; | |
| 339 | 24 | void *samples = realloc(talp_info->samples, sizeof(talp_sample_t*)*ncpus); | |
| 340 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (samples) { |
| 341 | 24 | talp_info->samples = samples; | |
| 342 | void *new_sample; | ||
| 343 |
1/2✓ Branch 0 taken 24 times.
✗ Branch 1 not taken.
|
24 | if (posix_memalign(&new_sample, DLB_CACHE_LINE, sizeof(talp_sample_t)) == 0) { |
| 344 | 24 | _tls_sample = new_sample; | |
| 345 | 24 | talp_info->samples[ncpus-1] = new_sample; | |
| 346 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 2 times.
|
24 | if (ncpus == 1) { |
| 347 | 22 | _is_main_sample = true; | |
| 348 | 22 | _is_main_sample_in_serial_mode = true; | |
| 349 | } | ||
| 350 | } | ||
| 351 | } | ||
| 352 | } | ||
| 353 | 24 | pthread_mutex_unlock(&talp_info->samples_mutex); | |
| 354 | |||
| 355 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 24 times.
|
24 | fatal_cond(_tls_sample == NULL, "TALP: could not allocate thread sample"); |
| 356 | |||
| 357 | /* If a thread is created mid-region, its initial time is that of the | ||
| 358 | * innermost open region, otherwise it is the current time */ | ||
| 359 | int64_t last_updated_timestamp; | ||
| 360 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 22 times.
|
24 | if (talp_info->open_regions) { |
| 361 | 2 | const dlb_monitor_t *monitor = talp_info->open_regions->data; | |
| 362 | 2 | last_updated_timestamp = monitor->start_time; | |
| 363 | } else { | ||
| 364 | 22 | last_updated_timestamp = get_time_in_ns(); | |
| 365 | } | ||
| 366 | |||
| 367 | 24 | *_tls_sample = (const talp_sample_t) { | |
| 368 | .last_updated_timestamp = last_updated_timestamp, | ||
| 369 | }; | ||
| 370 | |||
| 371 | 24 | talp_set_sample_state(spd, _tls_sample, TALP_STATE_DISABLED); | |
| 372 | |||
| 373 | #ifdef INSTRUMENTATION_VERSION | ||
| 374 | unsigned events[] = {MONITOR_CYCLES, MONITOR_INSTR}; | ||
| 375 | long long hwc_values[] = {0, 0}; | ||
| 376 | instrument_nevent(2, events, hwc_values); | ||
| 377 | #endif | ||
| 378 | |||
| 379 | 24 | return _tls_sample; | |
| 380 | } | ||
| 381 | |||
| 382 | /* WARNING: this function may only be called when updating own thread's sample */ | ||
| 383 | 97 | void talp_set_sample_state(const subprocess_descriptor_t *spd, talp_sample_t *sample, | |
| 384 | talp_sample_state_t new_state) { | ||
| 385 | |||
| 386 | 97 | talp_info_t *talp_info = spd->talp_info; | |
| 387 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 97 times.
|
97 | if (talp_info->flags.have_hwc) { |
| 388 | ✗ | talp_sample_state_t old = sample->state; | |
| 389 | ✗ | talp_hwc_on_state_change(old, new_state); | |
| 390 | } | ||
| 391 | |||
| 392 | 97 | sample->state = new_state; | |
| 393 | |||
| 394 | instrument_event(MONITOR_STATE, | ||
| 395 | new_state == TALP_STATE_DISABLED ? MONITOR_STATE_DISABLED | ||
| 396 | : new_state == TALP_STATE_USEFUL ? MONITOR_STATE_USEFUL | ||
| 397 | : new_state == TALP_STATE_NOT_USEFUL_MPI ? MONITOR_STATE_NOT_USEFUL_MPI | ||
| 398 | : new_state == TALP_STATE_NOT_USEFUL_OMP_IN ? MONITOR_STATE_NOT_USEFUL_OMP_IN | ||
| 399 | : new_state == TALP_STATE_NOT_USEFUL_OMP_OUT ? MONITOR_STATE_NOT_USEFUL_OMP_OUT | ||
| 400 | : new_state == TALP_STATE_NOT_USEFUL_GPU ? MONITOR_STATE_NOT_USEFUL_GPU | ||
| 401 | : 0, | ||
| 402 | EVENT_BEGIN); | ||
| 403 | 97 | } | |
| 404 | |||
| 405 | /* Compute new microsample (time since last update) and update sample values */ | ||
| 406 | 5369 | void talp_update_sample(const subprocess_descriptor_t *spd, talp_sample_t *sample, | |
| 407 | int64_t timestamp) { | ||
| 408 | |||
| 409 | /* Observer threads ignore this function */ | ||
| 410 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5369 times.
|
5369 | if (unlikely(sample == NULL)) return; |
| 411 | |||
| 412 | 5369 | talp_info_t *talp_info = spd->talp_info; | |
| 413 | |||
| 414 | /* Compute duration and set new last_updated_timestamp */ | ||
| 415 |
2/2✓ Branch 0 taken 37 times.
✓ Branch 1 taken 5332 times.
|
5369 | int64_t now = timestamp == TALP_NO_TIMESTAMP ? get_time_in_ns() : timestamp; |
| 416 | 5369 | int64_t microsample_duration = now - sample->last_updated_timestamp; | |
| 417 | 5369 | sample->last_updated_timestamp = now; | |
| 418 | |||
| 419 | /* Update the appropriate sample timer */ | ||
| 420 |
5/7✓ Branch 0 taken 4 times.
✓ Branch 1 taken 5345 times.
✓ Branch 2 taken 7 times.
✓ Branch 3 taken 11 times.
✓ Branch 4 taken 2 times.
✗ Branch 5 not taken.
✗ Branch 6 not taken.
|
5369 | switch(sample->state) { |
| 421 | 4 | case TALP_STATE_DISABLED: | |
| 422 | 4 | break; | |
| 423 | 5345 | case TALP_STATE_USEFUL: | |
| 424 | 5345 | DLB_ATOMIC_ADD_RLX(&sample->timers.useful, microsample_duration); | |
| 425 | 5345 | break; | |
| 426 | 7 | case TALP_STATE_NOT_USEFUL_MPI: | |
| 427 |
1/2✓ Branch 0 taken 7 times.
✗ Branch 1 not taken.
|
7 | if (_is_main_sample_in_serial_mode) { |
| 428 | 7 | int num_cpus = talp_info->ncpus; | |
| 429 | 7 | microsample_duration *= num_cpus; | |
| 430 | } | ||
| 431 | 7 | DLB_ATOMIC_ADD_RLX(&sample->timers.not_useful_mpi, microsample_duration); | |
| 432 | 7 | break; | |
| 433 | 11 | case TALP_STATE_NOT_USEFUL_OMP_IN: | |
| 434 | 11 | DLB_ATOMIC_ADD_RLX(&sample->timers.not_useful_omp_in, microsample_duration); | |
| 435 | 11 | break; | |
| 436 | 2 | case TALP_STATE_NOT_USEFUL_OMP_OUT: | |
| 437 | 2 | DLB_ATOMIC_ADD_RLX(&sample->timers.not_useful_omp_out, microsample_duration); | |
| 438 | 2 | break; | |
| 439 | ✗ | case TALP_STATE_NOT_USEFUL_GPU: | |
| 440 | ✗ | DLB_ATOMIC_ADD_RLX(&sample->timers.not_useful_gpu, microsample_duration); | |
| 441 | ✗ | break; | |
| 442 | } | ||
| 443 | |||
| 444 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5369 times.
|
5369 | if (talp_info->flags.have_hwc) { |
| 445 | /* Only read counters if we are updating this thread's sample */ | ||
| 446 | ✗ | if (is_talp_sample_mine(sample)) { | |
| 447 | hwc_measurements_t measurements; | ||
| 448 | ✗ | if (talp_hwc_collect(&measurements)) { | |
| 449 | /* Atomically add HWC values to sample structure */ | ||
| 450 | ✗ | DLB_ATOMIC_ADD_RLX(&sample->counters.cycles, measurements.cycles); | |
| 451 | ✗ | DLB_ATOMIC_ADD_RLX(&sample->counters.instructions, measurements.instructions); | |
| 452 | } | ||
| 453 | |||
| 454 | #ifdef INSTRUMENTATION_VERSION | ||
| 455 | // It's safe to emit even if talp_hwc_collect returned false, | ||
| 456 | // struct is zero'ed in that case | ||
| 457 | unsigned events[] = {MONITOR_CYCLES, MONITOR_INSTR}; | ||
| 458 | long long hwc_values[] = {measurements.cycles, measurements.instructions}; | ||
| 459 | instrument_nevent(2, events, hwc_values); | ||
| 460 | #endif | ||
| 461 | } | ||
| 462 | } | ||
| 463 | } | ||
| 464 | |||
| 465 | /* Flush and aggregate a single sample into a macrosample */ | ||
| 466 | 5332 | static inline void flush_sample_to_macrosample(talp_sample_t *sample, | |
| 467 | talp_macrosample_t *macrosample) { | ||
| 468 | |||
| 469 | /* Timers */ | ||
| 470 | 5332 | macrosample->timers.useful += | |
| 471 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->timers.useful, 0); | |
| 472 | 5332 | macrosample->timers.not_useful_mpi += | |
| 473 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->timers.not_useful_mpi, 0); | |
| 474 | 5332 | macrosample->timers.not_useful_omp_out += | |
| 475 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->timers.not_useful_omp_out, 0); | |
| 476 | /* timers.not_useful_omp_in is not flushed here, make sure struct is empty */ | ||
| 477 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5332 times.
|
5332 | ensure(DLB_ATOMIC_LD_RLX(&sample->timers.not_useful_omp_in) == 0, |
| 478 | "Inconsistency in TALP sample metric not_useful_omp_in." | ||
| 479 | " Please, report bug at " PACKAGE_BUGREPORT); | ||
| 480 | 5332 | macrosample->timers.not_useful_gpu += | |
| 481 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->timers.not_useful_gpu, 0); | |
| 482 | |||
| 483 | /* Counters */ | ||
| 484 | 5332 | macrosample->counters.cycles += | |
| 485 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->counters.cycles, 0); | |
| 486 | 5332 | macrosample->counters.instructions += | |
| 487 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->counters.instructions, 0); | |
| 488 | |||
| 489 | /* Stats */ | ||
| 490 | 5332 | macrosample->stats.num_mpi_calls += | |
| 491 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->stats.num_mpi_calls, 0); | |
| 492 | 5332 | macrosample->stats.num_omp_parallels += | |
| 493 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->stats.num_omp_parallels, 0); | |
| 494 | 5332 | macrosample->stats.num_omp_tasks += | |
| 495 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->stats.num_omp_tasks, 0); | |
| 496 | 5332 | macrosample->stats.num_gpu_runtime_calls += | |
| 497 | 5332 | DLB_ATOMIC_EXCH_RLX(&sample->stats.num_gpu_runtime_calls, 0); | |
| 498 | 5332 | } | |
| 499 | |||
| 500 | /* Accumulate values from samples of all threads and update regions */ | ||
| 501 | 5347 | int talp_flush_samples_to_regions(const subprocess_descriptor_t *spd) { | |
| 502 | |||
| 503 | /* Observer threads don't have a valid sample so they cannot start/stop regions */ | ||
| 504 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5346 times.
|
5347 | if (unlikely(thread_is_observer)) return DLB_ERR_PERM; |
| 505 | |||
| 506 | int num_cpus; | ||
| 507 | 5346 | talp_info_t *talp_info = spd->talp_info; | |
| 508 | |||
| 509 | /* Accumulate samples from all threads */ | ||
| 510 | 5346 | talp_macrosample_t macrosample = (const talp_macrosample_t) {}; | |
| 511 | 5346 | pthread_mutex_lock(&talp_info->samples_mutex); | |
| 512 | { | ||
| 513 | 5346 | num_cpus = talp_info->ncpus; | |
| 514 | |||
| 515 | /* Force-update and aggregate all samples */ | ||
| 516 | 5346 | int64_t timestamp = get_time_in_ns(); | |
| 517 |
2/2✓ Branch 0 taken 5329 times.
✓ Branch 1 taken 5346 times.
|
10675 | for (int i = 0; i < num_cpus; ++i) { |
| 518 | 5329 | talp_update_sample(spd, talp_info->samples[i], timestamp); | |
| 519 | 5329 | flush_sample_to_macrosample(talp_info->samples[i], &macrosample); | |
| 520 | } | ||
| 521 | } | ||
| 522 | 5346 | pthread_mutex_unlock(&talp_info->samples_mutex); | |
| 523 | |||
| 524 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 5346 times.
|
5346 | if (talp_info->flags.have_gpu) { |
| 525 | /* Collect GPU measurements up to this point and update macrosample */ | ||
| 526 | gpu_measurements_t measurements; | ||
| 527 | ✗ | talp_gpu_collect(&measurements); | |
| 528 | ✗ | macrosample.gpu_timers.useful = measurements.useful_time; | |
| 529 | ✗ | macrosample.gpu_timers.communication = measurements.communication_time; | |
| 530 | ✗ | macrosample.gpu_timers.inactive = measurements.inactive_time; | |
| 531 | } | ||
| 532 | |||
| 533 | /* Update all started regions */ | ||
| 534 | 5346 | update_regions_with_macrosample(spd, &macrosample, num_cpus); | |
| 535 | |||
| 536 | 5346 | return DLB_SUCCESS; | |
| 537 | } | ||
| 538 | |||
| 539 | /* Accumulate samples from only a subset of samples of a parallel region. | ||
| 540 | * Load Balance and Scheduling are computed here based on all samples. */ | ||
| 541 | 2 | void talp_flush_sample_subset_to_regions(const subprocess_descriptor_t *spd, | |
| 542 | talp_sample_t **samples, unsigned int nelems) { | ||
| 543 | |||
| 544 | 2 | talp_info_t *talp_info = spd->talp_info; | |
| 545 | 2 | talp_macrosample_t macrosample = (const talp_macrosample_t) {}; | |
| 546 | 2 | pthread_mutex_lock(&talp_info->samples_mutex); | |
| 547 | { | ||
| 548 | /* Iterate first to force-update all samples and compute the minimum | ||
| 549 | * not-useful-omp-in among them */ | ||
| 550 | 2 | int64_t timestamp = get_time_in_ns(); | |
| 551 | 2 | int64_t min_not_useful_omp_in = INT64_MAX; | |
| 552 | unsigned int i; | ||
| 553 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
|
5 | for (i=0; i<nelems; ++i) { |
| 554 | 3 | talp_update_sample(spd, samples[i], timestamp); | |
| 555 | 3 | min_not_useful_omp_in = min_int64(min_not_useful_omp_in, | |
| 556 | 3 | DLB_ATOMIC_LD_RLX(&samples[i]->timers.not_useful_omp_in)); | |
| 557 | } | ||
| 558 | |||
| 559 | /* Iterate again to accumulate Load Balance, and to aggregate sample */ | ||
| 560 | 2 | int64_t sched_timer = min_not_useful_omp_in * nelems; | |
| 561 | 2 | int64_t lb_timer = 0; | |
| 562 |
2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
|
5 | for (i=0; i<nelems; ++i) { |
| 563 | 3 | lb_timer += DLB_ATOMIC_EXCH_RLX(&samples[i]->timers.not_useful_omp_in, 0) | |
| 564 | 3 | - min_not_useful_omp_in; | |
| 565 | 3 | flush_sample_to_macrosample(samples[i], &macrosample); | |
| 566 | } | ||
| 567 | |||
| 568 | /* Update derived timers into macrosample */ | ||
| 569 | 2 | macrosample.timers.not_useful_omp_in_lb = lb_timer; | |
| 570 | 2 | macrosample.timers.not_useful_omp_in_sched = sched_timer; | |
| 571 | } | ||
| 572 | 2 | pthread_mutex_unlock(&talp_info->samples_mutex); | |
| 573 | |||
| 574 | /* Update all started regions */ | ||
| 575 | 2 | update_regions_with_macrosample(spd, &macrosample, nelems); | |
| 576 | 2 | } | |
| 577 | |||
| 578 | /* Sets the TLS variable _is_main_sample_in_serial_mode. This function is | ||
| 579 | * called by the main thread when beginning or ending a parallel region of level 1. | ||
| 580 | * FIXME: free agent threads may break this condition. | ||
| 581 | * | ||
| 582 | * Sets whether the main thread is running in serial mode. */ | ||
| 583 | 4 | void talp_set_main_sample_in_serial_mode(bool serial_mode) { | |
| 584 | |||
| 585 |
1/2✓ Branch 0 taken 4 times.
✗ Branch 1 not taken.
|
4 | if (_is_main_sample) { |
| 586 | 4 | _is_main_sample_in_serial_mode = serial_mode; | |
| 587 | } | ||
| 588 | 4 | } | |
| 589 | |||
| 590 | |||
| 591 | /*********************************************************************************/ | ||
| 592 | /* TALP collect functions for 3rd party programs: */ | ||
| 593 | /* - It's also safe to call it from a 1st party program */ | ||
| 594 | /* - Requires --talp-external-profiler set up in application */ | ||
| 595 | /* - Does not need to synchronize with application */ | ||
| 596 | /*********************************************************************************/ | ||
| 597 | |||
| 598 | /* Function that may be called from a third-party process to compute | ||
| 599 | * node_metrics for a given region */ | ||
| 600 | 6 | int talp_query_pop_node_metrics(const char *name, dlb_node_metrics_t *node_metrics) { | |
| 601 | |||
| 602 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (name == NULL) { |
| 603 | 1 | name = region_get_global_name(); | |
| 604 | } | ||
| 605 | |||
| 606 | 6 | int error = DLB_SUCCESS; | |
| 607 | 6 | int64_t total_mpi_time = 0; | |
| 608 | 6 | int64_t total_useful_time = 0; | |
| 609 | 6 | int64_t max_mpi_time = 0; | |
| 610 | 6 | int64_t max_useful_time = 0; | |
| 611 | |||
| 612 | /* Obtain a list of regions in the node associated with given region */ | ||
| 613 | 6 | int max_procs = mu_get_system_size(); | |
| 614 | 6 | talp_region_list_t *region_list = malloc(max_procs * sizeof(talp_region_list_t)); | |
| 615 | int nelems; | ||
| 616 | 6 | shmem_talp__get_regionlist(region_list, &nelems, max_procs, name); | |
| 617 | |||
| 618 | /* Count how many processes have started the region */ | ||
| 619 | 6 | int processes_per_node = 0; | |
| 620 | |||
| 621 | /* Iterate the region list and gather times of every process */ | ||
| 622 | int i; | ||
| 623 |
2/2✓ Branch 0 taken 7 times.
✓ Branch 1 taken 6 times.
|
13 | for (i = 0; i <nelems; ++i) { |
| 624 | 7 | int64_t mpi_time = region_list[i].mpi_time; | |
| 625 | 7 | int64_t useful_time = region_list[i].useful_time; | |
| 626 | |||
| 627 | /* Accumulate total and max values */ | ||
| 628 |
3/4✓ Branch 0 taken 4 times.
✓ Branch 1 taken 3 times.
✓ Branch 2 taken 4 times.
✗ Branch 3 not taken.
|
7 | if (mpi_time > 0 || useful_time > 0) { |
| 629 | 7 | ++processes_per_node; | |
| 630 | 7 | total_mpi_time += mpi_time; | |
| 631 | 7 | total_useful_time += useful_time; | |
| 632 | 7 | max_mpi_time = max_int64(mpi_time, max_mpi_time); | |
| 633 | 7 | max_useful_time = max_int64(useful_time, max_useful_time); | |
| 634 | } | ||
| 635 | } | ||
| 636 | 6 | free(region_list); | |
| 637 | |||
| 638 | #if MPI_LIB | ||
| 639 | int node_id = _node_id; | ||
| 640 | #else | ||
| 641 | 6 | int node_id = 0; | |
| 642 | #endif | ||
| 643 | |||
| 644 |
1/2✓ Branch 0 taken 6 times.
✗ Branch 1 not taken.
|
6 | if (processes_per_node > 0) { |
| 645 | /* Compute POP metrics with some inferred values */ | ||
| 646 | perf_metrics_mpi_t metrics; | ||
| 647 | 6 | perf_metrics__infer_mpi_model( | |
| 648 | &metrics, | ||
| 649 | processes_per_node, | ||
| 650 | total_useful_time, | ||
| 651 | total_mpi_time, | ||
| 652 | max_useful_time); | ||
| 653 | |||
| 654 | /* Initialize structure */ | ||
| 655 | 6 | *node_metrics = (const dlb_node_metrics_t) { | |
| 656 | .node_id = node_id, | ||
| 657 | .processes_per_node = processes_per_node, | ||
| 658 | .total_useful_time = total_useful_time, | ||
| 659 | .total_mpi_time = total_mpi_time, | ||
| 660 | .max_useful_time = max_useful_time, | ||
| 661 | .max_mpi_time = max_mpi_time, | ||
| 662 | 6 | .parallel_efficiency = metrics.parallel_efficiency, | |
| 663 | 6 | .communication_efficiency = metrics.communication_efficiency, | |
| 664 | 6 | .load_balance = metrics.load_balance, | |
| 665 | }; | ||
| 666 | 6 | snprintf(node_metrics->name, DLB_MONITOR_NAME_MAX, "%s", name); | |
| 667 | } else { | ||
| 668 | ✗ | error = DLB_ERR_NOENT; | |
| 669 | } | ||
| 670 | |||
| 671 | 6 | return error; | |
| 672 | } | ||
| 673 | |||
| 674 | |||
| 675 | /*********************************************************************************/ | ||
| 676 | /* TALP collect functions for 1st party programs */ | ||
| 677 | /* - Requires synchronization (MPI or node barrier) among all processes */ | ||
| 678 | /*********************************************************************************/ | ||
| 679 | |||
| 680 | /* Compute the current POP metrics for the specified monitor. If monitor is NULL, | ||
| 681 | * the global monitoring region is assumed. | ||
| 682 | * Pre-conditions: | ||
| 683 | * - if MPI, the given monitor must have been registered in all MPI ranks | ||
| 684 | * - pop_metrics is an allocated structure | ||
| 685 | */ | ||
| 686 | 1 | int talp_collect_pop_metrics(const subprocess_descriptor_t *spd, | |
| 687 | dlb_monitor_t *monitor, dlb_pop_metrics_t *pop_metrics) { | ||
| 688 | 1 | talp_info_t *talp_info = spd->talp_info; | |
| 689 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (monitor == NULL) { |
| 690 | ✗ | monitor = talp_info->monitor; | |
| 691 | } | ||
| 692 | |||
| 693 | /* Stop monitor so that metrics are updated */ | ||
| 694 | 1 | bool resume_region = region_stop(spd, monitor) == DLB_SUCCESS; | |
| 695 | |||
| 696 | pop_base_metrics_t base_metrics; | ||
| 697 | #ifdef MPI_LIB | ||
| 698 | /* Reduce monitor among all MPI ranks and everybody collects (all-to-all) */ | ||
| 699 | perf_metrics__reduce_monitor_into_base_metrics(&base_metrics, monitor, true); | ||
| 700 | #else | ||
| 701 | /* Construct base metrics using only the monitor from this process */ | ||
| 702 | 1 | perf_metrics__local_monitor_into_base_metrics(&base_metrics, monitor); | |
| 703 | #endif | ||
| 704 | |||
| 705 | /* Construct output pop_metrics out of base metrics */ | ||
| 706 | 1 | perf_metrics__base_to_pop_metrics(monitor->name, &base_metrics, pop_metrics); | |
| 707 | |||
| 708 | /* Resume monitor */ | ||
| 709 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 1 times.
|
1 | if (resume_region) { |
| 710 | ✗ | region_start(spd, monitor); | |
| 711 | } | ||
| 712 | |||
| 713 | 1 | return DLB_SUCCESS; | |
| 714 | } | ||
| 715 | |||
| 716 | /* Node-collective function to compute node_metrics for a given region */ | ||
| 717 | 5 | int talp_collect_pop_node_metrics(const subprocess_descriptor_t *spd, | |
| 718 | dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics) { | ||
| 719 | |||
| 720 | 5 | talp_info_t *talp_info = spd->talp_info; | |
| 721 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 3 times.
|
5 | monitor = monitor ? monitor : talp_info->monitor; |
| 722 | 5 | monitor_data_t *monitor_data = monitor->_data; | |
| 723 | |||
| 724 | /* Stop monitor so that metrics are updated */ | ||
| 725 | 5 | bool resume_region = region_stop(spd, monitor) == DLB_SUCCESS; | |
| 726 | |||
| 727 | /* This functionality needs shared memory; create a temporary one if needed */ | ||
| 728 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (!talp_info->flags.have_shmem) { |
| 729 | 5 | shmem_talp__init(spd->options.shm_key, 1); | |
| 730 | 5 | shmem_talp__register(spd->id, monitor->avg_cpus, monitor->name, | |
| 731 | &monitor_data->node_shared_id); | ||
| 732 | } | ||
| 733 | |||
| 734 | /* Update the shared memory with this process' metrics */ | ||
| 735 | 5 | shmem_talp__set_times(monitor_data->node_shared_id, | |
| 736 | monitor->mpi_time, | ||
| 737 | monitor->useful_time); | ||
| 738 | |||
| 739 | /* Perform a node barrier to ensure everyone has updated their metrics */ | ||
| 740 | 5 | node_barrier(spd, NULL); | |
| 741 | |||
| 742 | /* Compute node metrics for that region name */ | ||
| 743 | 5 | talp_query_pop_node_metrics(monitor->name, node_metrics); | |
| 744 | |||
| 745 | /* Remove shared memory if it was a temporary one */ | ||
| 746 |
1/2✓ Branch 0 taken 5 times.
✗ Branch 1 not taken.
|
5 | if (!talp_info->flags.have_shmem) { |
| 747 | 5 | shmem_talp__finalize(spd->id); | |
| 748 | } | ||
| 749 | |||
| 750 | /* Resume monitor */ | ||
| 751 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 4 times.
|
5 | if (resume_region) { |
| 752 | 1 | region_start(spd, monitor); | |
| 753 | } | ||
| 754 | |||
| 755 | 5 | return DLB_SUCCESS; | |
| 756 | } | ||
| 757 |