| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2024 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | /* Tracking Application Live Performance */ | ||
| 21 | |||
| 22 | #include "apis/dlb_talp.h" | ||
| 23 | |||
| 24 | #include "apis/dlb_errors.h" | ||
| 25 | #include "LB_core/spd.h" | ||
| 26 | #include "LB_core/DLB_kernel.h" | ||
| 27 | #include "LB_comm/shmem_cpuinfo.h" | ||
| 28 | #include "LB_comm/shmem_procinfo.h" | ||
| 29 | #include "LB_comm/shmem_talp.h" | ||
| 30 | #include "support/dlb_common.h" | ||
| 31 | #include "support/mask_utils.h" | ||
| 32 | #include "support/mytime.h" | ||
| 33 | #include "talp/regions.h" | ||
| 34 | #include "talp/talp.h" | ||
| 35 | |||
| 36 | |||
| 37 | /*********************************************************************************/ | ||
| 38 | /* TALP */ | ||
| 39 | /*********************************************************************************/ | ||
| 40 | |||
| 41 | DLB_EXPORT_SYMBOL | ||
| 42 | 9 | int DLB_TALP_Attach(void) { | |
| 43 | int lewi_color; | ||
| 44 | char shm_key[MAX_OPTION_LENGTH]; | ||
| 45 | char *shm_key_ptr; | ||
| 46 | int shm_size_multiplier; | ||
| 47 | 9 | spd_enter_dlb(thread_spd); | |
| 48 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 3 times.
|
9 | if (!thread_spd->dlb_initialized) { |
| 49 | 6 | set_observer_role(true); | |
| 50 | 6 | options_parse_entry("--lewi-color", &lewi_color); | |
| 51 | 6 | options_parse_entry("--shm-key", shm_key); | |
| 52 | 6 | options_parse_entry("--shm-size-multiplier", &shm_size_multiplier); | |
| 53 | 6 | shm_key_ptr = shm_key; | |
| 54 | } else { | ||
| 55 | 3 | lewi_color = thread_spd->options.lewi_color; | |
| 56 | 3 | shm_key_ptr = thread_spd->options.shm_key; | |
| 57 | 3 | shm_size_multiplier = thread_spd->options.shm_size_multiplier; | |
| 58 | } | ||
| 59 | 9 | shmem_cpuinfo_ext__init(shm_key_ptr, lewi_color); | |
| 60 | 9 | shmem_procinfo_ext__init(shm_key_ptr, shm_size_multiplier); | |
| 61 | 9 | shmem_talp_ext__init(shm_key_ptr, shm_size_multiplier); | |
| 62 | 9 | return DLB_SUCCESS; | |
| 63 | } | ||
| 64 | |||
| 65 | DLB_EXPORT_SYMBOL | ||
| 66 | 9 | int DLB_TALP_Detach(void) { | |
| 67 | 9 | int error = shmem_cpuinfo_ext__finalize(); | |
| 68 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | error = error ? error : shmem_procinfo_ext__finalize(); |
| 69 |
1/2✓ Branch 0 taken 9 times.
✗ Branch 1 not taken.
|
9 | error = error ? error : shmem_talp_ext__finalize(); |
| 70 | 9 | return error; | |
| 71 | } | ||
| 72 | |||
| 73 | DLB_EXPORT_SYMBOL | ||
| 74 | 1 | int DLB_TALP_GetNumCPUs(int *ncpus) { | |
| 75 | 1 | *ncpus = mu_get_system_size(); | |
| 76 | 1 | return DLB_SUCCESS; | |
| 77 | } | ||
| 78 | |||
| 79 | DLB_EXPORT_SYMBOL | ||
| 80 | 5 | int DLB_TALP_GetPidList(int *pidlist, int *nelems, int max_len) { | |
| 81 | 5 | return shmem_procinfo__getpidlist(pidlist, nelems, max_len); | |
| 82 | } | ||
| 83 | |||
| 84 | DLB_EXPORT_SYMBOL | ||
| 85 | 5 | int DLB_TALP_GetTimes(int pid, double *mpi_time, double *useful_time) { | |
| 86 | |||
| 87 | int error; | ||
| 88 | |||
| 89 |
4/6✓ Branch 0 taken 3 times.
✓ Branch 1 taken 2 times.
✓ Branch 2 taken 3 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 3 times.
|
7 | if (pid == 0 || (thread_spd && thread_spd->id == pid)) { |
| 90 | /* Same process */ | ||
| 91 | 2 | const dlb_monitor_t *monitor = region_get_global(thread_spd); | |
| 92 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
|
2 | if (monitor != NULL) { |
| 93 | 1 | *mpi_time = nsecs_to_secs(monitor->mpi_time); | |
| 94 | 1 | *useful_time = nsecs_to_secs(monitor->useful_time); | |
| 95 | 1 | error = DLB_SUCCESS; | |
| 96 | } else { | ||
| 97 | 1 | error = DLB_ERR_NOTALP; | |
| 98 | } | ||
| 99 | } else { | ||
| 100 | /* Different process, fetch from shared memory */ | ||
| 101 | talp_region_list_t region; | ||
| 102 | 3 | error = shmem_talp__get_region(®ion, pid, region_get_global_name()); | |
| 103 | |||
| 104 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (error == DLB_SUCCESS) { |
| 105 | 2 | *mpi_time = nsecs_to_secs(region.mpi_time); | |
| 106 | 2 | *useful_time = nsecs_to_secs(region.useful_time); | |
| 107 | } | ||
| 108 | } | ||
| 109 | |||
| 110 | 5 | return error; | |
| 111 | } | ||
| 112 | |||
| 113 | DLB_EXPORT_SYMBOL | ||
| 114 | 4 | int DLB_TALP_GetNodeTimes(const char *name, dlb_node_times_t *node_times_list, | |
| 115 | int *nelems, int max_len) { | ||
| 116 | |||
| 117 | int error; | ||
| 118 | |||
| 119 |
2/2✓ Branch 1 taken 3 times.
✓ Branch 2 taken 1 times.
|
4 | if (shmem_talp__initialized()) { |
| 120 | /* Only if a worker process started with --talp-external-profiler */ | ||
| 121 | 3 | int shmem_max_regions = shmem_talp__get_max_regions(); | |
| 122 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 2 times.
|
3 | if (max_len > shmem_max_regions) { |
| 123 | 1 | max_len = shmem_max_regions; | |
| 124 | } | ||
| 125 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (name == DLB_GLOBAL_REGION) { |
| 126 | 3 | name = region_get_global_name(); | |
| 127 | } | ||
| 128 | 3 | talp_region_list_t *region_list = malloc(sizeof(talp_region_list_t)*max_len); | |
| 129 | 3 | error = shmem_talp__get_regionlist(region_list, nelems, max_len, name); | |
| 130 |
1/2✓ Branch 0 taken 3 times.
✗ Branch 1 not taken.
|
3 | if (error == DLB_SUCCESS) { |
| 131 |
2/2✓ Branch 0 taken 5 times.
✓ Branch 1 taken 3 times.
|
8 | for (int i=0; i<*nelems; ++i) { |
| 132 | 5 | node_times_list[i] = (const dlb_node_times_t) { | |
| 133 | 5 | .pid = region_list[i].pid, | |
| 134 | 5 | .mpi_time = region_list[i].mpi_time, | |
| 135 | 5 | .useful_time = region_list[i].useful_time, | |
| 136 | }; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | 3 | free(region_list); | |
| 140 | } else { | ||
| 141 | /* shmem does not exist */ | ||
| 142 | 1 | error = DLB_ERR_NOSHMEM; | |
| 143 | } | ||
| 144 | |||
| 145 | 4 | return error; | |
| 146 | } | ||
| 147 | |||
| 148 | DLB_EXPORT_SYMBOL | ||
| 149 | 2 | int DLB_TALP_QueryPOPNodeMetrics(const char *name, dlb_node_metrics_t *node_metrics) { | |
| 150 |
2/2✓ Branch 1 taken 1 times.
✓ Branch 2 taken 1 times.
|
2 | if (shmem_talp__initialized()) { |
| 151 | /* Only if a worker process started with --talp-external-profiler */ | ||
| 152 | 1 | return talp_query_pop_node_metrics(name, node_metrics); | |
| 153 | } else { | ||
| 154 | 1 | return DLB_ERR_NOSHMEM; | |
| 155 | } | ||
| 156 | } | ||
| 157 | |||
| 158 | |||
| 159 | /*********************************************************************************/ | ||
| 160 | /* TALP Monitoring Regions */ | ||
| 161 | /*********************************************************************************/ | ||
| 162 | |||
| 163 | DLB_EXPORT_SYMBOL | ||
| 164 | 4 | dlb_monitor_t* DLB_MonitoringRegionGetGlobal(void) { | |
| 165 | 4 | spd_enter_dlb(thread_spd); | |
| 166 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 3 times.
|
4 | if (unlikely(!thread_spd->talp_info)) { |
| 167 | 1 | return NULL; | |
| 168 | } | ||
| 169 | 3 | return region_get_global(thread_spd); | |
| 170 | } | ||
| 171 | |||
| 172 | DLB_EXPORT_SYMBOL | ||
| 173 | DLB_ALIAS(dlb_monitor_t*, DLB_MonitoringRegionGetImplicit, (void), (), \ | ||
| 174 | DLB_MonitoringRegionGetGlobal) | ||
| 175 | |||
| 176 | DLB_EXPORT_SYMBOL | ||
| 177 | DLB_ALIAS(const dlb_monitor_t*, DLB_MonitoringRegionGetMPIRegion, (void), (), \ | ||
| 178 | DLB_MonitoringRegionGetGlobal) | ||
| 179 | |||
| 180 | DLB_EXPORT_SYMBOL | ||
| 181 | 12 | dlb_monitor_t* DLB_MonitoringRegionRegister(const char *name){ | |
| 182 | 12 | spd_enter_dlb(thread_spd); | |
| 183 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 10 times.
|
12 | if (unlikely(!thread_spd->talp_info)) { |
| 184 | 2 | return NULL; | |
| 185 | } | ||
| 186 | 10 | return region_register(thread_spd, name); | |
| 187 | } | ||
| 188 | |||
| 189 | DLB_EXPORT_SYMBOL | ||
| 190 | 3 | int DLB_MonitoringRegionReset(dlb_monitor_t *handle){ | |
| 191 | 3 | spd_enter_dlb(thread_spd); | |
| 192 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 1 times.
|
3 | if (unlikely(!thread_spd->talp_info)) { |
| 193 | 2 | return DLB_ERR_NOTALP; | |
| 194 | } | ||
| 195 | 1 | return region_reset(thread_spd, handle); | |
| 196 | } | ||
| 197 | |||
| 198 | DLB_EXPORT_SYMBOL | ||
| 199 | 311 | int DLB_MonitoringRegionStart(dlb_monitor_t *handle){ | |
| 200 | 311 | spd_enter_dlb(thread_spd); | |
| 201 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 309 times.
|
311 | if (unlikely(!thread_spd->talp_info)) { |
| 202 | 2 | return DLB_ERR_NOTALP; | |
| 203 | } | ||
| 204 | 309 | return region_start(thread_spd, handle); | |
| 205 | } | ||
| 206 | |||
| 207 | DLB_EXPORT_SYMBOL | ||
| 208 | 411 | int DLB_MonitoringRegionStop(dlb_monitor_t *handle){ | |
| 209 | 411 | spd_enter_dlb(thread_spd); | |
| 210 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 409 times.
|
411 | if (unlikely(!thread_spd->talp_info)) { |
| 211 | 2 | return DLB_ERR_NOTALP; | |
| 212 | } | ||
| 213 | 409 | return region_stop(thread_spd, handle); | |
| 214 | } | ||
| 215 | |||
| 216 | DLB_EXPORT_SYMBOL | ||
| 217 | 6 | int DLB_MonitoringRegionReport(const dlb_monitor_t *handle){ | |
| 218 | 6 | spd_enter_dlb(thread_spd); | |
| 219 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (unlikely(!thread_spd->talp_info)) { |
| 220 | 2 | return DLB_ERR_NOTALP; | |
| 221 | } | ||
| 222 | 4 | return region_report(thread_spd, handle); | |
| 223 | } | ||
| 224 | |||
| 225 | DLB_EXPORT_SYMBOL | ||
| 226 | 1 | int DLB_MonitoringRegionsUpdate(void) { | |
| 227 | 1 | spd_enter_dlb(thread_spd); | |
| 228 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (unlikely(!thread_spd->talp_info)) { |
| 229 | 1 | return DLB_ERR_NOTALP; | |
| 230 | } | ||
| 231 | ✗ | return talp_flush_samples_to_regions(thread_spd); | |
| 232 | } | ||
| 233 | |||
| 234 | DLB_EXPORT_SYMBOL | ||
| 235 | 1 | int DLB_TALP_CollectPOPMetrics(dlb_monitor_t *monitor, dlb_pop_metrics_t *pop_metrics) { | |
| 236 | 1 | spd_enter_dlb(thread_spd); | |
| 237 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (unlikely(!thread_spd->talp_info)) { |
| 238 | 1 | return DLB_ERR_NOTALP; | |
| 239 | } | ||
| 240 | ✗ | return talp_collect_pop_metrics(thread_spd, monitor, pop_metrics); | |
| 241 | } | ||
| 242 | |||
| 243 | DLB_EXPORT_SYMBOL | ||
| 244 | 1 | int DLB_TALP_CollectPOPNodeMetrics(dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics) { | |
| 245 | 1 | spd_enter_dlb(thread_spd); | |
| 246 |
1/2✓ Branch 0 taken 1 times.
✗ Branch 1 not taken.
|
1 | if (unlikely(!thread_spd->talp_info)) { |
| 247 | 1 | return DLB_ERR_NOTALP; | |
| 248 | } | ||
| 249 | ✗ | if (unlikely(!thread_spd->options.barrier)) { | |
| 250 | ✗ | return DLB_ERR_NOCOMP; | |
| 251 | } | ||
| 252 | ✗ | return talp_collect_pop_node_metrics(thread_spd, monitor, node_metrics); | |
| 253 | } | ||
| 254 | |||
| 255 | DLB_EXPORT_SYMBOL | ||
| 256 | DLB_ALIAS(int, DLB_TALP_CollectNodeMetrics, \ | ||
| 257 | (dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics), \ | ||
| 258 | (monitor, node_metrics), \ | ||
| 259 | DLB_TALP_CollectPOPNodeMetrics) | ||
| 260 |