| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2026 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #include "talp/backend_manager.h" | ||
| 21 | |||
| 22 | #include "apis/dlb_errors.h" | ||
| 23 | #include "support/debug.h" | ||
| 24 | #include "talp/backend.h" | ||
| 25 | #include "talp/talp_gpu.h" | ||
| 26 | #include "talp/talp_hwc.h" | ||
| 27 | |||
| 28 | #include <dirent.h> | ||
| 29 | #include <dlfcn.h> | ||
| 30 | #include <libgen.h> | ||
| 31 | #include <limits.h> | ||
| 32 | #include <stdlib.h> | ||
| 33 | #include <string.h> | ||
| 34 | |||
| 35 | |||
| 36 | // Internal Core interface that plugins may use | ||
| 37 | const core_api_t core_api = { | ||
| 38 | .abi_version = DLB_BACKEND_ABI_VERSION, | ||
| 39 | .struct_size = sizeof(core_api_t), | ||
| 40 | .gpu = { | ||
| 41 | .enter_runtime = talp_gpu_enter_runtime, | ||
| 42 | .exit_runtime = talp_gpu_exit_runtime, | ||
| 43 | .submit_measurements = talp_gpu_submit, | ||
| 44 | }, | ||
| 45 | .hwc = { | ||
| 46 | .submit_measurements = talp_hwc_submit, | ||
| 47 | }, | ||
| 48 | }; | ||
| 49 | |||
| 50 | // Loaded plugins. We only accept one plugin per capability | ||
| 51 | typedef struct plugin_t { | ||
| 52 | const backend_api_t *api; | ||
| 53 | void *handle; | ||
| 54 | } plugin_t; | ||
| 55 | |||
| 56 | |||
| 57 | /* --- GPU plugins ------------------------------------------------------------ */ | ||
| 58 | |||
| 59 | // List of possible GPU backend plugin names | ||
| 60 | static const char *gpu_plugins[] = { | ||
| 61 | "cupti", | ||
| 62 | "rocprofiler-sdk", | ||
| 63 | "rocprofilerv2", | ||
| 64 | }; | ||
| 65 | |||
| 66 | enum { num_gpu_plugins = sizeof(gpu_plugins) / sizeof(gpu_plugins[0]) }; | ||
| 67 | |||
| 68 | static plugin_t loaded_gpu_plugin = {}; | ||
| 69 | |||
| 70 | |||
| 71 | /* --- HWC plugins ------------------------------------------------------------ */ | ||
| 72 | |||
| 73 | // List of possible HWC backend plugin names | ||
| 74 | static const char *hwc_plugins[] = { | ||
| 75 | "papi", | ||
| 76 | }; | ||
| 77 | |||
| 78 | enum { num_hwc_plugins = sizeof(hwc_plugins) / sizeof(hwc_plugins[0]) }; | ||
| 79 | |||
| 80 | static plugin_t loaded_hwc_plugin = {}; | ||
| 81 | |||
| 82 | |||
| 83 | /* ----------------------------------------------------------------------------- */ | ||
| 84 | |||
| 85 | /* Obtain the directory containing libdlb.so (or whatever variant is loaded) */ | ||
| 86 | 10 | static const char* get_dlb_lib_path(void) { | |
| 87 | |||
| 88 | static char dlb_lib_path[PATH_MAX] = ""; | ||
| 89 | |||
| 90 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (dlb_lib_path[0] != '\0') return dlb_lib_path; |
| 91 | |||
| 92 | // Get information of the dlb shared object currently loaded | ||
| 93 | Dl_info info; | ||
| 94 |
1/2✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
|
10 | if (dladdr((void*)__func__, &info)) { |
| 95 | 10 | snprintf(dlb_lib_path, sizeof(dlb_lib_path), "%s", dirname((char*)info.dli_fname)); | |
| 96 | } else { | ||
| 97 | ✗ | debug_warning("dladdr failed to obtain information"); | |
| 98 | ✗ | return NULL; | |
| 99 | } | ||
| 100 | |||
| 101 | 10 | return dlb_lib_path; | |
| 102 | } | ||
| 103 | |||
| 104 | /* Obtain the plugin directory: DLB_LIB_DIR if it names an openable directory, otherwise the directory of the DLB library */ | ||
| 105 | 68 | static const char* get_dlb_plugin_path(void) { | |
| 106 | |||
| 107 | static char dlb_plugin_path[PATH_MAX] = ""; | ||
| 108 | |||
| 109 |
2/2✓ Branch 0 taken 58 times.
✓ Branch 1 taken 10 times.
|
68 | if (dlb_plugin_path[0] != '\0') return dlb_plugin_path; |
| 110 | |||
| 111 | // Try env var first | ||
| 112 | 10 | const char *dlb_plugin_path_env = getenv("DLB_LIB_DIR"); | |
| 113 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 10 times.
|
10 | if (dlb_plugin_path_env != NULL) { |
| 114 | ✗ | DIR* dir = opendir(dlb_plugin_path_env); | |
| 115 | ✗ | if (dir) { | |
| 116 | ✗ | snprintf(dlb_plugin_path, sizeof(dlb_plugin_path), "%s", dlb_plugin_path_env); | |
| 117 | ✗ | closedir(dir); | |
| 118 | } else { | ||
| 119 | ✗ | dlb_plugin_path_env = NULL; | |
| 120 | } | ||
| 121 | } | ||
| 122 | |||
| 123 | // Try same directory as dlb library | ||
| 124 |
1/2✓ Branch 0 taken 10 times.
✗ Branch 1 not taken.
|
10 | if (dlb_plugin_path_env == NULL) { |
| 125 | 10 | snprintf(dlb_plugin_path, sizeof(dlb_plugin_path), "%s", get_dlb_lib_path()); | |
| 126 | } | ||
| 127 | |||
| 128 | 10 | return dlb_plugin_path; | |
| 129 | } | ||
| 130 | |||
| 131 | /* Load exact plugin name, return error otherwise */ | ||
| 132 | 68 | static int talp_backend_manager_load(const char* plugin_name) { | |
| 133 | |||
| 134 | 68 | const char *dlb_plugin_path = get_dlb_plugin_path(); | |
| 135 |
2/4✓ Branch 0 taken 68 times.
✗ Branch 1 not taken.
✗ Branch 2 not taken.
✓ Branch 3 taken 68 times.
|
68 | if (dlb_plugin_path == NULL || dlb_plugin_path[0] == '\0') { |
| 136 | ✗ | return DLB_ERR_UNKNOWN; | |
| 137 | } | ||
| 138 | |||
| 139 | #ifndef DLB_TEST_LIB | ||
| 140 | 68 | const char *plugin_suffix = ""; | |
| 141 | 68 | int dlopen_flag = RTLD_LAZY; | |
| 142 | #else | ||
| 143 | /* stub libraries contain symbols that will be called during testing, | ||
| 144 | * so we'll load the plugin and all symbols from its linked libraries. */ | ||
| 145 | const char *plugin_suffix = "_test"; | ||
| 146 | int dlopen_flag = RTLD_NOW | RTLD_GLOBAL; | ||
| 147 | #endif | ||
| 148 | |||
| 149 | // Load the plugin | ||
| 150 | char plugin_path[PATH_MAX]; | ||
| 151 | 68 | int not_truncated_len = snprintf(plugin_path, PATH_MAX, "%s/libdlb_%s%s.so", | |
| 152 | dlb_plugin_path, plugin_name, plugin_suffix); | ||
| 153 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 68 times.
|
68 | if (not_truncated_len >= PATH_MAX) { |
| 154 | // really unlikely, but avoids GCC's format-truncation warning | ||
| 155 | ✗ | return DLB_ERR_UNKNOWN; | |
| 156 | } | ||
| 157 | 68 | void *handle = dlopen(plugin_path, dlopen_flag); | |
| 158 |
1/2✓ Branch 0 taken 68 times.
✗ Branch 1 not taken.
|
68 | if (handle == NULL) { |
| 159 | 68 | debug_warning("Failed to load plugin %s: %s", plugin_name, dlerror()); | |
| 160 | 68 | return DLB_ERR_UNKNOWN; | |
| 161 | } | ||
| 162 | |||
| 163 | // Load the plugin public function | ||
| 164 | typedef backend_api_t* (*backend_get_api_func_t)(void); | ||
| 165 | ✗ | backend_get_api_func_t get_api = (backend_get_api_func_t)dlsym(handle, "DLB_Get_Backend_API"); | |
| 166 | ✗ | if (get_api == NULL) { | |
| 167 | ✗ | debug_warning("Failed to find function: %s", dlerror()); | |
| 168 | ✗ | goto close_handle_and_error; | |
| 169 | } | ||
| 170 | |||
| 171 | // Check that DLB_Get_Backend_API returns a valid API | ||
| 172 | ✗ | const backend_api_t *backend_api = get_api(); | |
| 173 | ✗ | if (backend_api == NULL) { | |
| 174 | ✗ | debug_warning("DLB_Get_Backend_API returned NULL in plugin %s", plugin_name); | |
| 175 | ✗ | goto close_handle_and_error; | |
| 176 | } | ||
| 177 | |||
| 178 | // Check ABI version | ||
| 179 | ✗ | if (backend_api->abi_version != DLB_BACKEND_ABI_VERSION | |
| 180 | ✗ | || backend_api->struct_size != sizeof(backend_api_t)) { | |
| 181 | ✗ | debug_warning("ABI compatibility check failed in plugin %s", plugin_name); | |
| 182 | ✗ | goto close_handle_and_error; | |
| 183 | } | ||
| 184 | |||
| 185 | // Check that the plugin returns its own name | ||
| 186 | ✗ | if (strcmp(backend_api->name, plugin_name) != 0) { | |
| 187 | ✗ | debug_warning("Plugin %s did not return an equivalent plugin name during initialization." | |
| 188 | " Deactivating...", plugin_name); | ||
| 189 | ✗ | goto close_handle_and_error; | |
| 190 | } | ||
| 191 | |||
| 192 | // Check plugin capabilities | ||
| 193 | ✗ | if (backend_api->capabilities.gpu) { | |
| 194 | ✗ | loaded_gpu_plugin.api = backend_api; | |
| 195 | ✗ | loaded_gpu_plugin.handle = handle; | |
| 196 | ✗ | } else if (backend_api->capabilities.hwc) { | |
| 197 | ✗ | loaded_hwc_plugin.api = backend_api; | |
| 198 | ✗ | loaded_hwc_plugin.handle = handle; | |
| 199 | } else { | ||
| 200 | ✗ | debug_warning("Unkown capabilities for plugin %s", plugin_name); | |
| 201 | ✗ | goto close_handle_and_error; | |
| 202 | } | ||
| 203 | |||
| 204 | ✗ | return DLB_SUCCESS; | |
| 205 | |||
| 206 | ✗ | close_handle_and_error: | |
| 207 | ✗ | dlclose(handle); | |
| 208 | ✗ | return DLB_ERR_UNKNOWN; | |
| 209 | } | ||
| 210 | |||
| 211 | 15 | const backend_api_t* talp_backend_manager_load_gpu_backend(const char *name) { | |
| 212 | |||
| 213 |
1/2✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
|
15 | if (loaded_gpu_plugin.handle == NULL) { |
| 214 |
2/4✓ Branch 0 taken 15 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 15 times.
✗ Branch 3 not taken.
|
15 | if (name == NULL || name[0] == '\0') { |
| 215 | // Try all possible GPU plugins until one succeeds | ||
| 216 |
2/2✓ Branch 0 taken 45 times.
✓ Branch 1 taken 15 times.
|
60 | for (size_t i = 0; i < num_gpu_plugins; ++i) { |
| 217 | 45 | const char *p = gpu_plugins[i]; | |
| 218 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 45 times.
|
45 | if (talp_backend_manager_load(p) == DLB_SUCCESS) { |
| 219 | ✗ | break; | |
| 220 | } | ||
| 221 | } | ||
| 222 | } else { | ||
| 223 | ✗ | talp_backend_manager_load(name); | |
| 224 | } | ||
| 225 | } | ||
| 226 | |||
| 227 | // Note that it may be NULL if no GPU plugin could be loaded | ||
| 228 | 15 | return loaded_gpu_plugin.api; | |
| 229 | } | ||
| 230 | |||
| 231 | ✗ | void talp_backend_manager_unload_gpu_backend(void) { | |
| 232 | |||
| 233 | ✗ | if (loaded_gpu_plugin.handle != NULL) { | |
| 234 | ✗ | dlclose(loaded_gpu_plugin.handle); | |
| 235 | ✗ | loaded_gpu_plugin = (const plugin_t){}; | |
| 236 | } | ||
| 237 | } | ||
| 238 | |||
| 239 | 23 | const backend_api_t* talp_backend_manager_load_hwc_backend(const char *name) { | |
| 240 | |||
| 241 |
1/2✓ Branch 0 taken 23 times.
✗ Branch 1 not taken.
|
23 | if (loaded_hwc_plugin.handle == NULL) { |
| 242 |
1/4✗ Branch 0 not taken.
✓ Branch 1 taken 23 times.
✗ Branch 2 not taken.
✗ Branch 3 not taken.
|
23 | if (name == NULL || name[0] == '\0') { |
| 243 | // Try all possible HWC plugins until one succeeds | ||
| 244 |
2/2✓ Branch 0 taken 23 times.
✓ Branch 1 taken 23 times.
|
46 | for (size_t i = 0; i < num_hwc_plugins; ++i) { |
| 245 | 23 | const char *p = hwc_plugins[i]; | |
| 246 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 23 times.
|
23 | if (talp_backend_manager_load(p) == DLB_SUCCESS) { |
| 247 | ✗ | break; | |
| 248 | } | ||
| 249 | } | ||
| 250 | } else { | ||
| 251 | ✗ | talp_backend_manager_load(name); | |
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | // Note that it may be NULL if no HWC plugin could be loaded | ||
| 256 | 23 | return loaded_hwc_plugin.api; | |
| 257 | } | ||
| 258 | |||
| 259 | ✗ | void talp_backend_manager_unload_hwc_backend(void) { | |
| 260 | |||
| 261 | ✗ | if (loaded_hwc_plugin.handle != NULL) { | |
| 262 | ✗ | dlclose(loaded_hwc_plugin.handle); | |
| 263 | ✗ | loaded_hwc_plugin = (const plugin_t){}; | |
| 264 | } | ||
| 265 | } | ||
| 266 | |||
| 267 | ✗ | void* talp_backend_manager_get_symbol_from_plugin(const char *symbol, const char *plugin_name) { | |
| 268 | |||
| 269 | ✗ | if (loaded_gpu_plugin.handle != NULL | |
| 270 | ✗ | && strcmp(loaded_gpu_plugin.api->name, plugin_name) == 0) { | |
| 271 | ✗ | return dlsym(loaded_gpu_plugin.handle, symbol); | |
| 272 | } | ||
| 273 | |||
| 274 | ✗ | if (loaded_hwc_plugin.handle != NULL | |
| 275 | ✗ | && strcmp(loaded_hwc_plugin.api->name, plugin_name) == 0) { | |
| 276 | ✗ | return dlsym(loaded_hwc_plugin.handle, symbol); | |
| 277 | } | ||
| 278 | |||
| 279 | ✗ | return NULL; | |
| 280 | } | ||
| 281 | |||
| 282 | ✗ | int talp_backend_manager_get_gpu_affinity(char *buffer, size_t buffer_size, bool full_uuid) { | |
| 283 | |||
| 284 | // This function is only called from `dlb` utility, so we don't expect | ||
| 285 | // having the plugin already loaded, but we can support it just in case | ||
| 286 | ✗ | bool plugin_needs_loading = loaded_gpu_plugin.handle == NULL; | |
| 287 | |||
| 288 | ✗ | if (plugin_needs_loading) { | |
| 289 | ✗ | talp_backend_manager_load_gpu_backend(NULL); | |
| 290 | } | ||
| 291 | |||
| 292 | ✗ | int error = DLB_ERR_UNKNOWN; | |
| 293 | ✗ | if (loaded_gpu_plugin.handle != NULL | |
| 294 | ✗ | && loaded_gpu_plugin.api->get_gpu_affinity) { | |
| 295 | ✗ | int backend_error = loaded_gpu_plugin.api->get_gpu_affinity(buffer, buffer_size, full_uuid); | |
| 296 | ✗ | if (backend_error == DLB_BACKEND_SUCCESS) { | |
| 297 | ✗ | error = DLB_SUCCESS; | |
| 298 | } | ||
| 299 | } | ||
| 300 | |||
| 301 | ✗ | if (plugin_needs_loading) { | |
| 302 | ✗ | talp_backend_manager_unload_gpu_backend(); | |
| 303 | } | ||
| 304 | |||
| 305 | ✗ | return error; | |
| 306 | } | ||
| 307 |