Dynamic Load Balance 3.6.1+32-59d1
dlbf_talp.h
Go to the documentation of this file.
1!-------------------------------------------------------------------------------!
2! Copyright 2009-2025 Barcelona Supercomputing Center !
3! !
4! This file is part of the DLB library. !
5! !
6! DLB is free software: you can redistribute it and/or modify !
7! it under the terms of the GNU Lesser General Public License as published by !
8! the Free Software Foundation, either version 3 of the License, or !
9! (at your option) any later version. !
10! !
11! DLB is distributed in the hope that it will be useful, !
12! but WITHOUT ANY WARRANTY; without even the implied warranty of !
13! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the !
14! GNU Lesser General Public License for more details. !
15! !
16! You should have received a copy of the GNU Lesser General Public License !
17! along with DLB. If not, see <https://www.gnu.org/licenses/>. !
18!-------------------------------------------------------------------------------!
19
! Name and handle constants for the global (application-wide) monitoring
! region. All pointer-valued constants below are the C null pointer.
character(*), parameter :: DLB_GLOBAL_REGION_NAME = "Global"
type(c_ptr), parameter :: DLB_GLOBAL_REGION = c_null_ptr
type(c_ptr), parameter :: DLB_MPI_REGION = c_null_ptr           !! deprecated
type(c_ptr), parameter :: DLB_IMPLICIT_REGION = c_null_ptr      !! deprecated
! Integer (c_intptr_t) region identifiers.
! NOTE(review): presumably meant for callers that carry the region handle as
! an integer instead of a type(c_ptr) -- confirm against the C header.
integer(kind=c_intptr_t), parameter :: DLB_GLOBAL_REGION_INT = 0_c_intptr_t
integer(kind=c_intptr_t), parameter :: DLB_LAST_OPEN_REGION_INT = 1_c_intptr_t
27
! C-interoperable record describing one monitoring region.
! Because the type is bind(c), the order and kind of the components define
! the memory layout shared with the C side; do not reorder them.
! name_ and data_ are opaque C pointers from the Fortran point of view.
type, bind(c) :: dlb_monitor_t
    type(c_ptr) :: name_
    integer(c_int) :: num_cpus
    real(c_float) :: avg_cpus
    integer(c_int64_t) :: cycles, instructions
    integer(c_int) :: num_measurements, num_resets
    ! Event counters.
    integer(c_int64_t) :: num_mpi_calls, num_omp_parallels
    integer(c_int64_t) :: num_omp_tasks, num_gpu_runtime_calls
    ! Time stamps and accumulated times (units not stated here).
    integer(c_int64_t) :: start_time, stop_time, elapsed_time
    integer(c_int64_t) :: useful_time, mpi_time
    integer(c_int64_t) :: omp_load_imbalance_time
    integer(c_int64_t) :: omp_scheduling_time
    integer(c_int64_t) :: omp_serialization_time
    integer(c_int64_t) :: gpu_runtime_time, gpu_useful_time
    integer(c_int64_t) :: gpu_communication_time, gpu_inactive_time
    type(c_ptr) :: data_
end type dlb_monitor_t
54
! C-interoperable record filled in by dlb_talp_collectpopmetrics.
! Because the type is bind(c), the order and kind of the components define
! the memory layout shared with the C side; do not reorder them.
type, bind(c) :: dlb_pop_metrics_t
    ! Region name as a fixed-size array of single C characters.
    character(kind=c_char, len=1), dimension(DLB_MONITOR_NAME_MAX) :: name
    ! Resource counts.
    integer(c_int) :: num_cpus, num_mpi_ranks, num_nodes
    real(c_float) :: avg_cpus
    integer(c_int) :: num_gpus
    ! Hardware counters (stored as doubles in this record).
    real(c_double) :: cycles, instructions
    ! Event counters.
    integer(c_int64_t) :: num_measurements, num_mpi_calls
    integer(c_int64_t) :: num_omp_parallels, num_omp_tasks
    integer(c_int64_t) :: num_gpu_runtime_calls
    ! Accumulated times (units not stated here).
    integer(c_int64_t) :: elapsed_time, useful_time, mpi_time
    integer(c_int64_t) :: omp_load_imbalance_time
    integer(c_int64_t) :: omp_scheduling_time
    integer(c_int64_t) :: omp_serialization_time
    integer(c_int64_t) :: gpu_runtime_time
    real(c_double) :: min_mpi_normd_proc, min_mpi_normd_node
    integer(c_int64_t) :: gpu_useful_time, gpu_communication_time
    integer(c_int64_t) :: gpu_inactive_time
    integer(c_int64_t) :: max_gpu_useful_time, max_gpu_active_time
    ! Derived efficiency metrics.
    real(c_float) :: parallel_efficiency
    real(c_float) :: mpi_parallel_efficiency
    real(c_float) :: mpi_communication_efficiency
    real(c_float) :: mpi_load_balance, mpi_load_balance_in
    real(c_float) :: mpi_load_balance_out
    real(c_float) :: omp_parallel_efficiency, omp_load_balance
    real(c_float) :: omp_scheduling_efficiency
    real(c_float) :: omp_serialization_efficiency
    real(c_float) :: device_offload_efficiency
    real(c_float) :: gpu_parallel_efficiency, gpu_load_balance
    real(c_float) :: gpu_communication_efficiency
    real(c_float) :: gpu_orchestration_efficiency
end type dlb_pop_metrics_t
99
! C-interoperable record filled in by dlb_talp_querypopnodemetrics and
! dlb_talp_collectpopnodemetrics.
! Because the type is bind(c), the order and kind of the components define
! the memory layout shared with the C side; do not reorder them.
type, bind(c) :: dlb_node_metrics_t
    ! Region name as a fixed-size array of single C characters.
    character(kind=c_char, len=1), dimension(DLB_MONITOR_NAME_MAX) :: name
    integer(c_int) :: node_id, processes_per_node
    ! Accumulated times (units not stated here).
    integer(c_int64_t) :: total_useful_time, total_mpi_time
    integer(c_int64_t) :: max_useful_time, max_mpi_time
    ! Derived efficiency metrics.
    real(c_float) :: parallel_efficiency, communication_efficiency
    real(c_float) :: load_balance
end type dlb_node_metrics_t
112
113 interface
114
115 !---------------------------------------------------------------------------!
116 ! The following functions may be called from either 1st-party or
117 ! 3rd-party programs, that is, from DLB applications or from external
118 ! profilers, as long as the caller invokes DLB_TALP_Attach.
119 !---------------------------------------------------------------------------!
120
! Attach the current process to the DLB system as TALP administrator.
! Binds to the C routine DLB_TALP_Attach(void).
!   ierr  status code returned by the C routine
function dlb_talp_attach() &
    & bind(c, name='DLB_TALP_Attach') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int
    integer(c_int) :: ierr
end function dlb_talp_attach
126
! Detach the current process from the DLB system.
! Binds to the C routine DLB_TALP_Detach(void).
!   ierr  status code returned by the C routine
function dlb_talp_detach() &
    & bind(c, name='DLB_TALP_Detach') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int
    integer(c_int) :: ierr
end function dlb_talp_detach
132
! Get the number of CPUs in the node.
! Binds to the C routine: int DLB_TALP_GetNumCPUs(int *ncpus)
!   ncpus  (out) number of CPUs, written by the C routine through an int
!          pointer
!   ierr   status code returned by the C routine
! The dummy argument must be a C int to match the C prototype. Declaring it
! as real(c_double) would make the caller misinterpret the integer value
! that the C side stores through the pointer.
function dlb_talp_getnumcpus(ncpus) result(ierr) &
    & bind(c, name='DLB_TALP_GetNumCPUs')
    use iso_c_binding
    integer(kind=c_int) :: ierr
    integer(kind=c_int), intent(out) :: ncpus
end function dlb_talp_getnumcpus
139
! From either a 1st-party or a 3rd-party program, query the node metrics of
! one region.
! Binds to the C routine
!   int DLB_TALP_QueryPOPNodeMetrics(const char *name,
!                                    dlb_node_metrics_t *node_metrics)
!   name          (in)  region name as an array of C characters
!                 NOTE(review): the C side takes a const char*, so the name
!                 presumably has to be NUL-terminated -- confirm.
!   node_metrics  (out) record filled in by the C routine
!   ierr          status code returned by the C routine
function dlb_talp_querypopnodemetrics(name, node_metrics) &
    & bind(c, name='DLB_TALP_QueryPOPNodeMetrics') &
    & result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_char
    import :: dlb_node_metrics_t
    character(kind=c_char), dimension(*), intent(in) :: name
    type(dlb_node_metrics_t), intent(out) :: node_metrics
    integer(c_int) :: ierr
end function dlb_talp_querypopnodemetrics
149
150
151 !---------------------------------------------------------------------------!
152 ! The functions declared below are intended to be called only from
153 ! 1st-party programs, and they should return an error if they are
154 ! called from external profilers.
155 !---------------------------------------------------------------------------!
156
! Get the pointer of the global, application-wide monitoring region.
! Binds to the C routine DLB_MonitoringRegionGetGlobal(void).
!   handle  C pointer to the region, as returned by the C routine
function dlb_monitoringregiongetglobal() &
    & bind(c, name='DLB_MonitoringRegionGetGlobal') &
    & result(handle)
    use, intrinsic :: iso_c_binding, only: c_ptr
    type(c_ptr) :: handle
end function dlb_monitoringregiongetglobal
163
!! deprecated: bind to DLB_MonitoringRegionGetGlobal()
! Kept as an alias of dlb_monitoringregiongetglobal: both Fortran names bind
! to the same C symbol and therefore return the same handle.
!   handle  C pointer to the global monitoring region
function dlb_monitoringregiongetimplicit() &
    & bind(c, name='DLB_MonitoringRegionGetGlobal') &
    & result(handle)
    use, intrinsic :: iso_c_binding, only: c_ptr
    type(c_ptr) :: handle
end function dlb_monitoringregiongetimplicit
171
! Register a new monitoring region, or obtain the pointer associated with an
! existing one by name.
! Binds to the C routine
!   dlb_monitor_t* DLB_MonitoringRegionRegister(const char *name)
!   region_name  (in) region name as an array of C characters
!                NOTE(review): the C side takes a const char*, so the name
!                presumably has to be NUL-terminated -- confirm.
!   handle       C pointer to the region, as returned by the C routine
function dlb_monitoringregionregister(region_name) &
    & bind(c, name='DLB_MonitoringRegionRegister') &
    & result(handle)
    use, intrinsic :: iso_c_binding, only: c_ptr, c_char
    character(kind=c_char), dimension(*), intent(in) :: region_name
    type(c_ptr) :: handle
end function dlb_monitoringregionregister
179
! Reset a monitoring region.
! Binds to the C routine int DLB_MonitoringRegionReset(dlb_monitor_t *handle).
!   handle  (in) region handle, passed by value as a C pointer
!   ierr    status code returned by the C routine
function dlb_monitoringregionreset(handle) &
    & bind(c, name='DLB_MonitoringRegionReset') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_ptr
    type(c_ptr), intent(in), value :: handle
    integer(c_int) :: ierr
end function dlb_monitoringregionreset
186
! Start (or unpause) a monitoring region.
! Binds to the C routine int DLB_MonitoringRegionStart(dlb_monitor_t *handle).
!   handle  (in) region handle, passed by value as a C pointer
!   ierr    status code returned by the C routine
function dlb_monitoringregionstart(handle) &
    & bind(c, name='DLB_MonitoringRegionStart') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_ptr
    type(c_ptr), intent(in), value :: handle
    integer(c_int) :: ierr
end function dlb_monitoringregionstart
193
! Stop (or pause) a monitoring region.
! Binds to the C routine int DLB_MonitoringRegionStop(dlb_monitor_t *handle).
!   handle  (in) region handle, passed by value as a C pointer
!   ierr    status code returned by the C routine
function dlb_monitoringregionstop(handle) &
    & bind(c, name='DLB_MonitoringRegionStop') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_ptr
    type(c_ptr), intent(in), value :: handle
    integer(c_int) :: ierr
end function dlb_monitoringregionstop
200
! Print a report of the monitoring region to stderr.
! Binds to the C routine
!   int DLB_MonitoringRegionReport(const dlb_monitor_t *handle)
!   handle  (in) region handle, passed by value as a C pointer
!   ierr    status code returned by the C routine
function dlb_monitoringregionreport(handle) &
    & bind(c, name='DLB_MonitoringRegionReport') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_ptr
    type(c_ptr), intent(in), value :: handle
    integer(c_int) :: ierr
end function dlb_monitoringregionreport
207
! Update all monitoring regions.
! Binds to the C routine DLB_MonitoringRegionsUpdate(void). Note that the
! Fortran name is singular ("region") while the C symbol is plural
! ("Regions"); the Fortran name is kept as-is for existing callers.
!   ierr  status code returned by the C routine
function dlb_monitoringregionupdate() &
    & bind(c, name='DLB_MonitoringRegionsUpdate') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int
    integer(c_int) :: ierr
end function dlb_monitoringregionupdate
213
! Perform an MPI collective communication to collect POP metrics.
! Binds to the C routine
!   int DLB_TALP_CollectPOPMetrics(dlb_monitor_t *monitor,
!                                  dlb_pop_metrics_t *pop_metrics)
!   monitor      (in)  region handle, passed by value as a C pointer
!   pop_metrics  (out) record filled in by the C routine
!   ierr         status code returned by the C routine
function dlb_talp_collectpopmetrics(monitor, pop_metrics) &
    & bind(c, name='DLB_TALP_CollectPOPMetrics') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_ptr
    import :: dlb_pop_metrics_t
    type(c_ptr), intent(in), value :: monitor
    type(dlb_pop_metrics_t), intent(out) :: pop_metrics
    integer(c_int) :: ierr
end function dlb_talp_collectpopmetrics
222
! Perform a node collective communication to collect TALP node metrics.
! Binds to the C routine
!   int DLB_TALP_CollectPOPNodeMetrics(dlb_monitor_t *monitor,
!                                      dlb_node_metrics_t *node_metrics)
!   monitor       (in)  region handle, passed by value as a C pointer
!   node_metrics  (out) record filled in by the C routine
!   ierr          status code returned by the C routine
function dlb_talp_collectpopnodemetrics(monitor, node_metrics) &
    & bind(c, name='DLB_TALP_CollectPOPNodeMetrics') result(ierr)
    use, intrinsic :: iso_c_binding, only: c_int, c_ptr
    import :: dlb_node_metrics_t
    type(c_ptr), intent(in), value :: monitor
    type(dlb_node_metrics_t), intent(out) :: node_metrics
    integer(c_int) :: ierr
end function dlb_talp_collectpopnodemetrics
231 end interface
232
233! -*- fortran -*- vim: set ft=fortran:
DLB_EXPORT_SYMBOL int DLB_TALP_QueryPOPNodeMetrics(const char *name, dlb_node_metrics_t *node_metrics)
From either 1st or 3rd party, query node metrics for one region.
Definition: DLB_interface_talp.c:149
DLB_EXPORT_SYMBOL int DLB_MonitoringRegionReport(const dlb_monitor_t *handle)
Print a report to stderr of the monitoring region.
Definition: DLB_interface_talp.c:217
DLB_EXPORT_SYMBOL int DLB_TALP_GetNumCPUs(int *ncpus)
Get the number of CPUs in the node.
Definition: DLB_interface_talp.c:74
DLB_EXPORT_SYMBOL int DLB_TALP_Detach(void)
Detach current process from DLB system.
Definition: DLB_interface_talp.c:66
DLB_EXPORT_SYMBOL int DLB_MonitoringRegionStop(dlb_monitor_t *handle)
Stop (or pause) monitoring region.
Definition: DLB_interface_talp.c:208
DLB_EXPORT_SYMBOL int DLB_MonitoringRegionStart(dlb_monitor_t *handle)
Start (or unpause) monitoring region.
Definition: DLB_interface_talp.c:199
DLB_EXPORT_SYMBOL int DLB_TALP_CollectPOPNodeMetrics(dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics)
Perform a node collective communication to collect TALP node metrics.
Definition: DLB_interface_talp.c:244
DLB_EXPORT_SYMBOL dlb_monitor_t * DLB_MonitoringRegionGetGlobal(void)
Get the pointer of the global application-wide Monitoring Region.
Definition: DLB_interface_talp.c:164
DLB_EXPORT_SYMBOL int DLB_TALP_Attach(void)
Attach current process to DLB system as TALP administrator.
Definition: DLB_interface_talp.c:42
DLB_EXPORT_SYMBOL int DLB_TALP_CollectPOPMetrics(dlb_monitor_t *monitor, dlb_pop_metrics_t *pop_metrics)
Perform an MPI collective communication to collect POP metrics.
Definition: DLB_interface_talp.c:235
DLB_EXPORT_SYMBOL int DLB_MonitoringRegionReset(dlb_monitor_t *handle)
Reset monitoring region.
Definition: DLB_interface_talp.c:190
DLB_EXPORT_SYMBOL int DLB_MonitoringRegionsUpdate(void)
Update all monitoring regions.
Definition: DLB_interface_talp.c:226
#define DLB_IMPLICIT_REGION
Definition: dlb_talp.h:29
@ DLB_MONITOR_NAME_MAX
Definition: dlb_talp.h:32
#define DLB_GLOBAL_REGION
Definition: dlb_talp.h:27
#define DLB_GLOBAL_REGION_NAME
Definition: dlb_talp.h:26
dlb_monitor_t * DLB_MonitoringRegionRegister(const char *name)
Register a new Monitoring Region, or obtain the associated pointer by name.
#define DLB_MPI_REGION
Definition: dlb_talp.h:28
!Copyright Barcelona Supercomputing Center !This file is part of the DLB library !DLB is free either version of the !but WITHOUT ANY WARRANTY
Definition: dlbf_talp.h:12
!Copyright Barcelona Supercomputing Center !This file is part of the DLB library !DLB is free either version of the or(at your option) any later version. ! ! ! ! DLB is distributed in the hope that it will be useful
without even the implied warranty of !MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE See the !GNU Lesser General Public License for more details !You should have received a copy of the GNU Lesser General Public License !along with DLB If not
Definition: dlbf_talp.h:17
!Copyright Barcelona Supercomputing Center !This file is part of the DLB library !DLB is free either version of the License
Definition: dlbf_talp.h:8
!Copyright Barcelona Supercomputing Center !This file is part of the DLB library !DLB is free software
Definition: dlbf_talp.h:8
Definition: dlb_talp.h:35
Definition: dlb_talp.h:182
Definition: dlb_talp.h:91