|
Dynamic Load Balance 3.6.1+32-59d1
|
#include <time.h>
#include <stdint.h>

Go to the source code of this file.
Data Structures | |
| struct | dlb_monitor_t |
| struct | dlb_pop_metrics_t |
| struct | dlb_node_metrics_t |
| struct | dlb_node_times_t |
Macros | |
| #define | DLB_GLOBAL_REGION_NAME "Global" |
| #define | DLB_GLOBAL_REGION NULL |
| #define | DLB_MPI_REGION NULL /* deprecated in favor of DLB_GLOBAL_REGION */ |
| #define | DLB_IMPLICIT_REGION NULL /* deprecated in favor of DLB_GLOBAL_REGION */ |
| #define | DLB_LAST_OPEN_REGION (void*)1 |
Enumerations | |
| enum | { DLB_MONITOR_NAME_MAX = 128 } |
Functions | |
| int | DLB_TALP_Attach (void) |
| Attach current process to DLB system as TALP administrator. More... | |
| int | DLB_TALP_Detach (void) |
| Detach current process from DLB system. More... | |
| int | DLB_TALP_GetNumCPUs (int *ncpus) |
| Get the number of CPUs in the node. More... | |
| int | DLB_TALP_GetPidList (int *pidlist, int *nelems, int max_len) |
| Get the list of running processes registered in the DLB system. More... | |
| int | DLB_TALP_GetTimes (int pid, double *mpi_time, double *useful_time) |
| Get the CPU time spent on MPI and useful computation for the given process. More... | |
| int | DLB_TALP_GetNodeTimes (const char *name, dlb_node_times_t *node_times_list, int *nelems, int max_len) |
| Get the list of raw times for the specified region. More... | |
| int | DLB_TALP_QueryPOPNodeMetrics (const char *name, dlb_node_metrics_t *node_metrics) |
| From either 1st or 3rd party, query node metrics for one region. More... | |
| dlb_monitor_t * | DLB_MonitoringRegionGetGlobal (void) |
| Get the pointer of the global application-wide Monitoring Region. More... | |
| dlb_monitor_t * | DLB_MonitoringRegionGetImplicit (void) __attribute__((deprecated("DLB_MonitoringRegionGetGlobal"))) |
| const dlb_monitor_t * | DLB_MonitoringRegionGetMPIRegion (void) __attribute__((deprecated("DLB_MonitoringRegionGetGlobal"))) |
| dlb_monitor_t * | DLB_MonitoringRegionRegister (const char *name) |
| Register a new Monitoring Region, or obtain the associated pointer by name. More... | |
| int | DLB_MonitoringRegionReset (dlb_monitor_t *handle) |
| Reset monitoring region. More... | |
| int | DLB_MonitoringRegionStart (dlb_monitor_t *handle) |
| Start (or unpause) monitoring region. More... | |
| int | DLB_MonitoringRegionStop (dlb_monitor_t *handle) |
| Stop (or pause) monitoring region. More... | |
| int | DLB_MonitoringRegionReport (const dlb_monitor_t *handle) |
| Print a report to stderr of the monitoring region. More... | |
| int | DLB_MonitoringRegionsUpdate (void) |
| Update all monitoring regions. More... | |
| int | DLB_TALP_CollectPOPMetrics (dlb_monitor_t *monitor, dlb_pop_metrics_t *pop_metrics) |
| Perform an MPI collective communication to collect POP metrics. More... | |
| int | DLB_TALP_CollectPOPNodeMetrics (dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics) |
| Perform a node collective communication to collect TALP node metrics. More... | |
| int | DLB_TALP_CollectNodeMetrics (dlb_monitor_t *monitor, dlb_node_metrics_t *node_metrics) __attribute__((deprecated("DLB_TALP_CollectPOPNodeMetrics"))) |
| #define DLB_GLOBAL_REGION_NAME "Global" |
| #define DLB_GLOBAL_REGION NULL |
| #define DLB_MPI_REGION NULL /* deprecated in favor of DLB_GLOBAL_REGION */ |
| #define DLB_IMPLICIT_REGION NULL /* deprecated in favor of DLB_GLOBAL_REGION */ |
| #define DLB_LAST_OPEN_REGION (void*)1 |
| int DLB_TALP_Attach | ( | void | ) |
Attach current process to DLB system as TALP administrator.
Once the process is attached to DLB as TALP administrator, it may perform the actions described below in this file. This way, the process is able to obtain some TALP values, such as the time spent in computation or in MPI, for each of the DLB running processes.
| int DLB_TALP_Detach | ( | void | ) |
Detach current process from DLB system.
If previously attached, a process must call this function to correctly close internal DLB file descriptors and clean data.
| int DLB_TALP_GetNumCPUs | ( | int * | ncpus | ) |
Get the number of CPUs in the node.
| [out] | ncpus | the number of CPUs |
| int DLB_TALP_GetPidList | ( | int * | pidlist, |
| int * | nelems, | ||
| int | max_len | ||
| ) |
Get the list of running processes registered in the DLB system.
| [out] | pidlist | The output list |
| [out] | nelems | Number of elements in the list |
| [in] | max_len | Max capacity of the list |
| int DLB_TALP_GetTimes | ( | int | pid, |
| double * | mpi_time, | ||
| double * | useful_time | ||
| ) |
Get the CPU time spent on MPI and useful computation for the given process.
| [in] | pid | target Process ID, or 0 if own process |
| [out] | mpi_time | CPU time spent on MPI in seconds |
| [out] | useful_time | CPU time spent on useful computation in seconds |
| int DLB_TALP_GetNodeTimes | ( | const char * | name, |
| dlb_node_times_t * | node_times_list, | ||
| int * | nelems, | ||
| int | max_len | ||
| ) |
Get the list of raw times for the specified region.
| [in] | name | Name to identify the region |
| [out] | node_times_list | The output list |
| [out] | nelems | Number of elements in the list |
| [in] | max_len | Max capacity of the list |
Note: This function requires DLB_ARGS+=" --talp-external-profiler" even if it's called from 1st-party programs.
| int DLB_TALP_QueryPOPNodeMetrics | ( | const char * | name, |
| dlb_node_metrics_t * | node_metrics | ||
| ) |
From either 1st or 3rd party, query node metrics for one region.
| [in] | name | Name to identify the region |
| [out] | node_metrics | Allocated structure where the collected metrics will be stored |
Note: This function requires DLB_ARGS+=" --talp-external-profiler" even if it's called from 1st-party programs.
| dlb_monitor_t * DLB_MonitoringRegionGetGlobal | ( | void | ) |
Get the pointer of the global application-wide Monitoring Region.
| dlb_monitor_t * DLB_MonitoringRegionGetImplicit | ( | void | ) |
| const dlb_monitor_t * DLB_MonitoringRegionGetMPIRegion | ( | void | ) |
| dlb_monitor_t * DLB_MonitoringRegionRegister | ( | const char * | name | ) |
Register a new Monitoring Region, or obtain the associated pointer by name.
| [in] | name | Name to identify the region |
This function registers a new monitoring region or obtains the pointer to an already created region with the same name. The name "Global" is a special reserved name (case-insensitive); invoking this function with this name is equivalent to invoking DLB_MonitoringRegionGetGlobal(). Otherwise, the region name is treated as case-sensitive.
| int DLB_MonitoringRegionReset | ( | dlb_monitor_t * | handle | ) |
Reset monitoring region.
| [in] | handle | Monitoring handle that identifies the region, or DLB_GLOBAL_REGION |
Reset all values of the monitoring region except num_resets, which is incremented. If the region is open, discard all intermediate values and close it.
| int DLB_MonitoringRegionStart | ( | dlb_monitor_t * | handle | ) |
Start (or unpause) monitoring region.
| [in] | handle | Monitoring handle that identifies the region, or DLB_GLOBAL_REGION |
Notes on multi-threading:
| int DLB_MonitoringRegionStop | ( | dlb_monitor_t * | handle | ) |
Stop (or pause) monitoring region.
| [in] | handle | Monitoring handle that identifies the region, DLB_GLOBAL_REGION, or DLB_LAST_OPEN_REGION |
| int DLB_MonitoringRegionReport | ( | const dlb_monitor_t * | handle | ) |
Print a report to stderr of the monitoring region.
| [in] | handle | Monitoring handle that identifies the region, or DLB_GLOBAL_REGION |
| int DLB_MonitoringRegionsUpdate | ( | void | ) |
Update all monitoring regions.
Monitoring regions are only updated in certain situations, like when starting/stopping a region, or finalizing MPI. This routine forces the update of all started monitoring regions.
| int DLB_TALP_CollectPOPMetrics | ( | dlb_monitor_t * | monitor, |
| dlb_pop_metrics_t * | pop_metrics | ||
| ) |
Perform an MPI collective communication to collect POP metrics.
| [in] | monitor | Monitoring handle that identifies the region, or DLB_GLOBAL_REGION macro (NULL) if global application-wide region |
| [out] | pop_metrics | Allocated structure where the collected metrics will be stored |
| int DLB_TALP_CollectPOPNodeMetrics | ( | dlb_monitor_t * | monitor, |
| dlb_node_metrics_t * | node_metrics | ||
| ) |
Perform a node collective communication to collect TALP node metrics.
| [in] | monitor | Monitoring handle that identifies the region, or DLB_GLOBAL_REGION macro (NULL) if global application-wide region |
| [out] | node_metrics | Allocated structure where the collected metrics will be stored |
This function performs a node barrier to collect the data. All processes that are running in the node must invoke this function.
| int DLB_TALP_CollectNodeMetrics | ( | dlb_monitor_t * | monitor, |
| dlb_node_metrics_t * | node_metrics | ||
| ) |