Dynamic Load Balance 3.6.1+32-59d1
#include <dlb_talp.h>
POP metrics (of one monitor) collected among all processes
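A typical workflow for obtaining these metrics is to register a TALP monitoring region, start and stop it around the code of interest, and then collect the aggregated POP metrics. The sketch below is illustrative only: it assumes the monitoring-region API and the `dlb_pop_metrics_t` / `DLB_TALP_CollectPOPMetrics` names from `dlb_talp.h`, and the `DLB_SUCCESS` error code from `dlb_errors.h`; check the headers of your installed DLB version for the exact signatures.

```c
#include <stdio.h>
#include <mpi.h>
#include <dlb_talp.h>    /* monitoring regions, dlb_pop_metrics_t */
#include <dlb_errors.h>  /* DLB_SUCCESS */

int main(int argc, char *argv[]) {
    MPI_Init(&argc, &argv);

    /* Register a named monitoring region (name is illustrative) */
    dlb_monitor_t *monitor = DLB_MonitoringRegionRegister("solver");

    DLB_MonitoringRegionStart(monitor);
    /* ... computation to be measured ... */
    DLB_MonitoringRegionStop(monitor);

    /* Collect the region's POP metrics, aggregated among all processes */
    dlb_pop_metrics_t metrics;
    if (DLB_TALP_CollectPOPMetrics(monitor, &metrics) == DLB_SUCCESS) {
        printf("Region %s: parallel efficiency = %.2f\n",
               metrics.name, metrics.parallel_efficiency);
    }

    MPI_Finalize();
    return 0;
}
```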
| Field | Description |
|---|---|
| char name[DLB_MONITOR_NAME_MAX] | Name of the monitor |
| int num_cpus | Total number of CPUs used by the processes that have used the region |
| int num_mpi_ranks | Total number of MPI processes that have used the region |
| int num_nodes | Total number of nodes used by the processes that have used the region |
| float avg_cpus | Average number of CPUs used in the region; only meaningful if LeWI is enabled |
| int num_gpus | TBD |
| double cycles | Total number of CPU cycles elapsed in the region during useful time |
| double instructions | Total number of instructions executed during useful time |
| int64_t num_measurements | Number of times the region has been started and stopped, among all processes |
| int64_t num_mpi_calls | Number of executed MPI calls, combined among all MPI processes |
| int64_t num_omp_parallels | Number of encountered OpenMP parallel regions, combined among all processes |
| int64_t num_omp_tasks | Number of encountered OpenMP tasks, combined among all processes |
| int64_t num_gpu_runtime_calls | Number of executed GPU runtime calls, combined among all processes |
| int64_t elapsed_time | Accumulated elapsed time (in nanoseconds) inside the region |
| int64_t useful_time | Accumulated CPU time (in nanoseconds) of useful computation in the application |
| int64_t mpi_time | Accumulated CPU time (in nanoseconds), not useful, spent in MPI |
| int64_t omp_load_imbalance_time | Accumulated CPU time (in nanoseconds), not useful, spent due to load imbalance in OpenMP parallel regions |
| int64_t omp_scheduling_time | Accumulated CPU time (in nanoseconds), not useful, spent inside OpenMP parallel regions due to scheduling and overhead, not counting load imbalance |
| int64_t omp_serialization_time | Accumulated CPU time (in nanoseconds), not useful, spent outside OpenMP parallel regions |
| int64_t gpu_runtime_time | Accumulated CPU time (in nanoseconds), not useful, spent in GPU runtime calls |
| double min_mpi_normd_proc | MPI time, normalized at process level, of the process with the least MPI time |
| double min_mpi_normd_node | MPI time, normalized at node level, of the node with the least MPI time |
| int64_t gpu_useful_time | |
| int64_t gpu_communication_time | |
| int64_t gpu_inactive_time | |
| int64_t max_gpu_useful_time | |
| int64_t max_gpu_active_time | |
| float parallel_efficiency | Overall parallel efficiency of the application, in the range [0.0, 1.0] |
| float mpi_parallel_efficiency | Parallel efficiency of the MPI level |
| float mpi_communication_efficiency | Efficiency lost due to MPI data transfer and serialization |
| float mpi_load_balance | MPI load balance coefficient |
| float mpi_load_balance_in | Intra-node MPI load balance coefficient |
| float mpi_load_balance_out | Inter-node MPI load balance coefficient |
| float omp_parallel_efficiency | Parallel efficiency of the OpenMP level |
| float omp_load_balance | OpenMP load balance coefficient inside parallel regions |
| float omp_scheduling_efficiency | Efficiency of the OpenMP scheduling inside parallel regions |
| float omp_serialization_efficiency | Efficiency lost due to OpenMP threads idling outside of parallel regions |
| float device_offload_efficiency | Efficiency of host-to-device offloading |
| float gpu_parallel_efficiency | TBD |
| float gpu_load_balance | TBD |
| float gpu_communication_efficiency | TBD |
| float gpu_orchestration_efficiency | TBD |