Dynamic Load Balance 3.6.1+32-59d1
perf_metrics.h
Go to the documentation of this file.
1/*********************************************************************************/
2/* Copyright 2009-2025 Barcelona Supercomputing Center */
3/* */
4/* This file is part of the DLB library. */
5/* */
6/* DLB is free software: you can redistribute it and/or modify */
7/* it under the terms of the GNU Lesser General Public License as published by */
8/* the Free Software Foundation, either version 3 of the License, or */
9/* (at your option) any later version. */
10/* */
11/* DLB is distributed in the hope that it will be useful, */
12/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14/* GNU Lesser General Public License for more details. */
15/* */
16/* You should have received a copy of the GNU Lesser General Public License */
17/* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18/*********************************************************************************/
19
20#ifndef PERF_METRICS_H
21#define PERF_METRICS_H
22
23#include <stdbool.h>
24#include <stdint.h>
25
26typedef struct dlb_monitor_t dlb_monitor_t;
28
29/*********************************************************************************/
30/* POP metrics - pure MPI model */
31/*********************************************************************************/
32
33/* POP metrics for pure MPI executions */
34typedef struct perf_metrics_mpi_t {
38 float lb_in;
39 float lb_out;
41
43 perf_metrics_mpi_t *metrics,
44 int processes_per_node,
45 int64_t node_sum_useful,
46 int64_t node_sum_mpi,
47 int64_t max_useful_time);
48
49
50/*********************************************************************************/
51/* POP metrics - hybrid MPI + OpenMP model */
52/*********************************************************************************/
53
54/* Internal struct containing everything needed to construct a
55 * dlb_pop_metrics_t. It can be thought of as an abstraction of the app_reduction_t
56 * combining serial and parallel program flow. Every value in here either comes
57 * directly from measurement or is computed in an MPI reduction. */
58typedef struct pop_base_metrics_t {
59 /* Resources */
63 float avg_cpus;
65 /* Hardware counters */
66 double cycles;
68 /* Statistics */
74 /* Sum of Host times among all processes */
75 int64_t elapsed_time;
76 int64_t useful_time;
77 int64_t mpi_time;
82 /* Host times normalized by the number of assigned CPUs */
89 /* Sum of Device times among all processes */
93 /* Device Max Times */
97
98
99#if MPI_LIB
101 const dlb_monitor_t *monitor, bool all_to_all);
102#endif
103
105 const dlb_monitor_t *monitor);
106
/* Declaration only: this header provides no body for this function.
 * base_metrics is taken through a pointer-to-const, pop_metrics through a
 * non-const pointer.
 * NOTE(review): presumably fills *pop_metrics from *base_metrics for the
 * monitor identified by monitor_name -- confirm against the definition. */
107void perf_metrics__base_to_pop_metrics(const char *monitor_name,
108 const pop_base_metrics_t *base_metrics, dlb_pop_metrics_t *pop_metrics);
109
110#endif /* PERF_METRICS_H */
void perf_metrics__infer_mpi_model(perf_metrics_mpi_t *metrics, int processes_per_node, int64_t node_sum_useful, int64_t node_sum_mpi, int64_t max_useful_time)
Definition: perf_metrics.c:70
void perf_metrics__base_to_pop_metrics(const char *monitor_name, const pop_base_metrics_t *base_metrics, dlb_pop_metrics_t *pop_metrics)
Definition: perf_metrics.c:670
void perf_metrics__reduce_monitor_into_base_metrics(pop_base_metrics_t *base_metrics, const dlb_monitor_t *monitor, bool all_to_all)
Definition: perf_metrics.c:583
void perf_metrics__local_monitor_into_base_metrics(pop_base_metrics_t *base_metrics, const dlb_monitor_t *monitor)
Definition: perf_metrics.c:634
Definition: dlb_talp.h:35
Definition: dlb_talp.h:91
Definition: perf_metrics.h:34
float communication_efficiency
Definition: perf_metrics.h:36
float load_balance
Definition: perf_metrics.h:37
float lb_out
Definition: perf_metrics.h:39
float lb_in
Definition: perf_metrics.h:38
float parallel_efficiency
Definition: perf_metrics.h:35
Definition: perf_metrics.h:58
int64_t gpu_inactive_time
Definition: perf_metrics.h:92
int64_t elapsed_time
Definition: perf_metrics.h:75
int64_t num_omp_parallels
Definition: perf_metrics.h:71
double min_mpi_normd_proc
Definition: perf_metrics.h:85
double min_mpi_normd_node
Definition: perf_metrics.h:88
int64_t num_omp_tasks
Definition: perf_metrics.h:72
double instructions
Definition: perf_metrics.h:67
int64_t num_measurements
Definition: perf_metrics.h:69
int64_t num_gpu_runtime_calls
Definition: perf_metrics.h:73
int64_t num_mpi_calls
Definition: perf_metrics.h:70
int64_t gpu_useful_time
Definition: perf_metrics.h:90
int64_t gpu_communication_time
Definition: perf_metrics.h:91
int64_t omp_serialization_time
Definition: perf_metrics.h:80
int num_gpus
Definition: perf_metrics.h:64
int64_t useful_time
Definition: perf_metrics.h:76
int64_t gpu_runtime_time
Definition: perf_metrics.h:81
int64_t mpi_time
Definition: perf_metrics.h:77
int64_t omp_load_imbalance_time
Definition: perf_metrics.h:78
int64_t max_gpu_useful_time
Definition: perf_metrics.h:94
float avg_cpus
Definition: perf_metrics.h:63
double cycles
Definition: perf_metrics.h:66
int64_t omp_scheduling_time
Definition: perf_metrics.h:79
int num_nodes
Definition: perf_metrics.h:62
int num_mpi_ranks
Definition: perf_metrics.h:61
int num_cpus
Definition: perf_metrics.h:60
int64_t max_gpu_active_time
Definition: perf_metrics.h:95