Dynamic Load Balance 3.6.1+32-59d1
talp_types.h
Go to the documentation of this file.
1/*********************************************************************************/
2/* Copyright 2009-2024 Barcelona Supercomputing Center */
3/* */
4/* This file is part of the DLB library. */
5/* */
6/* DLB is free software: you can redistribute it and/or modify */
7/* it under the terms of the GNU Lesser General Public License as published by */
8/* the Free Software Foundation, either version 3 of the License, or */
9/* (at your option) any later version. */
10/* */
11/* DLB is distributed in the hope that it will be useful, */
12/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
13/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
14/* GNU Lesser General Public License for more details. */
15/* */
16/* You should have received a copy of the GNU Lesser General Public License */
17/* along with DLB. If not, see <https://www.gnu.org/licenses/>. */
18/*********************************************************************************/
19
20#ifndef TALP_TYPES_H
21#define TALP_TYPES_H
22
23#include "apis/dlb_talp.h"
24#include "apis/dlb_types.h"
25#include "support/atomic.h"
26#include "support/gtree.h"
27#include "support/gslist.h"
28
29#include <pthread.h>
30
31typedef enum {
39
40/* The sample contains the temporary per-thread accumulated values of all the
41 * measured metrics. Once the sample is flushed, a macrosample is created using
42 * samples from all (*) threads. A sample starts and ends in each of the
43 * following scenarios:
44 * - MPI Init/Finalize
45 * - end of a parallel region
46 * - a region starts or stops
47 * - a request from the API
48 * (*): with nested parallelism, only threads from that parallel region are reduced
49 */
51 struct {
57 } timers;
58 struct {
61 } counters;
62 // The last_read counters contain the PAPI_Read values from the beginning of the last useful state.
63 // This makes it possible to compute the difference without calling PAPI_Reset()
64 struct {
66 atomic_int_least64_t instructions;
67 } last_read_counters;
68 struct {
73 } stats;
77
78/* The macrosample is a temporary aggregation of all metrics in samples of all,
79 * or a subset of, threads. It is only constructed when samples are flushed and
80 * aggregated. The macrosample is then used to update all started monitoring
81 * regions. */
82typedef struct talp_macrosample_t {
83 struct {
84 int64_t useful;
91 struct {
92 int64_t useful;
94 int64_t inactive;
96 struct {
97 int64_t cycles;
98 int64_t instructions;
100 struct {
107
108/* TALP flags for talp_info */
109typedef struct talp_flags_t {
110 bool have_shmem:1; /* whether to record data in shmem */
111 bool have_minimal_shmem:1; /* whether to create a shmem for the global region */
112 bool external_profiler:1; /* whether to update shmem on every sample */
113 bool have_mpi:1; /* whether TALP regions have MPI events */
114 bool have_openmp:1; /* whether TALP regions have OpenMP events */
115 bool have_gpu:1; /* whether TALP regions have GPU events */
116 bool have_hwc:1; /* whether TALP regions have HWC events */
118
119/* TALP info per spd */
120typedef struct talp_info_t {
122 int ncpus; /* Number of process CPUs (also num samples) */
123 dlb_monitor_t *monitor; /* Convenience pointer to the global region */
124 GTree *regions; /* Tree of monitoring regions */
125 GSList *open_regions; /* List of open regions */
126 pthread_mutex_t regions_mutex; /* Mutex to protect regions allocation/iteration */
127 talp_sample_t **samples; /* Per-thread ongoing sample,
128 added to all monitors when finished */
129 pthread_mutex_t samples_mutex; /* Mutex to protect samples allocation/iteration */
131
132/* Private data per monitor */
133typedef struct monitor_data_t {
134 int id;
135 int node_shared_id; /* id for allocating region in the shmem */
136 struct {
137 bool started:1;
138 bool internal:1; /* internal regions are not reported */
139 bool enabled:1;
142
143
144#endif /* TALP_TYPES_H */
#define atomic_int_least64_t
Definition: atomic.h:59
#define DLB_ALIGN_CACHE
Definition: atomic.h:92
Definition: dlb_talp.h:35
Definition: talp_types.h:133
bool internal
Definition: talp_types.h:138
bool started
Definition: talp_types.h:137
struct monitor_data_t::@74 flags
int id
Definition: talp_types.h:134
bool enabled
Definition: talp_types.h:139
int node_shared_id
Definition: talp_types.h:135
Definition: talp_types.h:109
bool external_profiler
Definition: talp_types.h:112
bool have_shmem
Definition: talp_types.h:110
bool have_minimal_shmem
Definition: talp_types.h:111
bool have_gpu
Definition: talp_types.h:115
bool have_mpi
Definition: talp_types.h:113
bool have_openmp
Definition: talp_types.h:114
bool have_hwc
Definition: talp_types.h:116
Definition: talp_types.h:120
int ncpus
Definition: talp_types.h:122
talp_sample_t ** samples
Definition: talp_types.h:127
GSList * open_regions
Definition: talp_types.h:125
GTree * regions
Definition: talp_types.h:124
pthread_mutex_t regions_mutex
Definition: talp_types.h:126
dlb_monitor_t * monitor
Definition: talp_types.h:123
pthread_mutex_t samples_mutex
Definition: talp_types.h:129
talp_flags_t flags
Definition: talp_types.h:121
Definition: talp_types.h:82
struct talp_macrosample_t::@72 counters
struct talp_macrosample_t::@70 timers
int64_t useful
Definition: talp_types.h:84
int64_t num_omp_parallels
Definition: talp_types.h:102
int64_t not_useful_omp_in_sched
Definition: talp_types.h:87
int64_t not_useful_mpi
Definition: talp_types.h:85
struct talp_macrosample_t::@73 stats
int64_t not_useful_gpu
Definition: talp_types.h:89
int64_t instructions
Definition: talp_types.h:98
int64_t num_omp_tasks
Definition: talp_types.h:103
int64_t num_gpu_runtime_calls
Definition: talp_types.h:104
int64_t not_useful_omp_out
Definition: talp_types.h:88
int64_t num_mpi_calls
Definition: talp_types.h:101
int64_t cycles
Definition: talp_types.h:97
int64_t inactive
Definition: talp_types.h:94
int64_t not_useful_omp_in_lb
Definition: talp_types.h:86
int64_t communication
Definition: talp_types.h:93
struct talp_macrosample_t::@71 gpu_timers
Definition: talp_types.h:50
atomic_int_least64_t instructions
Definition: talp_types.h:60
atomic_int_least64_t useful
Definition: talp_types.h:52
atomic_int_least64_t num_mpi_calls
Definition: talp_types.h:69
atomic_int_least64_t not_useful_mpi
Definition: talp_types.h:53
atomic_int_least64_t num_gpu_runtime_calls
Definition: talp_types.h:72
talp_sample_state_t state
Definition: talp_types.h:75
atomic_int_least64_t num_omp_parallels
Definition: talp_types.h:70
atomic_int_least64_t not_useful_omp_in
Definition: talp_types.h:54
atomic_int_least64_t cycles
Definition: talp_types.h:59
int64_t last_updated_timestamp
Definition: talp_types.h:74
atomic_int_least64_t not_useful_omp_out
Definition: talp_types.h:55
atomic_int_least64_t num_omp_tasks
Definition: talp_types.h:71
atomic_int_least64_t not_useful_gpu
Definition: talp_types.h:56
talp_sample_state_t
Definition: talp_types.h:31
@ TALP_STATE_USEFUL
Definition: talp_types.h:33
@ TALP_STATE_NOT_USEFUL_MPI
Definition: talp_types.h:34
@ TALP_STATE_NOT_USEFUL_OMP_OUT
Definition: talp_types.h:36
@ TALP_STATE_DISABLED
Definition: talp_types.h:32
@ TALP_STATE_NOT_USEFUL_OMP_IN
Definition: talp_types.h:35
@ TALP_STATE_NOT_USEFUL_GPU
Definition: talp_types.h:37