| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /*********************************************************************************/ | ||
| 2 | /* Copyright 2009-2025 Barcelona Supercomputing Center */ | ||
| 3 | /* */ | ||
| 4 | /* This file is part of the DLB library. */ | ||
| 5 | /* */ | ||
| 6 | /* DLB is free software: you can redistribute it and/or modify */ | ||
| 7 | /* it under the terms of the GNU Lesser General Public License as published by */ | ||
| 8 | /* the Free Software Foundation, either version 3 of the License, or */ | ||
| 9 | /* (at your option) any later version. */ | ||
| 10 | /* */ | ||
| 11 | /* DLB is distributed in the hope that it will be useful, */ | ||
| 12 | /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ | ||
| 13 | /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ | ||
| 14 | /* GNU Lesser General Public License for more details. */ | ||
| 15 | /* */ | ||
| 16 | /* You should have received a copy of the GNU Lesser General Public License */ | ||
| 17 | /* along with DLB. If not, see <https://www.gnu.org/licenses/>. */ | ||
| 18 | /*********************************************************************************/ | ||
| 19 | |||
| 20 | #include "talp/talp_mpi.h" | ||
| 21 | |||
| 22 | #include "LB_comm/shmem_talp.h" | ||
| 23 | #include "LB_core/node_barrier.h" | ||
| 24 | #include "LB_core/spd.h" | ||
| 25 | #include "apis/dlb_talp.h" | ||
| 26 | #include "support/atomic.h" | ||
| 27 | #include "support/debug.h" | ||
| 28 | #include "support/mask_utils.h" | ||
| 29 | #include "talp/regions.h" | ||
| 30 | #include "talp/talp.h" | ||
| 31 | #include "talp/talp_record.h" | ||
| 32 | #include "talp/talp_types.h" | ||
| 33 | #ifdef MPI_LIB | ||
| 34 | #include "mpi/mpi_core.h" | ||
| 35 | #endif | ||
| 36 | |||
| 37 | #include <stdio.h> | ||
| 38 | #include <string.h> | ||
| 39 | |||
| 40 | extern __thread bool thread_is_observer; | ||
| 41 | |||
| 42 | |||
| 43 | #ifdef MPI_LIB | ||
| 44 | /* Communicate among all MPI processes so that everyone has the same monitoring regions */ | ||
| 45 | static void talp_register_common_mpi_regions(const subprocess_descriptor_t *spd) { | ||
| 46 | /* Note: there's a potential race condition if this function is called | ||
| 47 | * (which happens on talp_mpi_finalize or talp_finalize) while another | ||
| 48 | * thread creates a monitoring region. The solution would be to lock the | ||
| 49 | * entire routine and call a specialized registering function that does not | ||
| 50 | * lock, or to use a recursive lock. The situation is unusual enough that | ||
| 51 | * we do not support it. */ | ||
| 52 | |||
| 53 | talp_info_t *talp_info = spd->talp_info; | ||
| 54 | |||
| 55 | /* Warn about open regions */ | ||
| 56 | for (GSList *node = talp_info->open_regions; | ||
| 57 | node != NULL; | ||
| 58 | node = node->next) { | ||
| 59 | const dlb_monitor_t *monitor = node->data; | ||
| 60 | warning("Region %s is still open during MPI_Finalize." | ||
| 61 | " Collected data may be incomplete.", | ||
| 62 | monitor->name); | ||
| 63 | } | ||
| 64 | |||
| 65 | /* Gather recvcounts from each process (each process may have a different | ||
| 66 | * number of monitors; see the Allgatherv sketch after the listing) */ | ||
| 67 | int nregions = g_tree_nnodes(talp_info->regions); | ||
| 68 | int chars_to_send = nregions * DLB_MONITOR_NAME_MAX; | ||
| 69 | int *recvcounts = malloc(_mpi_size * sizeof(int)); | ||
| 70 | PMPI_Allgather(&chars_to_send, 1, MPI_INT, | ||
| 71 | recvcounts, 1, MPI_INT, getWorldComm()); | ||
| 72 | |||
| 73 | /* Compute total characters to gather via MPI */ | ||
| 74 | int i; | ||
| 75 | int total_chars = 0; | ||
| 76 | for (i=0; i<_mpi_size; ++i) { | ||
| 77 | total_chars += recvcounts[i]; | ||
| 78 | } | ||
| 79 | |||
| 80 | if (total_chars > 0) { | ||
| 81 | /* Prepare sendbuffer */ | ||
| 82 | char *sendbuffer = malloc(nregions * DLB_MONITOR_NAME_MAX * sizeof(char)); | ||
| 83 | char *sendptr = sendbuffer; | ||
| 84 | for (GTreeNode *node = g_tree_node_first(talp_info->regions); | ||
| 85 | node != NULL; | ||
| 86 | node = g_tree_node_next(node)) { | ||
| 87 | const dlb_monitor_t *monitor = g_tree_node_value(node); | ||
| 88 | strcpy(sendptr, monitor->name); | ||
| 89 | sendptr += DLB_MONITOR_NAME_MAX; | ||
| 90 | } | ||
| 91 | |||
| 92 | /* Prepare recvbuffer */ | ||
| 93 | char *recvbuffer = malloc(total_chars * sizeof(char)); | ||
| 94 | |||
| 95 | /* Compute displacements */ | ||
| 96 | int *displs = malloc(_mpi_size * sizeof(int)); | ||
| 97 | int next_disp = 0; | ||
| 98 | for (i=0; i<_mpi_size; ++i) { | ||
| 99 | displs[i] = next_disp; | ||
| 100 | next_disp += recvcounts[i]; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* Gather all regions */ | ||
| 104 | PMPI_Allgatherv(sendbuffer, nregions * DLB_MONITOR_NAME_MAX, MPI_CHAR, | ||
| 105 | recvbuffer, recvcounts, displs, MPI_CHAR, getWorldComm()); | ||
| 106 | |||
| 107 | /* Register all regions. Existing ones will be skipped. */ | ||
| 108 | for (i=0; i<total_chars; i+=DLB_MONITOR_NAME_MAX) { | ||
| 109 | region_register(spd, &recvbuffer[i]); | ||
| 110 | } | ||
| 111 | |||
| 112 | free(sendbuffer); | ||
| 113 | free(recvbuffer); | ||
| 114 | free(displs); | ||
| 115 | } | ||
| 116 | |||
| 117 | free(recvcounts); | ||
| 118 | } | ||
| 119 | #endif | ||
| 120 | |||
| 121 | |||
| 122 | /*********************************************************************************/ | ||
| 123 | /* TALP MPI functions */ | ||
| 124 | /*********************************************************************************/ | ||
| 125 | |||
| 126 | /* Start global monitoring region (if not already started) */ | ||
| 127 | 6 | void talp_mpi_init(const subprocess_descriptor_t *spd) { | |
| 128 | 1/2 (✗ Branch 0 not taken, ✓ Branch 1 taken 6×) | 6 | ensure(!thread_is_observer, "An observer thread cannot call talp_mpi_init"); |
| 129 | |||
| 130 | 6 | talp_info_t *talp_info = spd->talp_info; | |
| 131 | 1/2 (✓ Branch 0 taken 6×, ✗ Branch 1 not taken) | 6 | if (talp_info) { |
| 132 | 6 | talp_info->flags.have_mpi = true; | |
| 133 | |||
| 134 | /* Start global region (no-op if already started) */ | ||
| 135 | 6 | region_start(spd, talp_info->monitor); | |
| 136 | |||
| 137 | /* Add MPI_Init statistic and set useful state */ | ||
| 138 | 6 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 139 | 6 | DLB_ATOMIC_ADD_RLX(&sample->stats.num_mpi_calls, 1); | |
| 140 | 6 | talp_set_sample_state(sample, useful, talp_info->flags.papi); | |
| 141 | } | ||
| 142 | 6 | } | |
| 143 | |||
| 144 | /* Stop global monitoring region and gather APP data if needed */ | ||
| 145 | 6 | void talp_mpi_finalize(const subprocess_descriptor_t *spd) { | |
| 146 | 1/2 (✗ Branch 0 not taken, ✓ Branch 1 taken 6×) | 6 | ensure(!thread_is_observer, "An observer thread cannot call talp_mpi_finalize"); |
| 147 | 6 | talp_info_t *talp_info = spd->talp_info; | |
| 148 | 1/2 (✓ Branch 0 taken 6×, ✗ Branch 1 not taken) | 6 | if (talp_info) { |
| 149 | |||
| 150 | #ifdef MPI_LIB | ||
| 151 | /* We also need to measure the waiting time of MPI_Finalize. For this, | ||
| 152 | * we call MPI_Barrier bracketed by the TALP sync-call hooks (see the | ||
| 153 | * wrapper sketch after the listing); num_mpi_calls is incremented inside those. */ | ||
| 154 | talp_into_sync_call(spd, /* is_blocking_collective */ true); | ||
| 155 | PMPI_Barrier(getWorldComm()); | ||
| 156 | talp_out_of_sync_call(spd, /* is_blocking_collective */ true); | ||
| 157 | #else | ||
| 158 | /* Add MPI_Finalize to the number of MPI calls. | ||
| 159 | * Even though talp_mpi_finalize should never be called when MPI_LIB is not defined, | ||
| 160 | * we keep this case for testing purposes. */ | ||
| 161 | 6 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 162 | 6 | DLB_ATOMIC_ADD_RLX(&sample->stats.num_mpi_calls, 1); | |
| 163 | #endif | ||
| 164 | |||
| 165 | /* Stop global region */ | ||
| 166 | 6 | region_stop(spd, talp_info->monitor); | |
| 167 | |||
| 168 | 6 | monitor_data_t *monitor_data = talp_info->monitor->_data; | |
| 169 | |||
| 170 | /* Update shared memory values */ | ||
| 171 | 3/4 (✓ Branch 0 taken 5×, ✓ Branch 1 taken 1×, ✗ Branch 2 not taken, ✓ Branch 3 taken 5×) | 6 | if (talp_info->flags.have_shmem \|\| talp_info->flags.have_minimal_shmem) { |
| 172 | // TODO: is it needed? isn't it updated when stopped? | ||
| 173 | 1 | shmem_talp__set_times(monitor_data->node_shared_id, | |
| 174 | 1 | talp_info->monitor->mpi_time, | |
| 175 | 1 | talp_info->monitor->useful_time); | |
| 176 | } | ||
| 177 | |||
| 178 | #ifdef MPI_LIB | ||
| 179 | /* If performing any kind of TALP summary, check that the number of processes | ||
| 180 | * registered in the shared memory matches the number of MPI processes on the node. | ||
| 181 | * This check is needed to avoid deadlocks on finalize. */ | ||
| 182 | if (spd->options.talp_summary) { | ||
| 183 | verbose(VB_TALP, "Gathering TALP metrics"); | ||
| 184 | /* FIXME: use Named Barrier */ | ||
| 185 | /* if (shmem_barrier__get_num_participants(spd->options.barrier_id) == _mpis_per_node) { */ | ||
| 186 | |||
| 187 | /* Gather data among processes in the node if node summary is enabled */ | ||
| 188 | if (spd->options.talp_summary & SUMMARY_NODE) { | ||
| 189 | talp_record_node_summary(spd); | ||
| 190 | } | ||
| 191 | |||
| 192 | /* Gather data among MPIs if any of these summaries is enabled */ | ||
| 193 | if (spd->options.talp_summary | ||
| 194 | & (SUMMARY_POP_METRICS \| SUMMARY_PROCESS)) { | ||
| 195 | /* Ensure everyone has the same monitoring regions */ | ||
| 196 | talp_register_common_mpi_regions(spd); | ||
| 197 | |||
| 198 | /* Finally, reduce data */ | ||
| 199 | for (GTreeNode *node = g_tree_node_first(talp_info->regions); | ||
| 200 | node != NULL; | ||
| 201 | node = g_tree_node_next(node)) { | ||
| 202 | const dlb_monitor_t *monitor = g_tree_node_value(node); | ||
| 203 | if (spd->options.talp_summary & SUMMARY_POP_METRICS) { | ||
| 204 | talp_record_pop_summary(spd, monitor); | ||
| 205 | } | ||
| 206 | if (spd->options.talp_summary & SUMMARY_PROCESS) { | ||
| 207 | talp_record_process_summary(spd, monitor); | ||
| 208 | } | ||
| 209 | } | ||
| 210 | } | ||
| 211 | |||
| 212 | /* Synchronize all processes in node before continuing with DLB finalization */ | ||
| 213 | node_barrier(spd, NULL); | ||
| 214 | /* } else { */ | ||
| 215 | /* warning("The number of MPI processes and processes registered in DLB differ." */ | ||
| 216 | /* " TALP will not print any summary."); */ | ||
| 217 | /* } */ | ||
| 218 | } | ||
| 219 | #endif | ||
| 220 | } | ||
| 221 | 6 | } | |
| 222 | |||
| 223 | /* Decide whether to update only the current sample (the likely, cheaper path) | ||
| 224 | * or to aggregate all samples. | ||
| 225 | * We will only aggregate all samples if the external profiler is enabled | ||
| 226 | * and this MPI call is a blocking collective call. */ | ||
| 227 | 7 | static inline void update_sample_on_sync_call(const subprocess_descriptor_t *spd, | |
| 228 | const talp_info_t *talp_info, talp_sample_t *sample, bool is_blocking_collective) { | ||
| 229 | |||
| 230 | 4/4 (✓ Branch 0 taken 4×, ✓ Branch 1 taken 3×, ✓ Branch 2 taken 2×, ✓ Branch 3 taken 2×) | 7 | if (!talp_info->flags.external_profiler \|\| !is_blocking_collective) { |
| 231 | /* Likely scenario, just update the sample */ | ||
| 232 | 5 | talp_update_sample(sample, talp_info->flags.papi, TALP_NO_TIMESTAMP); | |
| 233 | } else { | ||
| 234 | /* If talp_info->flags.external_profiler && is_blocking_collective: | ||
| 235 | * aggregate samples and update all monitoring regions */ | ||
| 236 | 2 | talp_flush_samples_to_regions(spd); | |
| 237 | } | ||
| 238 | 7 | } | |
| 239 | |||
| 240 | 8 | void talp_into_sync_call(const subprocess_descriptor_t *spd, bool is_blocking_collective) { | |
| 241 | /* Observer threads may call MPI functions, but TALP must ignore them */ | ||
| 242 | 2/2 (✓ Branch 0 taken 1×, ✓ Branch 1 taken 7×) | 8 | if (unlikely(thread_is_observer)) return; |
| 243 | |||
| 244 | 7 | const talp_info_t *talp_info = spd->talp_info; | |
| 245 | 1/2 (✓ Branch 0 taken 7×, ✗ Branch 1 not taken) | 7 | if (talp_info) { |
| 246 | /* Update sample */ | ||
| 247 | 7 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 248 | 7 | talp_update_sample(sample, talp_info->flags.papi, TALP_NO_TIMESTAMP); | |
| 249 | |||
| 250 | /* Into Sync call -> not_useful_mpi */ | ||
| 251 | 7 | talp_set_sample_state(sample, not_useful_mpi, talp_info->flags.papi); | |
| 252 | } | ||
| 253 | } | ||
| 254 | |||
| 255 | 8 | void talp_out_of_sync_call(const subprocess_descriptor_t *spd, bool is_blocking_collective) { | |
| 256 | /* Observer threads may call MPI functions, but TALP must ignore them */ | ||
| 257 | 2/2 (✓ Branch 0 taken 1×, ✓ Branch 1 taken 7×) | 8 | if (unlikely(thread_is_observer)) return; |
| 258 | |||
| 259 | 7 | const talp_info_t *talp_info = spd->talp_info; | |
| 260 | 1/2 (✓ Branch 0 taken 7×, ✗ Branch 1 not taken) | 7 | if (talp_info) { |
| 261 | /* Update sample (and maybe flush) */ | ||
| 262 | 7 | talp_sample_t *sample = talp_get_thread_sample(spd); | |
| 263 | 7 | DLB_ATOMIC_ADD_RLX(&sample->stats.num_mpi_calls, 1); | |
| 264 | 7 | update_sample_on_sync_call(spd, talp_info, sample, is_blocking_collective); | |
| 265 | |||
| 266 | /* Out of Sync call -> useful */ | ||
| 267 | 7 | talp_set_sample_state(sample, useful, talp_info->flags.papi); | |
| 268 | } | ||
| 269 | } | ||
| 270 |
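The region-name exchange in `talp_register_common_mpi_regions` above follows a standard two-phase pattern: an `MPI_Allgather` of per-rank byte counts, an exclusive prefix sum to derive displacements, then an `MPI_Allgatherv` of fixed-width name slots. The sketch below distils that pattern in isolation; `NAME_MAX`, `names`, `nlocal` and `allgather_names` are hypothetical stand-ins for `DLB_MONITOR_NAME_MAX` and the TALP region data, and plain `MPI_` calls are used where the listing uses the `PMPI_` profiling entry points.

```c
/* Minimal sketch of the Allgather + Allgatherv name exchange used by
 * talp_register_common_mpi_regions(). Identifiers are illustrative. */
#include <mpi.h>
#include <stdlib.h>

enum { NAME_MAX = 128 };                  /* stand-in for DLB_MONITOR_NAME_MAX */

static char *allgather_names(const char (*names)[NAME_MAX], int nlocal,
                             MPI_Comm comm, int *total_chars) {
    int nranks;
    MPI_Comm_size(comm, &nranks);

    /* 1. Every rank announces how many chars it will contribute */
    int sendcount = nlocal * NAME_MAX;
    int *recvcounts = malloc(nranks * sizeof(int));
    MPI_Allgather(&sendcount, 1, MPI_INT, recvcounts, 1, MPI_INT, comm);

    /* 2. Displacements are the exclusive prefix sum of the counts */
    int *displs = malloc(nranks * sizeof(int));
    int total = 0;
    for (int i = 0; i < nranks; ++i) {
        displs[i] = total;
        total += recvcounts[i];
    }

    /* 3. Gather everyone's names into one flat buffer; each NAME_MAX-sized
     *    slot holds exactly one name, so the caller can iterate in steps
     *    of NAME_MAX (cf. the region_register() loop in the listing). */
    char *recvbuf = malloc(total);
    MPI_Allgatherv(names, sendcount, MPI_CHAR,
                   recvbuf, recvcounts, displs, MPI_CHAR, comm);

    free(displs);
    free(recvcounts);
    *total_chars = total;
    return recvbuf;                       /* caller frees */
}
```

Because every name occupies a fixed `NAME_MAX`-byte slot, counts and displacements stay trivial to compute, and duplicate names across ranks are harmless: as the listing notes, `region_register` skips regions that already exist.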
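Likewise, a hedged sketch of how an interposed MPI wrapper could drive the hooks above: the wait inside a blocking collective lands in `not_useful_mpi` time because `talp_into_sync_call` flips the sample state before the PMPI call and `talp_out_of_sync_call` flips it back afterwards. The `current_spd` handle is hypothetical; DLB's real interception layer (in its `mpi/` sources) manages the subprocess descriptor itself.

```c
/* Sketch of an MPI_Barrier wrapper around the TALP sync-call hooks.
 * current_spd is a hypothetical handle to the subprocess descriptor. */
#include <stdbool.h>
#include <mpi.h>
#include "LB_core/spd.h"
#include "talp/talp_mpi.h"

extern const subprocess_descriptor_t *current_spd;   /* hypothetical */

int MPI_Barrier(MPI_Comm comm) {
    /* Entering MPI: useful -> not_useful_mpi */
    talp_into_sync_call(current_spd, /* is_blocking_collective */ true);

    /* PMPI_ entry point, so this wrapper does not recurse into itself */
    int error = PMPI_Barrier(comm);

    /* Leaving MPI: not_useful_mpi -> useful */
    talp_out_of_sync_call(current_spd, /* is_blocking_collective */ true);
    return error;
}
```

This is exactly the shape `talp_mpi_finalize` relies on when it brackets its own `PMPI_Barrier` with the two hooks to capture the waiting time of MPI_Finalize.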