diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000000000000000000000000000000000000..95c3117c7c10c8bf7719b6404e5b50edf5fe09fb
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,32 @@
+{
+ // Use IntelliSense to learn about possible attributes.
+ // Hover to view descriptions of existing attributes.
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "Python Debugger: Current File with Arguments",
+ "type": "debugpy",
+ "request": "launch",
+ "program": "nsys2prv/parse_nsys_stats.py",
+ "console": "integratedTerminal",
+ "args": [
+ "-t",
+ "cuda_api_trace,mpi_event_trace,gpu_metrics",
+ "-m",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_0.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_1.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_2.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_3.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_4.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_5.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_6.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_7.nsys-rep",
+ "../proves/multi_4nodes_gpumetrics_11finalistep_1maxiter/sod2d_ricardo_metrics_4nodes_more"
+ ],
+ "env": {
+ "NSYS_HOME": "/home/mclasca/Apps/nsight-system/2024.5.1"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/nsys2prv/NSYSInterface.py b/nsys2prv/NSYSInterface.py
new file mode 100644
index 0000000000000000000000000000000000000000..9034cf7debdc4f0566a65a8f004add68eb9c6425
--- /dev/null
+++ b/nsys2prv/NSYSInterface.py
@@ -0,0 +1,67 @@
+import subprocess
+import os
+
+class NSYSInterface():
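+ """Thin wrapper around the Nsight Systems CLI: it locates the nsys binary (in PATH or under NSYS_HOME), exports .nsys-rep reports to SQLite, and runs the stats recipes whose CSV output the translator reads."""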
+
+ def __init__(self, types, filter_nvtx, range_nvtx, force_sqlite):
+ self.use_path = True
+ self.nsys_binary = ("nsys",)
+
+ if 'NSYS_HOME' in os.environ:
+ self.NSYS_HOME = os.path.abspath(os.getenv('NSYS_HOME'))
+ self.use_path = False
+ self.nsys_binary = (os.path.join(self.NSYS_HOME, "bin/nsys"),)
+
+ self.types = types
+ self.filter = filter_nvtx
+ self.range_nvtx = range_nvtx
+ self.force = force_sqlite
+
+ def check_export_report(self, rf):
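+ # Export the report to SQLite only if the database does not exist yet, or unconditionally when --force-sqlite was requested.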
+ if not os.path.exists(f"{os.path.splitext(os.path.basename(rf))[0]}.sqlite") or self.force:
+ #Try exporting first
+ export_call = self.nsys_binary + ("export", "-t", "sqlite", rf)
+ try:
+ with subprocess.Popen(export_call, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
+ for line in p.stdout:
+ print(line.decode(), end='')
+
+ if p.returncode != 0:
+ raise ChildProcessError(p.returncode, p.args)
+ except FileNotFoundError:
+ print("You don't have an Nsight Systems installation in your PATH. Please install, Nsight Systems, or locate your installation using PATH or setting NSYS_HOME environment variable.")
+ exit(1)
+ except ChildProcessError:
+ print("Could not export SQLite database. Exiting.")
+ exit(1)
+
+ def call_stats(self, report):
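+ # Run the selected "nsys stats" recipes for one report, producing one CSV file per recipe with nanosecond timestamps.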
+ nsys_call = self.nsys_binary + ("stats", "-r", ",".join(self.types),
+ "--timeunit", "nsec", "-f", "csv",
+ "--force-overwrite", "true", "-o", ".")
+ if self.filter:
+ nsys_call += ("--filter-nvtx="+self.range_nvtx,)
+
+ nsys_call += (report,)
+
+ try:
+ with subprocess.Popen(nsys_call, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
+ for line in p.stdout:
+ print(line.decode(), end='')
+
+ if p.returncode != 0:
+ raise ChildProcessError(p.returncode, p.args)
+ except FileNotFoundError:
+ print("You don't have an Nsight Systems installation in your PATH. Please install, Nsight Systems, or locate your installation using PATH or setting NSYS_HOME environment variable.")
+ exit(1)
+
+ def build_nsys_stats_name(self, rf, rd, report_name):
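+ # Rebuild the CSV file name that "nsys stats" produces for a given report and recipe (adding the nvtx range suffix when filtering is active).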
+ base_name = os.path.splitext(os.path.basename(rf))[0]
+ if self.filter:
+ return os.path.join(rd, base_name+"_{}_nvtx={}.csv".format(report_name, self.range_nvtx))
+ else:
+ return os.path.join(rd, base_name+"_{}.csv".format(report_name))
\ No newline at end of file
diff --git a/nsys2prv/parse_nsys_stats.py b/nsys2prv/parse_nsys_stats.py
index 8a2566008c06d800d145b6de8ee76b67188eabc0..48858feb37289aee14406ae9e14917c704774e04 100755
--- a/nsys2prv/parse_nsys_stats.py
+++ b/nsys2prv/parse_nsys_stats.py
@@ -8,11 +8,13 @@ import time
import subprocess
import os
import locale
+from functools import reduce
from sqlalchemy import create_engine, text, dialects
from sqlalchemy.exc import OperationalError
from .EventWriter import event_writer as ewr
+from .NSYSInterface import NSYSInterface
from .semantics.mpi_event_encoding import *
-
+from .semantics import *
+
def main():
locale.setlocale(locale.LC_ALL, '')
@@ -31,6 +33,7 @@ def main():
parser.add_argument("-v", "--version", nargs=0, help="Show version and exit.", action=ShowVersion)
parser.add_argument("-f", "--filter-nvtx", help="Filter by this NVTX range")
parser.add_argument("-t", "--trace", help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, nvtx_startend_trace, cuda_api_trace, gpu_metrics, openacc]")
+ parser.add_argument("-m", "--multi-report", action="store_true", help="Translate multiple reports of the same execution into one trace.")
parser.add_argument("--force-sqlite", action="store_true", help="Force Nsight System to export SQLite database")
@@ -39,23 +42,24 @@ def main():
#parser.add_argument("-n", "--nvtx-stack-range", nargs=2, type=int)
- parser.add_argument("source_rep", help="Nsight source report file")
+ parser.add_argument("source_rep", nargs="+", help="Nsight source report file")
parser.add_argument("output", help="Paraver output trace name")
args = parser.parse_args()
# # Trace configuration and setup
-
- use_path = True
-
- if 'NSYS_HOME' in os.environ:
- NSYS_HOME = os.path.abspath(os.getenv('NSYS_HOME'))
- use_path = False
PARAVER_HOME = os.getenv('PARAVER_HOME')
- REPORT_FILE = os.path.abspath(args.source_rep)
- REPORT_DIR = os.path.dirname(REPORT_FILE)
+ MULTIREPORT = args.multi_report
+ if MULTIREPORT:
+ REPORTS_LIST = [os.path.abspath(x) for x in args.source_rep]
+ REPORT_DIRS_LIST = [os.path.dirname(x) for x in REPORTS_LIST]
+ REPORT_FILE = REPORTS_LIST[0] # For fast checks, it's best to have a reference report
+ else:
+ REPORT_FILE = os.path.abspath(args.source_rep[0])
+ REPORT_DIR = os.path.dirname(REPORT_FILE)
+
trace_name = args.output
NVTX_FILTER = args.filter_nvtx != None
@@ -126,37 +130,18 @@ def main():
nvtx_stack_top = 1
nvtx_stack_bottom = 4
+ nsi = NSYSInterface(reports, NVTX_FILTER, NVTX_RANGE, args.force_sqlite)
- def build_nsys_stats_name(report_name):
- base_name = os.path.splitext(os.path.basename(REPORT_FILE))[0]
- if NVTX_FILTER:
- return os.path.join(REPORT_DIR, base_name+"_{}_nvtx={}.csv".format(report_name, NVTX_RANGE))
- else:
- return os.path.join(REPORT_DIR, base_name+"_{}.csv".format(report_name))
-
-
+ if MULTIREPORT:
+ print(f"Multiple reports provided: {REPORTS_LIST}")
print("Extracting reports for: {}".format(reports_og))
- if use_path:
- nsys_binary = ("nsys",)
- else:
- nsys_binary = (os.path.join(NSYS_HOME, "bin/nsys"),)
- if not os.path.exists(f"{os.path.splitext(os.path.basename(REPORT_FILE))[0]}.sqlite"):
- #Try exporting first
- export_call = nsys_binary + ("export", "-t", "sqlite", REPORT_FILE)
- try:
- with subprocess.Popen(export_call, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
- for line in p.stdout:
- print(line.decode(), end='')
-
- if p.returncode != 0:
- raise ChildProcessError(p.returncode, p.args)
- except FileNotFoundError:
- print("You don't have an Nsight Systems installation in your PATH. Please install, Nsight Systems, or locate your installation using PATH or setting NSYS_HOME environment variable.")
- exit(1)
- except ChildProcessError:
- print("Could not export SQLite database. Exiting.")
- exit(1)
+ if MULTIREPORT:
+ for REPORT_FILE_I in REPORTS_LIST:
+ print(f"Exporting SQLite databse for {os.path.basename(REPORT_FILE_I)}")
+ nsi.check_export_report(REPORT_FILE_I)
+ else:
+ nsi.check_export_report(REPORT_FILE)
engine = create_engine(f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
metadata = pd.read_sql_table("META_DATA_EXPORT", f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
@@ -164,149 +149,297 @@ def main():
if int(minor_version["value"].iloc[0]) > 11:
print(f"\033[93m Warning! The SQLite schema version {int(minor_version["value"].iloc[0])} is greater than the one supported (11). If unexpected behaviour occurs, please report it. \033[00m")
- nsys_call = nsys_binary + ("stats", "-r", ",".join(reports),
- "--timeunit", "nsec", "-f", "csv",
- "--force-overwrite", "true", "-o", ".")
-
- if NVTX_FILTER:
- nsys_call += ("--filter-nvtx="+NVTX_RANGE,)
-
- if args.force_sqlite:
- nsys_call += ("--force-export", "true")
-
- nsys_call += (REPORT_FILE,)
-
- try:
- with subprocess.Popen(nsys_call, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:
- for line in p.stdout:
- print(line.decode(), end='')
-
- if p.returncode != 0:
- raise ChildProcessError(p.returncode, p.args)
- except FileNotFoundError:
- print("You don't have an Nsight Systems installation in your PATH. Please install, Nsight Systems, or locate your installation using PATH or setting NSYS_HOME environment variable.")
- exit(1)
+ if MULTIREPORT:
+ for REPORT_FILE_I in REPORTS_LIST:
+ print(f"Processing stats for {os.path.basename(REPORT_FILE_I)}")
+ nsi.call_stats(REPORT_FILE_I)
+ else:
+ nsi.call_stats(REPORT_FILE)
+ # MARK: IMPORT DATASETS
print("Importing datasets...")
- # kernels_df = pd.read_csv(build_nsys_stats_name("cuda_gpu_trace"))
- # kernels_df.rename(columns={"CorrId": "CorrID"}, inplace=True)
- with engine.connect() as conn, conn.begin():
- with open(os.path.join(os.path.dirname(__file__), 'scripts/kernels.sql'), 'r') as query:
- kernels_df = pd.read_sql_query(text(query.read()), conn)
+ kernels_df = []
+ if MULTIREPORT:
+ sum = 0
+ for REPORT_FILE_I in REPORTS_LIST:
+ ksi = KernelsSemantic(REPORT_FILE_I)
+ ksi.Setup()
+ ksi.load_data()
+ kernels_df.append(ksi.get_df())
+ sum += ksi.get_df().shape[0]
+ del ksi
+ else:
+ ks = KernelsSemantic(REPORT_FILE)
+ ks.Setup()
+ ks.load_data()
+ kernels_df = ks.get_df()
if t_apicalls:
- cuda_api_df = pd.read_csv(build_nsys_stats_name("cuda_api_trace"))
+ cuda_api_df = []
+ if MULTIREPORT:
+ for i, REPORT_FILE_I in enumerate(REPORTS_LIST):
+ cuda_api_df.append(pd.read_csv(nsi.build_nsys_stats_name(REPORT_FILE_I, REPORT_DIRS_LIST[i], "cuda_api_trace")))
+ else:
+ cuda_api_df = pd.read_csv(nsi.build_nsys_stats_name(REPORT_FILE, REPORT_DIR, "cuda_api_trace"))
else:
cuda_api_df = pd.DataFrame()
if t_nvtx:
- nvtx_df = pd.read_csv(build_nsys_stats_name("nvtx_pushpop_trace"))
- nvtx_df["domain"] = nvtx_df["Name"].str.split(":").str[0]
+ nvtx_df = []
+ if MULTIREPORT:
+ for i, REPORT_FILE_I in enumerate(REPORTS_LIST):
+ nvtx_df.append(pd.read_csv(nsi.build_nsys_stats_name(REPORT_FILE_I, REPORT_DIRS_LIST[i], "nvtx_pushpop_trace")))
+ nvtx_df[i]["domain"] = nvtx_df[i]["Name"].str.split(":").str[0]
+ nvtx_df[i].rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True)
+
+ else:
+ nvtx_df = pd.read_csv(nsi.build_nsys_stats_name(REPORT_FILE, REPORT_DIR, "nvtx_pushpop_trace"))
+ nvtx_df["domain"] = nvtx_df["Name"].str.split(":").str[0]
+ nvtx_df.rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True)
+
else:
nvtx_df = pd.DataFrame()
if t_nvtx_startend:
- with engine.connect() as conn, conn.begin():
- with open(os.path.join(os.path.dirname(__file__), 'scripts/nvtx_startend_trace.sql'), 'r') as query:
- nvtx_startend_df = pd.read_sql_query(text(query.read()), conn)
+ nvtx_startend_df = []
+ if MULTIREPORT:
+ for REPORT_FILE_I in REPORTS_LIST:
+ ksi = NVTXStartEndSemantic(REPORT_FILE_I)
+ ksi.Setup()
+ ksi.load_data()
+ nvtx_startend_df.append(ksi.get_df())
+ del ksi
+ else:
+ ks = NVTXStartEndSemantic(REPORT_FILE)
+ ks.Setup()
+ ks.load_data()
+ nvtx_startend_df = ks.get_df()
+ del ks
else:
nvtx_startend_df = pd.DataFrame()
if t_mpi:
- with engine.connect() as conn, conn.begin():
- try:
- with open(os.path.join(os.path.dirname(__file__), 'scripts/mpi_p2p.sql'), 'r') as query:
- if conn.dialect.has_table(connection=conn, table_name='MPI_P2P_EVENTS') and conn.dialect.has_table(connection=conn, table_name='MPI_START_WAIT_EVENTS'):
- mpi_p2p_df = pd.read_sql_query(text(query.read()), conn)
- mpi_p2p_df["event_type"] = MPITYPE_PTOP
- else: mpi_p2p_df = pd.DataFrame()
- with open(os.path.join(os.path.dirname(__file__), 'scripts/mpi_coll.sql'), 'r') as query:
- if conn.dialect.has_table(connection=conn, table_name='MPI_COLLECTIVES_EVENTS'):
- mpi_coll_df = pd.read_sql_query(text(query.read()), conn)
- mpi_coll_df = mpi_coll_df.drop(mpi_coll_df[mpi_coll_df["Event"].str.contains("File") ].index)
- mpi_coll_df["event_type"] = MPITYPE_COLLECTIVE
- else: mpi_coll_df = pd.DataFrame()
- with open(os.path.join(os.path.dirname(__file__), 'scripts/mpi_other.sql'), 'r') as query:
- if conn.dialect.has_table(connection=conn, table_name='MPI_OTHER_EVENTS'):
- mpi_other_df = pd.read_sql_query(text(query.read()), conn)
- mpi_other_df = mpi_other_df.drop(mpi_other_df[mpi_other_df["Event"].str.contains("File") ].index)
- mpi_other_df = mpi_other_df.drop(mpi_other_df[mpi_other_df["Event"].str.contains("Win|MPI_Get|MPI_Put|Accumulate") ].index)
- mpi_other_df["event_type"] = MPITYPE_OTHER
- else: mpi_other_df = pd.DataFrame()
- with open(os.path.join(os.path.dirname(__file__), 'scripts/mpi_other.sql'), 'r') as query:
- if conn.dialect.has_table(connection=conn, table_name='MPI_OTHER_EVENTS'):
- mpi_rma_df = pd.read_sql_query(text(query.read()), conn)
- mpi_rma_df = mpi_rma_df[mpi_rma_df["Event"].str.contains("Win|MPI_Get|MPI_Put|Accumulate")]
- mpi_rma_df["event_type"] = MPITYPE_RMA
- else: mpi_rma_df = pd.DataFrame()
- with open(os.path.join(os.path.dirname(__file__), 'scripts/mpi_io.sql'), 'r') as query:
- if conn.dialect.has_table(connection=conn, table_name='MPI_OTHER_EVENTS') and conn.dialect.has_table(connection=conn, table_name='MPI_COLLECTIVES_EVENTS'):
- mpi_io_df = pd.read_sql_query(text(query.read()), conn)
- mpi_io_df = mpi_io_df[mpi_io_df["Event"].str.contains("File")]
- mpi_io_df["event_type"] = MPITYPE_IO
- else: mpi_io_df = pd.DataFrame()
- mpi_df = pd.concat([mpi_p2p_df, mpi_coll_df, mpi_other_df, mpi_rma_df, mpi_io_df])
- except OperationalError as oe:
- print("There has been a problem fetching MPI information. MPI data will be skipped.")
- print(f"[ERROR]: {oe.detail}")
- t_mpi = False
- #mpi_df = pd.read_csv(build_nsys_stats_name("mpi_event_trace"))
+ mpi_df = []
+ try:
+ if MULTIREPORT:
+ for REPORT_FILE_I in REPORTS_LIST:
+ kp2pi = MPIP2PSemantic(REPORT_FILE_I)
+ kp2pi.Setup()
+ kp2pi.load_data()
+
+ kcolli = MPICollSemantic(REPORT_FILE_I)
+ kcolli.Setup()
+ kcolli.load_data()
+
+ kotheri = MPIOtherSemantic(REPORT_FILE_I)
+ kotheri.Setup()
+ kotheri.load_data()
+
+ krmai = MPIRMASemantic(REPORT_FILE_I)
+ krmai.Setup()
+ krmai.load_data()
+
+ kioi = MPIIOPSemantic(REPORT_FILE_I)
+ kioi.Setup()
+ kioi.load_data()
+
+ mpi_df.append(pd.concat([kp2pi.get_df(), kcolli.get_df(), kotheri.get_df(), krmai.get_df(), kioi.get_df()], ignore_index=True))
+ del kp2pi, kcolli, kotheri, krmai, kioi
+ else:
+ kmpi = MPIP2PSemantic(REPORT_FILE)
+ kmpi.Setup()
+ kmpi.load_data()
+ mpi_p2p_df = kmpi.get_df()
+
+ kmpi = MPICollSemantic(REPORT_FILE)
+ kmpi.Setup()
+ kmpi.load_data()
+ mpi_coll_df = kmpi.get_df()
+
+ kmpi = MPIOtherSemantic(REPORT_FILE)
+ kmpi.Setup()
+ kmpi.load_data()
+ mpi_other_df = kmpi.get_df()
+
+ kmpi = MPIRMASemantic(REPORT_FILE)
+ kmpi.Setup()
+ kmpi.load_data()
+ mpi_rma_df = kmpi.get_df()
+
+ kmpi = MPIIOPSemantic(REPORT_FILE)
+ kmpi.Setup()
+ kmpi.load_data()
+ mpi_io_df = kmpi.get_df()
+ mpi_df = pd.concat([mpi_p2p_df, mpi_coll_df, mpi_other_df, mpi_rma_df, mpi_io_df], ignore_index=True)
+ del kmpi, mpi_p2p_df, mpi_coll_df, mpi_other_df, mpi_rma_df, mpi_io_df
+ except OperationalError as oe:
+ print("There has been a problem fetching MPI information. MPI data will be skipped.")
+ print(f"[ERROR]: {oe.args[0]}")
+ t_mpi = False
else:
- #mpi_df = pd.DataFrame()
- mpi_p2p_df = pd.DataFrame()
- mpi_coll_df = pd.DataFrame()
- mpi_other_df = pd.DataFrame()
- mpi_rma_df = pd.DataFrame()
- mpi_io_df = pd.DataFrame()
-
- # Obtain context Info
- context_info = pd.read_sql_table("TARGET_INFO_CUDA_CONTEXT_INFO", f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
+ mpi_df = pd.DataFrame()
+
+ gpu_metrics_agg = []
+ metrics_event_names = []
+ if t_metrics:
+ if MULTIREPORT:
+ for REPORT_FILE_I in REPORTS_LIST:
+ ksi = GPUMetricsSemantic(REPORT_FILE_I)
+ ksi.Setup()
+ ksi.load_data()
+ gpu_metrics_agg.append(ksi.get_df())
+ metrics_event_names.append(ksi.get_names())
+ del ksi
+ else:
+ ks = GPUMetricsSemantic(REPORT_FILE)
+ ks.Setup()
+ ks.load_data()
+ gpu_metrics_agg = ks.get_df()
+ metrics_event_names = ks.get_names()
+ del ks
+
+ if t_openacc:
+ if MULTIREPORT:
+ openacc_other_df = []
+ openacc_launch_df = []
+ openacc_data_df = []
+ for REPORT_FILE_I in REPORTS_LIST:
+ ksio = OpenACCOtherSemantic(REPORT_FILE_I)
+ ksio.Setup()
+ ksio.load_data()
+ openacc_other_df.append(ksio.get_df())
+ ksil = OpenACCLaunchSemantic(REPORT_FILE_I)
+ ksil.Setup()
+ ksil.load_data()
+ openacc_launch_df.append(ksil.get_df())
+ ksid = OpenACCDataSemantic(REPORT_FILE_I)
+ ksid.Setup()
+ ksid.load_data()
+ openacc_data_df.append(ksid.get_df())
+ del ksio, ksil, ksid
+ else:
+ kso = OpenACCOtherSemantic(REPORT_FILE)
+ kso.Setup()
+ kso.load_data()
+ openacc_other_df = kso.get_df()
+ ksl = OpenACCLaunchSemantic(REPORT_FILE)
+ ksl.Setup()
+ ksl.load_data()
+ openacc_launch_df = ksl.get_df()
+ ksd = OpenACCDataSemantic(REPORT_FILE)
+ ksd.Setup()
+ ksd.load_data()
+ openacc_data_df = ksd.get_df()
+ del kso, ksl, ksd
+ openacc_event_kind = pd.read_sql_table("ENUM_OPENACC_EVENT_KIND", f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
+
+
+ # MARK: CONTEXT INFO
+ list_contexts = []
+ list_hostnames = []
+ if MULTIREPORT:
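+ # Each report was captured on a different node: tag its CUDA contexts with that node's hostname (from TARGET_INFO_SYSTEM_ENV) so device ids can later be matched to the right processes.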
+ for REPORT_FILE_I in REPORTS_LIST:
+ context_info_i = pd.read_sql_table("TARGET_INFO_CUDA_CONTEXT_INFO", f"sqlite:///{os.path.splitext(REPORT_FILE_I)[0]}.sqlite")
+ target_system_env_i = pd.read_sql_table("TARGET_INFO_SYSTEM_ENV", f"sqlite:///{os.path.splitext(REPORT_FILE_I)[0]}.sqlite")
+ hostname = target_system_env_i.loc[target_system_env_i["name"] == "Hostname"]["value"].iloc[0]
+ context_info_i["hostname"] = hostname
+ list_hostnames.append(hostname)
+ list_contexts.append(context_info_i)
+ context_info = pd.concat(list_contexts)
+ else:
+ context_info = pd.read_sql_table("TARGET_INFO_CUDA_CONTEXT_INFO", f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
+ context_info.sort_values(["processId"], inplace=True)
+
+ # CONTEXT INFO CHECK FOR MULTIREPORT
+ #if context_info.groupby(["hostname"]).agg({'deviceId': 'count'})
+ if context_info["deviceId"].unique().size == 1:
+ print(f"\033[93m Warning! Only one unique device ID can be detected in resource identification. If this is not intended, some features will not be available. Please, make sure that the GPU bindings are correctly done and that every process identifies its own GPU with a unique device [0 .. N-1]. \033[00m")
+
if t_mpi:
- mpi_query = "SELECT globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid, rank FROM MPI_RANKS;"
- with engine.connect() as conn, conn.begin():
- rank_info = pd.read_sql_query(mpi_query, conn)
+ if MULTIREPORT:
+ list_ranks = []
+ for REPORT_FILE_I in REPORTS_LIST:
+ mpi_query = "SELECT globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid, rank FROM MPI_RANKS;"
+ engine = create_engine(f"sqlite:///{os.path.splitext(REPORT_FILE_I)[0]}.sqlite")
+ with engine.connect() as conn, conn.begin():
+ list_ranks.append(pd.read_sql_query(mpi_query, conn))
+ rank_info = pd.concat(list_ranks)
- context_info.sort_values(["processId"], inplace=True)
- if t_metrics:
- gpu_metrics = pd.read_sql_table("GPU_METRICS", f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
- metrics_description = pd.read_sql_table("TARGET_INFO_GPU_METRICS", f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
- gpu_metrics.drop(gpu_metrics[gpu_metrics["timestamp"] < 0].index, inplace=True) # drop negative time
- metrics_event_names = metrics_description.groupby(["metricId"]).agg({'metricName': 'first'}).reset_index()
- metrics_event_names["metricId"] = metrics_event_names["metricId"] + event_type_metrics_base
- #gpu_metrics["task"] = gpu_metrics.groupby(["typeId"]).ngroup() + 1
- gpu_metrics["deviceId"] = gpu_metrics["typeId"].apply(lambda x: x & 0xFF)
- gpu_metrics_agg = gpu_metrics.groupby(["timestamp", "typeId"]).agg({'metricId': lambda x: list(x+event_type_metrics_base),
- 'value': lambda x: list(x),
- 'deviceId': 'first'})
- gpu_metrics_agg.reset_index(inplace=True)
+ # MARK: MERGING AND ALIGNING
+ if MULTIREPORT:
+ # Find delta between earliest trace start and the others
+ session_time = []
+ for REPORT_FILE_I in REPORTS_LIST:
+ session_time.append(pd.read_sql_table("TARGET_INFO_SESSION_START_TIME", f"sqlite:///{os.path.splitext(REPORT_FILE_I)[0]}.sqlite"))
+
+ session_time = [x.iloc[0,0] for x in session_time] # Get the utcEpochNs
+ earliest_time = reduce(lambda x, y: min(x, y), session_time, float('inf'))
+ deltas = [start - earliest_time for start in session_time]
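+ # Timestamps in each report are relative to its own session start, so adding (session start - earliest start) expresses every event on a common timeline anchored at the earliest report.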
+ for i, df in enumerate(kernels_df):
+ df['Start (ns)'] += deltas[i]
+ kernels_df = pd.concat(kernels_df, ignore_index=True)
+
+ if t_apicalls:
+ for i, df in enumerate(cuda_api_df):
+ df['Start (ns)'] += deltas[i]
+ cuda_api_df = pd.concat(cuda_api_df, ignore_index=True)
- if t_openacc:
- with engine.connect() as conn, conn.begin():
- with open(os.path.join(os.path.dirname(__file__), 'scripts/openacc_other.sql'), 'r') as query:
- openacc_other_df = pd.read_sql_query(text(query.read()), conn)
- with open(os.path.join(os.path.dirname(__file__), 'scripts/openacc_launch.sql'), 'r') as query:
- openacc_launch_df = pd.read_sql_query(text(query.read()), conn)
- with open(os.path.join(os.path.dirname(__file__), 'scripts/openacc_data.sql'), 'r') as query:
- openacc_data_df = pd.read_sql_query(text(query.read()), conn)
- openacc_event_kind = pd.read_sql_table("ENUM_OPENACC_EVENT_KIND", conn)
+ if t_nvtx:
+ for i, df in enumerate(nvtx_df):
+ df['Start (ns)'] += deltas[i]
+ df['End (ns)'] += deltas[i]
+ nvtx_df = pd.concat(nvtx_df, ignore_index=True)
+
+ if t_nvtx_startend:
+ for i, df in enumerate(nvtx_startend_df):
+ df['Start (ns)'] += deltas[i]
+ df['End (ns)'] += deltas[i]
+ nvtx_startend_df = pd.concat(nvtx_startend_df, ignore_index=True)
+ if t_mpi:
+ for i, df in enumerate(mpi_df):
+ df['Start:ts_ns'] += deltas[i]
+ df['End:ts_ns'] += deltas[i]
+ mpi_df = pd.concat(mpi_df, ignore_index=True)
+
+ if t_openacc:
+ for i, df in enumerate(openacc_other_df):
+ df['start'] += deltas[i]
+ df['end'] += deltas[i]
+ for i, df in enumerate(openacc_launch_df):
+ df['start'] += deltas[i]
+ df['end'] += deltas[i]
+ for i, df in enumerate(openacc_data_df):
+ df['start'] += deltas[i]
+ df['end'] += deltas[i]
+ openacc_other_df = pd.concat(openacc_other_df, ignore_index=True)
+ openacc_launch_df = pd.concat(openacc_launch_df, ignore_index=True)
+ openacc_data_df = pd.concat(openacc_data_df, ignore_index=True)
+
+ if t_metrics:
+ for i, df in enumerate(gpu_metrics_agg):
+ if not df.empty:
+ df['timestamp'] += deltas[i]
+ # Complement with processId and node info
+ df['Pid'] = df['deviceId'].map(context_info[context_info["hostname"] == list_hostnames[i]].set_index("deviceId")["processId"])
+ gpu_metrics_agg = pd.concat(gpu_metrics_agg, ignore_index=True)
+ metrics_event_names = pd.concat(metrics_event_names, ignore_index=True).drop_duplicates()
- # # Building object model
+ # MARK: PROCESS MODEL
# ## Tasks and threads
# Now, find unique appearences of ThreadID and ProcessID
if t_apicalls: print("CUDA calls unique processes: {}, and unique threads: {}".format(cuda_api_df["Pid"].unique(), cuda_api_df["Tid"].unique()))
- if t_nvtx: print("NVTX ranges unique processes: {}, and unique threads: {}".format(nvtx_df["PID"].unique(), nvtx_df["TID"].unique()))
+ if t_nvtx: print("NVTX ranges unique processes: {}, and unique threads: {}".format(nvtx_df["Pid"].unique(), nvtx_df["Tid"].unique()))
if t_nvtx_startend: print("NVTX startend unique processes: {}, and unique threads: {}".format(nvtx_startend_df["Pid"].unique(), nvtx_startend_df["Tid"].unique()))
if t_mpi: print("MPI calls unique processes: {}, and unique threads: {}".format(mpi_df["Pid"].unique(), mpi_df["Tid"].unique()))
if t_openacc: print("OpenACC calls unique processes: {}, and unique threads: {}".format(openacc_other_df["Pid"].unique(), openacc_other_df["Tid"].unique()))
- if t_nvtx: nvtx_df.rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True)
compute_threads_with = []
if t_apicalls: compute_threads_with.append(cuda_api_df[['Pid', 'Tid']])
@@ -332,6 +465,7 @@ def main():
'Tid': lambda x: set(x),
'thread': 'count',
'device': 'first' })
+ print(tasks_set)
cuda_api_df["thread"] = 0
cuda_api_df["task"] = 0
@@ -389,50 +523,51 @@ def main():
streams = kernels_df[['Device', 'Strm', 'deviceid', 'Pid']].drop_duplicates()
- streams["thread"] = streams.groupby(["Device"]).cumcount() + 1
+ streams["thread"] = streams.groupby(["Pid", "Device"]).cumcount() + 1
#streams["deviceid"] = streams.sort_values("Device").groupby(["Device"]).ngroup()
#streams["Pid"] = streams["deviceid"].map(tasks_set.set_index("device")["Pid"])
- streams["task"] = streams["deviceid"].map(tasks_set.reset_index().set_index("device")["task"])
+ streams["task"] = streams["Pid"].map(tasks_set.reset_index().set_index("Pid")["task"])
streams['row_name'] = 'CUDA-D'+streams['deviceid'].astype(str) + '.S' + streams['Strm'].astype(str)
num_streams = streams.count().iloc[0]
streams.sort_values(["Pid", "thread"], inplace=True)
streams.reset_index(inplace=True)
- devices_set = streams.groupby(["deviceid"]).agg({'Device': 'first',
+ devices_set = streams.groupby(["Pid", "deviceid"]).agg({'Device': 'first',
'Strm': lambda x: set(x),
'thread': 'count',
- 'task': 'first',
- 'Pid': 'last'})
+ 'task': 'first'})
+ print(devices_set)
# Here we finally update the threadId we are going to put in the event record of kernel executions to respect the normal threads before CUDA streams
num_normal_threads = tasks_set['thread']
num_normal_threads_repeated = num_normal_threads.repeat(devices_set["thread"]).reset_index()[["thread"]]
-
streams['thread'] = streams['thread'] + num_normal_threads_repeated["thread"]
+
# for index,row in kernels_df.iterrows():
# kernels_df.at[index, "thread"] = streams.at[(streams["Strm"] == row["Strm"]).idxmax(), "thread"]
# kernels_df.at[index, "deviceid"] = streams.at[(streams["Device"] == row["Device"]).idxmax(), "deviceid"]
# More efficient way by chatgpt
# First, let's filter streams DataFrame based on conditions
- filtered_streams = streams.groupby(["Device", "Strm"]).agg({'thread':'first', 'task':'first'}).reset_index()
+ filtered_streams = streams.groupby(["Pid", "Strm"]).agg({'thread':'first', 'task':'first'}).reset_index()
# Now, merge the filtered streams DataFrame with kernels_df
- result_df = kernels_df.merge(filtered_streams, how='left', on=['Device', 'Strm'])
+ result_df = kernels_df.merge(filtered_streams, how='left', on=["Pid", 'Strm'])
+
# Copy the results back to kernels_df
- kernels_df['thread'] = result_df['thread']
- kernels_df['task'] = result_df['task']
+ kernels_df['thread'] = result_df['thread'].to_numpy()
+ kernels_df['task'] = result_df['task'].to_numpy()
# Add auxiliary stream to streams dataframe
if t_metrics:
- aux_streams = devices_set.reset_index()[["deviceid", "Device", "thread", "task"]]
+ aux_streams = devices_set.reset_index()[["deviceid", "Device", "thread", "task", "Pid"]]
aux_streams["Strm"] = 99
aux_streams["row_name"] = "Metrics GPU"+aux_streams["deviceid"].astype(str)
- aux_streams["Pid"] = aux_streams["deviceid"].map(tasks_set.set_index('device')["Pid"])
- aux_streams["thread"] = aux_streams["thread"] + aux_streams["deviceid"].map(tasks_set.set_index('device')['thread']) + 1
- gpu_metrics_agg["task"] = gpu_metrics_agg["deviceId"].map(devices_set["task"])
+ #aux_streams["Pid"] = aux_streams["deviceid"].map(tasks_set.set_index('device')["Pid"])
+ aux_streams["thread"] = aux_streams["thread"] + aux_streams["Pid"].map(tasks_set.set_index('Pid')['thread']) + 1
+ gpu_metrics_agg["task"] = gpu_metrics_agg["Pid"].map(devices_set.reset_index().set_index("Pid")["task"])
gpu_metrics_agg["thread"] = gpu_metrics_agg["task"].map(aux_streams.set_index('task')["thread"])
streams = pd.concat([streams, aux_streams]).sort_values(['task', 'thread'])
@@ -441,9 +576,6 @@ def main():
# ## Writing ROW file
# Now we can write the _row_ file with this information
- print(tasks_set)
- print(devices_set)
-
print(" -Writing resource model to row file...")
row_df = pd.concat([threads[["thread", "task", "row_name"]], streams[["thread", "task", "row_name"]]])
@@ -460,7 +592,7 @@ def main():
row_file.write("\n")
- # # Collecting event values
+ # MARK: EVENT NAMES
# Second step is collect all different event values for CUDA API calls, kernel names, and NVTX ranges. Each of these define a different event type, and will need unique identifiers to be used as a event values. Finally these needs to be dumped to the PCF file.
print("Collecting event names and information...")
@@ -490,7 +622,7 @@ def main():
kernel_names["Name"] = kernel_names["Name"].apply(lambda x: x.replace("[", "").replace("]", ""))
if t_nvtx:
- nvtx_df_subset = nvtx_df
+ nvtx_df_subset = nvtx_df.reset_index()
lower_level = max(nvtx_df["Lvl"])
if nvtx_select_frames:
@@ -791,9 +923,9 @@ GRADIENT_NAMES
pcf_file.write("{} {}\n".format(row["func_value"], row["func"]))
pcf_file.write("\n")
+ # MARK: MEMORY
# # Split of kernel execution between compute and memory
-
memops_names = ["[CUDA memcpy Device-to-Device]", "[CUDA memcpy Device-to-Host]", "[CUDA memcpy Host-to-Device]", "[CUDA memset]", "[CUDA memcpy Peer-to-Peer]"]
memops_df = kernels_df.loc[kernels_df["Name"].isin(memops_names)]
mask = ~kernels_df.index.isin(memops_df.index)
@@ -805,6 +937,7 @@ GRADIENT_NAMES
comm_memory_df = cuda_api_df.merge(memops_df, how="inner", left_on=["CorrID", "task"], right_on=["CorrID", "task"], suffixes=("_call", "_mem"), validate="one_to_one")
+ # MARK: TIMELINE RECONS
# # Timeline reconstruction
print("Reconstructing timeline...")
@@ -924,6 +1057,8 @@ GRADIENT_NAMES
print(f"Congratulations! Trace {trace_name}.prv correctly translated.")
+
+ # MARK: POSTPROCESSING
# ## Postprocessing
# - Reorder trace
# - GZip trace
diff --git a/nsys2prv/semantics/__init__.py b/nsys2prv/semantics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..19762807643a1502861a372168ee81c2c5a4bb3d
--- /dev/null
+++ b/nsys2prv/semantics/__init__.py
@@ -0,0 +1,5 @@
+from .kernels_semantic import KernelsSemantic
+from .mpi_semantic import *
+from .nvtx_startend_semantic import NVTXStartEndSemantic
+from .gpu_metrics_semantic import GPUMetricsSemantic
+from .openacc_semantic import *
\ No newline at end of file
diff --git a/nsys2prv/semantics/gpu_metrics_semantic.py b/nsys2prv/semantics/gpu_metrics_semantic.py
new file mode 100644
index 0000000000000000000000000000000000000000..6776bb446e1d3947ad0538c7c65e5e43c96c40a0
--- /dev/null
+++ b/nsys2prv/semantics/gpu_metrics_semantic.py
@@ -0,0 +1,33 @@
+from .nsys_event import NsysEvent
+from pandas import read_sql_table, DataFrame
+from sqlalchemy import text
+
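+# Offset added to Nsight metricId values to build the event types used for GPU metrics in the output trace.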
+event_type_metrics_base = 9400
+
+
+class GPUMetricsSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ self.metrics_event_names = DataFrame()
+ super().__init__(report)
+
+ def Setup(self):
+ if self.check_table("GPU_METRICS"):
+ self.query = text("SELECT * FROM GPU_METRICS")
+ else:
+ self._empty = True
+
+ def _preprocess(self):
+ metrics_description = read_sql_table("TARGET_INFO_GPU_METRICS", self._dbcon)
+ self._df.drop(self._df[self._df["timestamp"] < 0].index, inplace=True) # drop negative time
+ self.metrics_event_names = metrics_description.groupby(["metricId"]).agg({'metricName': 'first'}).reset_index()
+ self.metrics_event_names["metricId"] = self.metrics_event_names["metricId"] + event_type_metrics_base
+ self._df["deviceId"] = self._df["typeId"].apply(lambda x: x & 0xFF)
+ self._df = self._df.groupby(["timestamp", "typeId"]).agg({'metricId': lambda x: list(x+event_type_metrics_base),
+ 'value': lambda x: list(x),
+ 'deviceId': 'first'})
+ self._df.reset_index(inplace=True)
+ return super()._preprocess()
+
+ def get_names(self):
+ return self.metrics_event_names
\ No newline at end of file
diff --git a/nsys2prv/semantics/kernels_semantic.py b/nsys2prv/semantics/kernels_semantic.py
new file mode 100644
index 0000000000000000000000000000000000000000..804128e614b22fb92dc08a23edab57aa0005d505
--- /dev/null
+++ b/nsys2prv/semantics/kernels_semantic.py
@@ -0,0 +1,12 @@
+from .nsys_event import NsysEvent
+import os.path
+from sqlalchemy import text
+
+class KernelsSemantic(NsysEvent):
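+ # Loads the CUDA kernel and memory-operation records using the bundled scripts/kernels.sql query.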
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/kernels.sql'), 'r') as query:
+ self.query = text(query.read())
+
\ No newline at end of file
diff --git a/nsys2prv/semantics/mpi_semantic.py b/nsys2prv/semantics/mpi_semantic.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4763766e1772da3443b5d4ce32441d0bcc38a33
--- /dev/null
+++ b/nsys2prv/semantics/mpi_semantic.py
@@ -0,0 +1,78 @@
+from .nsys_event import NsysEvent
+import os.path
+from .mpi_event_encoding import *
+from sqlalchemy import text
+
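+# Each class below loads one family of MPI events (point-to-point, collectives, other, RMA, I/O) from the exported SQLite database and tags the rows with the matching MPITYPE_* event type.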
+class MPIP2PSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ if self.check_table('MPI_P2P_EVENTS') and self.check_table('MPI_START_WAIT_EVENTS'):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/mpi_p2p.sql'), 'r') as query:
+ self.query = text(query.read())
+ else:
+ self._empty = True
+
+ def _preprocess(self):
+ self._df["event_type"] = MPITYPE_PTOP
+ return super()._preprocess()
+
+class MPICollSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ if self.check_table("MPI_COLLECTIVES_EVENTS"):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/mpi_coll.sql'), 'r') as query:
+ self.query = text(query.read())
+ else:
+ self._empty = True
+
+ def _preprocess(self):
+ self._df = self._df.drop(self._df[self._df["Event"].str.contains("File") ].index)
+ self._df["event_type"] = MPITYPE_COLLECTIVE
+
+class MPIOtherSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ if self.check_table("MPI_OTHER_EVENTS"):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/mpi_other.sql'), 'r') as query:
+ self.query = text(query.read())
+ else:
+ self._empty = True
+
+ def _preprocess(self):
+ self._df = self._df.drop(self._df[self._df["Event"].str.contains("File") ].index)
+ self._df = self._df.drop(self._df[self._df["Event"].str.contains("Win|MPI_Get|MPI_Put|Accumulate") ].index)
+ self._df["event_type"] = MPITYPE_OTHER
+
+class MPIRMASemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ if self.check_table("MPI_OTHER_EVENTS"):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/mpi_other.sql'), 'r') as query:
+ self.query = text(query.read())
+ else:
+ self._empty = True
+
+ def _preprocess(self):
+ self._df = self._df[self._df["Event"].str.contains("Win|MPI_Get|MPI_Put|Accumulate")]
+ self._df["event_type"] = MPITYPE_RMA
+
+class MPIIOPSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ if self.check_table("MPI_OTHER_EVENTS") and self.check_table("MPI_COLLECTIVES_EVENTS"):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/mpi_io.sql'), 'r') as query:
+ self.query = text(query.read())
+ else:
+ self._empty = True
+
+ def _preprocess(self):
+ self._df = self._df[self._df["Event"].str.contains("File")]
+ self._df["event_type"] = MPITYPE_IO
\ No newline at end of file
diff --git a/nsys2prv/semantics/nsys_event.py b/nsys2prv/semantics/nsys_event.py
new file mode 100644
index 0000000000000000000000000000000000000000..9813a14ef3f7785712ad84013128e02481cf88b9
--- /dev/null
+++ b/nsys2prv/semantics/nsys_event.py
@@ -0,0 +1,68 @@
+from sqlalchemy import create_engine, exc, inspect
+import pandas as pd
+import os.path
+
+class NsysEvent:
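+ """Base class for one dataset read from an exported Nsight Systems SQLite database: subclasses set self.query in Setup() and may reshape the loaded DataFrame in _preprocess()."""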
+
+ class MissingDatabaseFile(Exception):
+ def __init__(self, filename):
+ super().__init__(f'Database file {filename} does not exist.')
+
+ class InvalidDatabaseFile(Exception):
+ def __init__(self, filename):
+ super().__init__(f'Database file {filename} could not be opened and appears to be invalid.')
+
+ class InvalidSQL(Exception):
+ def __init__(self, sql):
+ super().__init__(f'Bad SQL statement: {sql}')
+
+ query = "SELECT 1 AS 'ONE'"
+
+ def __init__(self, report) -> None:
+ self._dbcon = None
+ self._dbfile = f"{os.path.splitext(report)[0]}.sqlite"
+ self._df = pd.DataFrame()
+ self._empty = False
+
+ if not os.path.exists(self._dbfile):
+ raise self.MissingDatabaseFile(self._dbfile)
+
+ try:
+ self._dbcon = create_engine(f"sqlite:///{self._dbfile}")
+ except exc.SQLAlchemyError:
+ self._dbcon = None
+ raise self.InvalidDatabaseFile(self._dbfile)
+
+ def check_table(self, table_name):
+ insp = inspect(self._dbcon)
+ return insp.has_table(table_name)
+
+ def Setup(self):
+ pass
+
+ def _preprocess(self):
+ pass
+
+ def postprocess(self):
+ pass
+
+ def load_data(self):
+ if not self._empty:
+ try:
+ self._df = pd.read_sql_query(self.query, self._dbcon)
+ except pd.errors.DatabaseError:
+ raise self.InvalidSQL(self.query)
+ self._preprocess()
+
+ def apply_process_model(self, threads=pd.DataFrame, streams=pd.DataFrame):
+ self.df["thread"] = self.df["Tid"].map(threads.set_index('Tid')["thread"])
+ self.df["task"] = self.df["Tid"].map(threads.set_index('Tid')["task"])
+ if 'Rank' in threads.columns:
+ self.df["Rank"] = self.df["Tid"].map(threads.set_index('Tid')["Rank"])
+ pass
+
+ def get_threads(self):
+ return self._df[['Pid', 'Tid']].drop_duplicates()
+
+ def get_df(self):
+ return self._df.copy()
\ No newline at end of file
diff --git a/nsys2prv/semantics/nvtx_startend_semantic.py b/nsys2prv/semantics/nvtx_startend_semantic.py
new file mode 100644
index 0000000000000000000000000000000000000000..88241b72f2e762462e9117123a3af103ce33bab2
--- /dev/null
+++ b/nsys2prv/semantics/nvtx_startend_semantic.py
@@ -0,0 +1,11 @@
+from .nsys_event import NsysEvent
+import os.path
+from sqlalchemy import text
+
+class NVTXStartEndSemantic(NsysEvent):
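+ # Loads NVTX start/end range events using the bundled scripts/nvtx_startend_trace.sql query.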
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/nvtx_startend_trace.sql'), 'r') as query:
+ self.query = text(query.read())
\ No newline at end of file
diff --git a/nsys2prv/semantics/openacc_semantic.py b/nsys2prv/semantics/openacc_semantic.py
new file mode 100644
index 0000000000000000000000000000000000000000..105971928ecc9b0f15511d48909b433692ad8989
--- /dev/null
+++ b/nsys2prv/semantics/openacc_semantic.py
@@ -0,0 +1,27 @@
+from .nsys_event import NsysEvent
+import os.path
+from sqlalchemy import text
+
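+# Each class loads one OpenACC activity table (other, launch, and data events) through its bundled SQL script.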
+class OpenACCOtherSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/openacc_other.sql'), 'r') as query:
+ self.query = text(query.read())
+
+class OpenACCLaunchSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/openacc_launch.sql'), 'r') as query:
+ self.query = text(query.read())
+
+class OpenACCDataSemantic(NsysEvent):
+ def __init__(self, report) -> None:
+ super().__init__(report)
+
+ def Setup(self):
+ with open(os.path.join(os.path.dirname(__file__), '../scripts/openacc_data.sql'), 'r') as query:
+ self.query = text(query.read())
\ No newline at end of file
diff --git a/parser-playground.ipynb b/parser-playground.ipynb
index 10a24122f26de9d7757f71c6283f6d662f5ba30c..c4b2ad5e8356d7be3d919434d5587b3f233925d1 100644
--- a/parser-playground.ipynb
+++ b/parser-playground.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -14,6 +14,10 @@
"import locale\n",
"import sqlite3\n",
"from sqlalchemy import create_engine, text\n",
+ "from nsys2prv.EventWriter import event_writer as ewr\n",
+ "from nsys2prv.NSYSInterface import NSYSInterface\n",
+ "from nsys2prv.semantics.mpi_event_encoding import *\n",
+ "from nsys2prv.semantics import *\n",
"\n",
"NSYS_HOME = os.path.abspath(\"/home/mclasca/Apps/nsight-system/2024.5.1/\")\n",
"#NSIGHT_HOME = os.getenv('NSIGHT_HOME')\n",
@@ -27,6 +31,12 @@
"#REPORT_NAME=\"heka-step53+accum1-profile-2023.4-5721957\"\n",
"#REPORT_NAME=\"heka-axolotl-Mistral7B0.1-profile-2110598\"\n",
"\n",
+ "MULTIREPORT = True\n",
+ "if MULTIREPORT:\n",
+ " REPORTS_LIST = [os.path.abspath(x) for x in [\"/home/mclasca/Documents/BePPP/heka/proves/multi_2nodes/sod2d_0.nsys-rep\", \"/home/mclasca/Documents/BePPP/heka/proves/multi_2nodes/sod2d_1.nsys-rep\", \"/home/mclasca/Documents/BePPP/heka/proves/multi_2nodes/sod2d_2.nsys-rep\"]]\n",
+ " REPORT_DIRS_LIST = [os.path.dirname(x) for x in REPORTS_LIST]\n",
+ " REPORT_FILE = REPORTS_LIST[0] # For fast checks, it's best to have a reference report\n",
+ "\n",
"locale.setlocale(locale.LC_ALL, '')\n",
"\n",
"trace_name = \"test-heka\"\n",
@@ -69,6 +79,7 @@
"nvtx_stack_bottom = 4\n",
"\n",
"reports = [\"cuda_api_trace\", \"cuda_gpu_trace\"]\n",
+ "nsi = NSYSInterface(reports, False, NVTX_RANGE, False)\n",
"\n",
"def build_nsys_stats_name(report_name):\n",
" base_name = os.path.splitext(os.path.basename(REPORT_FILE))[0]\n",
@@ -686,7 +697,125 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "list_contexts = []\n",
+ "if MULTIREPORT:\n",
+ " for REPORT_FILE_I in REPORTS_LIST:\n",
+ " context_info_i = pd.read_sql_table(\"TARGET_INFO_CUDA_CONTEXT_INFO\", f\"sqlite:///{os.path.splitext(REPORT_FILE_I)[0]}.sqlite\")\n",
+ " list_contexts.append(context_info_i)\n",
+ " context_info = pd.concat(list_contexts)\n",
+ "else:\n",
+ " context_info = pd.read_sql_table(\"TARGET_INFO_CUDA_CONTEXT_INFO\", f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n",
+ "context_info.sort_values([\"processId\"], inplace=True)\n",
+ "\n",
+ "# CONTEXT INFO CHECK FOR MULTIREPORT\n",
+ "if context_info[\"deviceId\"].unique().size == 0:\n",
+ " print(f\"\\033[93m Warning! Only one unique device ID can be detected in resource identification. If this is not intended, some features will not be available. Please, make sure that the GPU bindings are correctly done and that every process identifies its own GPU with a unique device [0 .. N-1]. \\033[00m\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t_apicalls = True\n",
+ "cuda_api_df = []\n",
+ "if MULTIREPORT:\n",
+ " for i, REPORT_FILE_I in enumerate(REPORTS_LIST):\n",
+ " cuda_api_df.append(pd.read_csv(nsi.build_nsys_stats_name(REPORT_FILE_I, REPORT_DIRS_LIST[i], \"cuda_api_trace\")))\n",
+ "else:\n",
+ " cuda_api_df = pd.read_csv(nsi.build_nsys_stats_name(REPORT_FILE, REPORT_DIR, \"cuda_api_trace\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kernels_df = []\n",
+ "if MULTIREPORT:\n",
+ " sum = 0\n",
+ " for REPORT_FILE_I in REPORTS_LIST:\n",
+ " ksi = KernelsSemantic(REPORT_FILE_I)\n",
+ " ksi.Setup()\n",
+ " ksi.load_data()\n",
+ " kernels_df.append(ksi.get_df())\n",
+ " sum += ksi.get_df().shape[0]\n",
+ " del ksi\n",
+ "else:\n",
+ " ks = KernelsSemantic(REPORT_FILE)\n",
+ " ks.Setup()\n",
+ " ks.load_data()\n",
+ " kernels_df = ks.get_df()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "After concat: (21299, 23)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from functools import reduce\n",
+ "# MARK: MERGING AND ALIGNING\n",
+ "if MULTIREPORT:\n",
+ " # Find delta between earliest trace start and the others\n",
+ " session_time = []\n",
+ " for REPORT_FILE_I in REPORTS_LIST:\n",
+ " session_time.append(pd.read_sql_table(\"TARGET_INFO_SESSION_START_TIME\", f\"sqlite:///{os.path.splitext(REPORT_FILE_I)[0]}.sqlite\"))\n",
+ " \n",
+ " session_time = [x.iloc[0,0] for x in session_time] # Get the utcEpochNs\n",
+ " earliest_time = reduce(lambda x, y: min(x, y), session_time, float('inf'))\n",
+ " deltas = [start - earliest_time for start in session_time]\n",
+ " for i, df in enumerate(kernels_df):\n",
+ " df['Start (ns)'] += deltas[i]\n",
+ " kernels_df = pd.concat(kernels_df)\n",
+ " print(f\"After concat: {kernels_df.shape}\")\n",
+ "\n",
+ " if t_apicalls:\n",
+ " for i, df in enumerate(cuda_api_df):\n",
+ " df['Start (ns)'] += deltas[i]\n",
+ "\n",
+ " cuda_api_df = pd.concat(cuda_api_df)\n",
+ "\n",
+ " # if t_nvtx:\n",
+ " # for i, df in enumerate(nvtx_df):\n",
+ " # df['Start (ns)'] += deltas[i]\n",
+ " # df['End (ns)'] += deltas[i]\n",
+ " # nvtx_df = pd.concat(nvtx_df)\n",
+ "\n",
+ " # if t_nvtx_startend:\n",
+ " # for i, df in enumerate(nvtx_startend_df):\n",
+ " # df['Start (ns)'] += deltas[i]\n",
+ " # df['End (ns)'] += deltas[i]\n",
+ " # nvtx_startend_df = pd.concat(nvtx_startend_df)\n",
+ "\n",
+ " # if t_mpi:\n",
+ " # for i, df in enumerate(mpi_df):\n",
+ " # df['Start:ts_ns'] += deltas[i]\n",
+ " # df['End:ts_ns'] += deltas[i]\n",
+ " # mpi_df = pd.concat(mpi_df)\n",
+ " \n",
+ " #if t_metrics:\n",
+ "\n",
+ " #if t_openacc:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -710,369 +839,432 @@
" \n",
" \n",
" \n",
- " \n",
- " \n",
- " Start (ns) \n",
- " Duration (ns) \n",
- " CorrID \n",
- " GrdX \n",
- " GrdY \n",
- " GrdZ \n",
- " BlkX \n",
- " BlkY \n",
- " BlkZ \n",
- " Reg/Trd \n",
- " ... \n",
- " DymSMem (MB) \n",
- " Bytes (MB) \n",
- " Throughput (MB/s) \n",
- " SrcMemKd \n",
- " DstMemKd \n",
- " Device \n",
- " Ctx \n",
- " GreenCtx \n",
- " Strm \n",
- " Name \n",
+ " Pid \n",
+ " Tid \n",
+ " thread \n",
+ " device \n",
"
\n", + " | \n", + " | Device | \n", + "Strm | \n", + "thread | \n", + "task | \n", + "Pid | \n", "||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", - "... | \n", + "Pid | \n", + "deviceid | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", " |
6645 | \n", - "11824673327 | \n", - "1378516 | \n", - "68613 | \n", - "65535.0 | \n", - "1.0 | \n", - "1.0 | \n", - "32.0 | \n", - "1.0 | \n", - "1.0 | \n", - "36.0 | \n", - "... | \n", - "0,001 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NVIDIA H100 (1) | \n", + "2308061 | \n", + "0 | \n", + "NVIDIA H100 (0) | \n", + "{16, 17, 18, 19} | \n", + "4 | \n", "1 | \n", - "NaN | \n", - "16 | \n", - "mod_time_ops_adapt_dt_cfl_32_gpu | \n", + "2308061 | \n", "|
6646 | \n", - "11826052707 | \n", - "99903 | \n", - "68614 | \n", - "3.0 | \n", - "1.0 | \n", - "1.0 | \n", - "256.0 | \n", - "1.0 | \n", - "1.0 | \n", - "18.0 | \n", - "... | \n", - "0,001 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", + "2308062 | \n", + "2 | \n", + "NVIDIA H100 (2) | \n", + "{16, 17, 18, 19} | \n", + "4 | \n", + "2 | \n", + "2308062 | \n", + "|||||
2308065 | \n", + "1 | \n", "NVIDIA H100 (1) | \n", + "{16, 17, 18} | \n", + "3 | \n", + "3 | \n", + "2308065 | \n", + "
\n", + " | index | \n", + "Device | \n", + "Strm | \n", + "deviceid | \n", + "Pid | \n", + "thread | \n", + "task | \n", + "row_name | \n", + "||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "0 | \n", + "NVIDIA H100 (0) | \n", + "16 | \n", + "0 | \n", + "2308061 | \n", + "2 | \n", "1 | \n", - "NaN | \n", + "CUDA-D0.S16 | \n", + "|||||||||||||||||||
1 | \n", + "190 | \n", + "NVIDIA H100 (0) | \n", + "17 | \n", + "0 | \n", + "2308061 | \n", + "3 | \n", + "1 | \n", + "CUDA-D0.S17 | \n", + "||||||||||||||||||||
2 | \n", + "191 | \n", + "NVIDIA H100 (0) | \n", + "18 | \n", + "0 | \n", + "2308061 | \n", + "4 | \n", + "1 | \n", + "CUDA-D0.S18 | \n", + "||||||||||||||||||||
3 | \n", + "204 | \n", + "NVIDIA H100 (0) | \n", + "19 | \n", + "0 | \n", + "2308061 | \n", + "5 | \n", + "1 | \n", + "CUDA-D0.S19 | \n", + "||||||||||||||||||||
4 | \n", + "0 | \n", + "NVIDIA H100 (2) | \n", "16 | \n", - "mod_time_ops_adapt_dt_cfl_32_gpu__red | \n", + "2 | \n", + "2308062 | \n", + "2 | \n", + "2 | \n", + "CUDA-D2.S16 | \n", "|||||||||||||||||||
6647 | \n", - "11826167106 | \n", - "2176 | \n", - "68616 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "... | \n", - "NaN | \n", - "0,000 | \n", - "1,838 | \n", - "Device | \n", - "Pageable | \n", + "5 | \n", + "190 | \n", + "NVIDIA H100 (2) | \n", + "17 | \n", + "2 | \n", + "2308062 | \n", + "3 | \n", + "2 | \n", + "CUDA-D2.S17 | \n", + "|||
6 | \n", + "191 | \n", + "NVIDIA H100 (2) | \n", + "18 | \n", + "2 | \n", + "2308062 | \n", + "4 | \n", + "2 | \n", + "CUDA-D2.S18 | \n", + "||||||||||||||||||||
7 | \n", + "203 | \n", + "NVIDIA H100 (2) | \n", + "19 | \n", + "2 | \n", + "2308062 | \n", + "5 | \n", + "2 | \n", + "CUDA-D2.S19 | \n", + "||||||||||||||||||||
8 | \n", + "0 | \n", "NVIDIA H100 (1) | \n", - "1 | \n", - "NaN | \n", "16 | \n", - "[CUDA memcpy Device-to-Host] | \n", + "1 | \n", + "2308065 | \n", + "2 | \n", + "3 | \n", + "CUDA-D1.S16 | \n", "|||||||||||||||||
6648 | \n", - "11826178754 | \n", - "2176 | \n", - "68617 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "... | \n", - "NaN | \n", - "0,000 | \n", - "1,838 | \n", - "Device | \n", - "Pageable | \n", + "9 | \n", + "190 | \n", "NVIDIA H100 (1) | \n", + "17 | \n", "1 | \n", - "NaN | \n", - "16 | \n", - "[CUDA memcpy Device-to-Host] | \n", + "2308065 | \n", + "3 | \n", + "3 | \n", + "CUDA-D1.S17 | \n", "
6649 | \n", - "11826190114 | \n", - "2176 | \n", - "68618 | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "NaN | \n", - "... | \n", - "NaN | \n", - "0,000 | \n", - "1,838 | \n", - "Device | \n", - "Pageable | \n", + "10 | \n", + "191 | \n", "NVIDIA H100 (1) | \n", + "18 | \n", "1 | \n", - "NaN | \n", - "16 | \n", - "[CUDA memcpy Device-to-Host] | \n", + "2308065 | \n", + "4 | \n", + "3 | \n", + "CUDA-D1.S18 | \n", "
6650 rows × 21 columns
\n", "20 rows × 25 columns
\n", + "" + ], + "text/plain": [ + " Start (ns) Duration:dur_ns CorrID GrdX GrdY GrdZ BlkX \\\n", + "2023 10256971039 2272 23576 NaN NaN NaN NaN \n", + "2024 10257043551 1105690 23590 81902.0 1.0 1.0 128.0 \n", + "2025 10258164089 51168 23595 81902.0 1.0 1.0 128.0 \n", + "2026 10258227161 768 23598 NaN NaN NaN NaN \n", + "2027 10258236281 897563 23601 65535.0 1.0 1.0 128.0 \n", + "2028 10259135028 97632 23602 1.0 1.0 1.0 256.0 \n", + "2029 10259241107 2273 23604 NaN NaN NaN NaN \n", + "2030 10259285939 768 23616 NaN NaN NaN NaN \n", + "2031 10259294836 412701 23619 65535.0 1.0 1.0 128.0 \n", + "2032 10259708465 96064 23620 1.0 1.0 1.0 256.0 \n", + "2033 10259813009 2272 23622 NaN NaN NaN NaN \n", + "2034 10259840881 751484 23634 81902.0 1.0 1.0 128.0 \n", + "2035 10260609197 149279 23640 245704.0 1.0 1.0 128.0 \n", + "2036 10260759628 55584 23642 81902.0 1.0 1.0 128.0 \n", + "2037 10260829643 7861560 23646 1246149.0 1.0 1.0 96.0 \n", + "2038 10268707939 8192 23651 3329.0 1.0 1.0 128.0 \n", + "2039 10268727331 3392 23655 3329.0 1.0 1.0 128.0 \n", + "2040 10268754723 4672 23663 NaN NaN NaN NaN \n", + "2041 10268759875 3328 23669 NaN NaN NaN NaN \n", + "2042 10268769987 5568 23688 NaN NaN NaN NaN \n", + "\n", + " BlkY BlkZ Reg/Trd ... DstMemKd Device deviceid Pid \\\n", + "2023 NaN NaN NaN ... Pageable NVIDIA H100 (2) 2 2308062 \n", + "2024 1.0 1.0 32.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2025 1.0 1.0 26.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2026 NaN NaN NaN ... None NVIDIA H100 (2) 2 2308062 \n", + "2027 1.0 1.0 56.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2028 1.0 1.0 16.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2029 NaN NaN NaN ... Pageable NVIDIA H100 (2) 2 2308062 \n", + "2030 NaN NaN NaN ... None NVIDIA H100 (2) 2 2308062 \n", + "2031 1.0 1.0 40.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2032 1.0 1.0 16.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2033 NaN NaN NaN ... Pageable NVIDIA H100 (2) 2 2308062 \n", + "2034 1.0 1.0 45.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2035 1.0 1.0 36.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2036 1.0 1.0 16.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2037 1.0 1.0 93.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2038 1.0 1.0 16.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2039 1.0 1.0 16.0 ... None NVIDIA H100 (2) 2 2308062 \n", + "2040 NaN NaN NaN ... Device NVIDIA H100 (2) 2 2308062 \n", + "2041 NaN NaN NaN ... Device NVIDIA H100 (2) 2 2308062 \n", + "2042 NaN NaN NaN ... 
Device NVIDIA H100 (2) 2 2308062 \n", + "\n", + " Ctx GreenCtx Strm Name \\\n", + "2023 1 None 16 [CUDA memcpy Device-to-Host] \n", + "2024 1 None 16 mod_solver_conjgrad_imex_245_gpu \n", + "2025 1 None 16 mod_bc_routines_bc_fix_dirichlet_residual_293_gpu \n", + "2026 1 None 16 [CUDA memset] \n", + "2027 1 None 16 mod_solver_conjgrad_imex_271_gpu \n", + "2028 1 None 16 mod_solver_conjgrad_imex_271_gpu__red \n", + "2029 1 None 16 [CUDA memcpy Device-to-Host] \n", + "2030 1 None 16 [CUDA memset] \n", + "2031 1 None 16 mod_solver_conjgrad_imex_297_gpu \n", + "2032 1 None 16 mod_solver_conjgrad_imex_297_gpu__red \n", + "2033 1 None 16 [CUDA memcpy Device-to-Host] \n", + "2034 1 None 16 mod_solver_conjgrad_imex_307_gpu \n", + "2035 1 None 16 elem_diffu_full_diffusion_ijk_51_gpu \n", + "2036 1 None 16 elem_diffu_full_diffusion_ijk_52_gpu \n", + "2037 1 None 16 elem_diffu_full_diffusion_ijk_60_gpu \n", + "2038 1 None 16 mod_comms_fill_sendbuffer_real_239_gpu \n", + "2039 1 None 16 mod_comms_fill_sendbuffer_real_246_gpu \n", + "2040 1 None 17 [CUDA memcpy Peer-to-Peer] \n", + "2041 1 None 19 [CUDA memcpy Peer-to-Peer] \n", + "2042 1 None 18 [CUDA memcpy Peer-to-Peer] \n", + "\n", + " thread task \n", + "2023 2 2 \n", + "2024 2 2 \n", + "2025 2 2 \n", + "2026 2 2 \n", + "2027 2 2 \n", + "2028 2 2 \n", + "2029 2 2 \n", + "2030 2 2 \n", + "2031 2 2 \n", + "2032 2 2 \n", + "2033 2 2 \n", + "2034 2 2 \n", + "2035 2 2 \n", + "2036 2 2 \n", + "2037 2 2 \n", + "2038 2 2 \n", + "2039 2 2 \n", + "2040 3 2 \n", + "2041 5 2 \n", + "2042 4 2 \n", "\n", - " isGreenContext \n", - "0 0 " + "[20 rows x 25 columns]" ] }, - "execution_count": 7, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "context_info = pd.read_sql_table(\"TARGET_INFO_CUDA_CONTEXT_INFO\", f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n", - "context_info" + "kernels_df.iloc[16000:16020]" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 5b3f16037a220e03d9a328eb0eb4e795df96cc38..d0827c318d5ac718dced6b568a287fbdc46152da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,13 @@ [tool.poetry] name = "nsys2prv" -version = "0.3.1" +version = "0.4.0-dev20241007" description = "Translate a NVIDIA Nsight System trace to a Paraver trace" authors = ["Marc Clascà