diff --git a/cfgs/nvtx_startend.cfg b/cfgs/nvtx_startend.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..b88ebffa006fc42f4313714dec81fe401a36d29b
--- /dev/null
+++ b/cfgs/nvtx_startend.cfg
@@ -0,0 +1,46 @@
+#ParaverCFG
+ConfigFile.Version: 3.4
+ConfigFile.NumWindows: 1
+ConfigFile.BeginDescription
+
+ConfigFile.EndDescription
+
+################################################################################
+< NEW DISPLAYING WINDOW NVTX StartEnd Ranges >
+################################################################################
+window_name NVTX StartEnd Ranges
+window_type single
+window_id 1
+window_position_x 671
+window_position_y 275
+window_width 922
+window_height 165
+window_comm_lines_enabled false
+window_flags_enabled false
+window_noncolor_mode true
+window_custom_color_enabled false
+window_semantic_scale_min_at_zero false
+window_logical_filtered true
+window_physical_filtered false
+window_comm_fromto true
+window_comm_tagsize true
+window_comm_typeval true
+window_units Nanoseconds
+window_maximum_y 52.000000000000
+window_minimum_y 1.000000000000
+window_compute_y_max false
+window_level thread
+window_scale_relative 1.000000000000
+window_end_time_relative 1.000000000000
+window_object appl { 1, { All } }
+window_begin_time_relative 0.000000000000
+window_open false
+window_drawmode draw_maximum
+window_drawmode_rows draw_last
+window_pixel_size 1
+window_labels_to_draw 1
+window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
+window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
+window_filter_module evt_type 1 9004
+window_filter_module evt_type_label 1 "Unknown"
+
diff --git a/nvtx_startend_test.ipynb b/nvtx_startend_test.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..89e3b33175871adc3dfb3b9c22d5fe37aba65f86
--- /dev/null
+++ b/nvtx_startend_test.ipynb
@@ -0,0 +1,393 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import time\n",
+ "import subprocess\n",
+ "import os\n",
+ "import locale\n",
+ "import sqlite3\n",
+ "from sqlalchemy import create_engine\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "NSIGHT_HOME=\"/home/mclasca/Apps/nsight-system/2024.1\"\n",
+ "#NSIGHT_HOME = os.getenv('NSIGHT_HOME')\n",
+ "PARAVER_HOME = os.getenv('PARAVER_HOME')\n",
+ "NVTX_RANGE=\"step53\"\n",
+ "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/heka/profiles/mistral-mn5/heka-axolotl-Mistral7B0.1-4s_withmetrics-2432719.nsys-rep\")\n",
+ "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n",
+ "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/jesus/drive-download-20240628T123341Z-001/long-short.nsys-rep\")\n",
+ "REPORT_DIR = os.path.dirname(REPORT_FILE)\n",
+ "#REPORT_NAME=\"heka-step53+accum1-profile-2023.4-5721957\"\n",
+ "#REPORT_NAME=\"heka-axolotl-Mistral7B0.1-profile-2110598\"\n",
+ "\n",
+ "locale.setlocale(locale.LC_ALL, '')\n",
+ "\n",
+ "trace_name = \"test-xshells-metrics\"\n",
+ "event_type_kernels = 63000006\n",
+ "event_type_memcopy_size = 63000002\n",
+ "event_type_api = 63000000\n",
+ "event_type_nvtx = 9003\n",
+ "event_type_nvtx_startend = 9004\n",
+ "event_type_blkgrd_name = 9100\n",
+ "event_types_block_grid_values = [9101, 9102, 9103, 9104, 9105, 9106]\n",
+ "event_types_block_grid_values_names = ['GrdX', 'GrdY', 'GrdZ', 'BlkX', 'BlkY', 'BlkZ']\n",
+ "event_type_registers_thread = 9107\n",
+ "event_type_correlation = 9200\n",
+ "event_type_mpi = 9300\n",
+ "event_type_metrics_base = 9400\n",
+ "\n",
+ "comm_tag_launch = 55001\n",
+ "comm_tag_memory = 55002\n",
+ "comm_tag_dependency = 55003\n",
+ "\n",
+ "event_type_openacc = 66000000\n",
+ "event_type_openacc_data = 66000001\n",
+ "event_type_openacc_launch = 66000002\n",
+ "\n",
+ "event_type_name_openacc = 66100000\n",
+ "event_type_name_openacc_data = 66100001\n",
+ "event_type_name_openacc_launch = 66100002\n",
+ "\n",
+ "event_type_func_openacc = 66200000\n",
+ "event_type_func_openacc_data = 66200001\n",
+ "event_type_func_openacc_launch = 66200002\n",
+ "\n",
+ "event_type_openacc_data_size = 66300001\n",
+ "\n",
+ "nvtx_select_frames = True\n",
+ "nvtx_stack_top = 1\n",
+ "nvtx_stack_bottom = 4\n",
+ "\n",
+ "t_openacc = True\n",
+ "\n",
+ "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\"]\n",
+ "\n",
+ "def build_nsys_stats_name(report_name):\n",
+ " base_name = os.path.splitext(os.path.basename(REPORT_FILE))[0]\n",
+ " return os.path.join(REPORT_DIR, base_name+\"_{}.csv\".format(report_name))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "engine = create_engine(f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n",
+ "with engine.connect() as conn, conn.begin():\n",
+ " with open(os.path.join(os.path.dirname(__file__), 'scripts/nvtx_startend_trace.sql'), 'r') as query:\n",
+ " # conn is the SQLAlchemy connection to the report's .sqlite database; run the bundled query on it\n",
+ " nvtx_startend_ranges = pd.read_sql_query(query.read(), conn)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " start | \n",
+ " end | \n",
+ " duration | \n",
+ " tag | \n",
+ " Pid | \n",
+ " Tid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 15875509953 | \n",
+ " 18153918605 | \n",
+ " 2278408652 | \n",
+ " Prefill Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 18249289404 | \n",
+ " 18317053596 | \n",
+ " 67764192 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 18318613145 | \n",
+ " 18370357541 | \n",
+ " 51744396 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 18371800631 | \n",
+ " 18419198817 | \n",
+ " 47398186 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 18420475918 | \n",
+ " 18469221364 | \n",
+ " 48745446 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 18470516206 | \n",
+ " 18519054513 | \n",
+ " 48538307 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 18520348624 | \n",
+ " 18566994576 | \n",
+ " 46645952 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 18568257617 | \n",
+ " 18616133670 | \n",
+ " 47876053 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 18617419861 | \n",
+ " 18665209334 | \n",
+ " 47789473 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 18666531725 | \n",
+ " 18717456526 | \n",
+ " 50924801 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 18718733077 | \n",
+ " 18766405731 | \n",
+ " 47672654 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 18767686192 | \n",
+ " 18817276263 | \n",
+ " 49590071 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 18818585724 | \n",
+ " 18867426470 | \n",
+ " 48840746 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 18868709601 | \n",
+ " 18917896044 | \n",
+ " 49186443 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 18919210995 | \n",
+ " 18970345235 | \n",
+ " 51134240 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 18971678885 | \n",
+ " 19021437385 | \n",
+ " 49758500 | \n",
+ " Decode Transformer | \n",
+ " 11906 | \n",
+ " 11906 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " start end duration tag Pid Tid\n",
+ "0 15875509953 18153918605 2278408652 Prefill Transformer 11906 11906\n",
+ "1 18249289404 18317053596 67764192 Decode Transformer 11906 11906\n",
+ "2 18318613145 18370357541 51744396 Decode Transformer 11906 11906\n",
+ "3 18371800631 18419198817 47398186 Decode Transformer 11906 11906\n",
+ "4 18420475918 18469221364 48745446 Decode Transformer 11906 11906\n",
+ "5 18470516206 18519054513 48538307 Decode Transformer 11906 11906\n",
+ "6 18520348624 18566994576 46645952 Decode Transformer 11906 11906\n",
+ "7 18568257617 18616133670 47876053 Decode Transformer 11906 11906\n",
+ "8 18617419861 18665209334 47789473 Decode Transformer 11906 11906\n",
+ "9 18666531725 18717456526 50924801 Decode Transformer 11906 11906\n",
+ "10 18718733077 18766405731 47672654 Decode Transformer 11906 11906\n",
+ "11 18767686192 18817276263 49590071 Decode Transformer 11906 11906\n",
+ "12 18818585724 18867426470 48840746 Decode Transformer 11906 11906\n",
+ "13 18868709601 18917896044 49186443 Decode Transformer 11906 11906\n",
+ "14 18919210995 18970345235 51134240 Decode Transformer 11906 11906\n",
+ "15 18971678885 19021437385 49758500 Decode Transformer 11906 11906"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nvtx_startend_ranges"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " tag | \n",
+ " event_value | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " Decode Transformer | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " Prefill Transformer | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " tag event_value\n",
+ "1 Decode Transformer 1\n",
+ "0 Prefill Transformer 2"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "nvtx_startend_ranges[\"event_value\"] = nvtx_startend_ranges.groupby([\"tag\"]).ngroup() + 1\n",
+ "nvtx_startend_names = nvtx_startend_ranges[['tag', 'event_value']].drop_duplicates()\n",
+ "nvtx_startend_names.sort_values(\"event_value\", inplace=True)\n",
+ "nvtx_startend_names"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "env",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/parse-nsys-stats.py b/parse-nsys-stats.py
index 651a4476a669022f38be734330b6e3428c108e61..ba94b6cb46a10db46598759b6feb153c149c53a7 100755
--- a/parse-nsys-stats.py
+++ b/parse-nsys-stats.py
@@ -11,6 +11,7 @@ import subprocess
import os
import locale
import sqlite3
+from sqlalchemy import create_engine
locale.setlocale(locale.LC_ALL, '')
@@ -19,7 +20,7 @@ parser = argparse.ArgumentParser(description="Convert a NVIDIA Nsight System tra
epilog="The environment variables NSIGHT_HOME and PARAVER_HOME are needed")
parser.add_argument("-f", "--filter-nvtx", help="Filter by this NVTX range")
-parser.add_argument("-t", "--trace", required=True, help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, cuda_api_trace, cuda_gpu_trace, gpu_metrics]")
+parser.add_argument("-t", "--trace", required=True, help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, nvtx_startend_trace, cuda_api_trace, cuda_gpu_trace, gpu_metrics]")
parser.add_argument("--force-sqlite", action="store_true", help="Force Nsight System to export SQLite database")
@@ -56,6 +57,7 @@ NVTX_RANGE = args.filter_nvtx
reports = args.trace.split(",")
t_nvtx = False
+t_nvtx_startend = False
t_kernels = False
t_apicalls = False
t_mpi = False
@@ -68,12 +70,16 @@ if "mpi_event_trace" in reports: t_mpi = True
if "gpu_metrics" in reports:
t_metrics = True
reports.remove("gpu_metrics")
+if "nvtx_startend_trace" in reports:
+ t_nvtx_startend = True
+ reports.remove("nvtx_startend_trace")
#trace_name = "llava_cesga"
event_type_kernels = 63000006
event_type_memcopy_size = 63000002
event_type_api = 63000000
event_type_nvtx = 9003
+event_type_nvtx_startend = 9004
event_type_blkgrd_name = 9100
event_types_block_grid_values = [9101, 9102, 9103, 9104, 9105, 9106]
event_types_block_grid_values_names = ['GrdX', 'GrdY', 'GrdZ', 'BlkX', 'BlkY', 'BlkZ']
@@ -86,7 +92,7 @@ comm_tag_launch = 55001
comm_tag_memory = 55002
comm_tag_dependency = 55003
-nvtx_select_frames = True
+nvtx_select_frames = False
nvtx_stack_top = 1
nvtx_stack_bottom = 4
@@ -141,6 +147,15 @@ if t_nvtx:
else:
nvtx_df = pd.DataFrame()
+if t_nvtx_startend:
+    engine = create_engine(f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
+    with engine.connect() as conn, conn.begin():
+        with open(os.path.join(os.path.dirname(__file__), 'scripts/nvtx_startend_trace.sql'), 'r') as query:
+            # Run the query shipped next to this script against the report's .sqlite database
+            nvtx_startend_df = pd.read_sql_query(query.read(), conn)
+else:
+    nvtx_startend_df = pd.DataFrame()
+
if t_mpi:
mpi_df = pd.read_csv(build_nsys_stats_name("mpi_event_trace"))
else:
@@ -171,6 +186,7 @@ if t_metrics:
if t_apicalls: print("CUDA calls unique processes: {}, and unique threads: {}".format(cuda_api_df["Pid"].unique(), cuda_api_df["Tid"].unique()))
if t_nvtx: print("NVTX ranges unique processes: {}, and unique threads: {}".format(nvtx_df["PID"].unique(), nvtx_df["TID"].unique()))
+if t_nvtx_startend: print("NVTX startend unique processes: {}, and unique threads: {}".format(nvtx_startend_df["Pid"].unique(), nvtx_startend_df["Tid"].unique()))
if t_mpi: print("MPI ranges unique processes: {}, and unique threads: {}".format(mpi_df["Pid"].unique(), mpi_df["Tid"].unique()))
if t_nvtx: nvtx_df.rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True)
@@ -178,8 +194,10 @@ if t_nvtx: nvtx_df.rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True)
compute_threads_with = []
if t_apicalls: compute_threads_with.append(cuda_api_df[['Pid', 'Tid']])
if t_nvtx: compute_threads_with.append(nvtx_df[["Pid", "Tid"]])
+if t_nvtx_startend: compute_threads_with.append(nvtx_startend_df[["Pid", "Tid"]])
if t_mpi: compute_threads_with.append(mpi_df[["Pid", "Tid"]])
+
threads = pd.concat(compute_threads_with).drop_duplicates()
threads.sort_values(["Pid"], inplace=True)
threads["thread"] = threads.groupby(["Pid"]).cumcount() + 1
@@ -197,6 +215,8 @@ cuda_api_df["thread"] = 0
cuda_api_df["task"] = 0
nvtx_df["thread"] = 0
nvtx_df["task"] = 0
+nvtx_startend_df["thread"] = 0
+nvtx_startend_df["task"] = 0
mpi_df["thread"] = 0
mpi_df["task"] = 0
@@ -218,6 +238,10 @@ if t_nvtx:
nvtx_df["thread"] = nvtx_df["Tid"].map(threads.set_index('Tid')["thread"])
nvtx_df["task"] = nvtx_df["Tid"].map(threads.set_index('Tid')["task"])
+if t_nvtx_startend:
+ nvtx_startend_df["thread"] = nvtx_startend_df["Tid"].map(threads.set_index('Tid')["thread"])
+ nvtx_startend_df["task"] = nvtx_startend_df["Tid"].map(threads.set_index('Tid')["task"])
+
if t_mpi:
mpi_df["thread"] = mpi_df["Tid"].map(threads.set_index('Tid')["thread"])
mpi_df["task"] = mpi_df["Tid"].map(threads.set_index('Tid')["task"])
@@ -335,6 +359,12 @@ if t_nvtx:
ranges_names = nvtx_df_subset[['event_value', 'Name']].drop_duplicates()
ranges_names.sort_values("event_value", inplace=True)
+if t_nvtx_startend:
+ nvtx_startend_df["event_value"] = nvtx_startend_df.groupby(["tag"]).ngroup() + 1
+ nvtx_startend_names = nvtx_startend_df[['tag', 'event_value']].drop_duplicates()
+ nvtx_startend_names.sort_values("event_value", inplace=True)
+ nvtx_startend_names
+
print("-\tWriting pcf file...")
@@ -437,13 +467,22 @@ if t_metrics:
if t_nvtx:
pcf_file.write("EVENT_TYPE\n")
- pcf_file.write("0 {} NVTX ranges\n".format(event_type_nvtx))
+ pcf_file.write("0 {} NVTX pushpop ranges\n".format(event_type_nvtx))
pcf_file.write("VALUES\n")
pcf_file.write("0 End\n")
for index, row in ranges_names.iterrows():
pcf_file.write("{} {}\n".format(row["event_value"], row["Name"]))
pcf_file.write("\n")
+if t_nvtx_startend:
+ pcf_file.write("EVENT_TYPE\n")
+ pcf_file.write("0 {} NVTX startend ranges\n".format(event_type_nvtx_startend))
+ pcf_file.write("VALUES\n")
+ pcf_file.write("0 End\n")
+ for index, row in nvtx_startend_names.iterrows():
+ pcf_file.write("{} {}\n".format(row["event_value"], row["tag"]))
+ pcf_file.write("\n")
+
pcf_file.close()
# # Split of kernel execution between compute and memory
@@ -508,6 +547,7 @@ applist = applist + ")"
compute_max_with = []
if t_apicalls: compute_max_with.append((cuda_api_df["Start (ns)"] + cuda_api_df["Duration (ns)"]).max())
if t_nvtx: compute_max_with.append(nvtx_df["End (ns)"].max())
+if t_nvtx_startend: compute_max_with.append(nvtx_startend_df["end"].max())
if t_mpi: compute_max_with.append(mpi_df["End (ns)"].max())
ftime = max(compute_max_with)
@@ -547,12 +587,19 @@ if t_apicalls:
chunk = ""
if t_nvtx:
- print("-\tWriting NVTX ranges...")
+ print("-\tWriting NVTX pushpop ranges...")
for index, row in nvtx_df_subset.iterrows():
chunk += create_event_record(row.iloc[0], row.iloc[2], int(row["thread"]), int(row["task"]), event_type_nvtx, row["event_value"])
prv_file.write(chunk)
chunk = ""
+if t_nvtx_startend:
+ print("-\tWriting NVTX startend ranges...")
+ for index, row in nvtx_startend_df.iterrows():
+ chunk += create_event_record(row.iloc[0], row.iloc[2], int(row["thread"]), int(row["task"]), event_type_nvtx_startend, row["event_value"])
+ prv_file.write(chunk)
+ chunk = ""
+
if t_mpi:
print("-\tWriting MPI events...")
for index, row in mpi_df.iterrows():
diff --git a/scripts/nvtx_startend_trace.sql b/scripts/nvtx_startend_trace.sql
new file mode 100644
index 0000000000000000000000000000000000000000..c52b66ab41845276a2c187401b4da0869b03f5ba
--- /dev/null
+++ b/scripts/nvtx_startend_trace.sql
@@ -0,0 +1,51 @@
+WITH
+    domains AS (  -- one row per (domainId, globalTid) carrying the domain's name
+        SELECT
+            min(start),  -- with min(), SQLite takes the bare column `text` from the earliest row of each group
+            domainId AS id,
+            globalTid AS globalTid,
+            text AS name
+        FROM
+            NVTX_EVENTS
+        WHERE
+            eventType == 75  -- presumably NvtxDomainCreate in the nsys export schema; confirm
+        GROUP BY 2, 3
+    ),
+    maxts AS(  -- latest start/end timestamp of any NVTX event; used to close ranges that never ended
+        SELECT max(max(start), max(end)) AS m
+        FROM NVTX_EVENTS
+    ),
+    nvtx AS (
+        SELECT
+            ne.start as start,
+            coalesce(ne.end, (SELECT m FROM maxts)) as end,  -- open range: same fallback as duration below
+            coalesce(ne.end, (SELECT m FROM maxts)) - ne.start AS duration,
+            CASE  -- tag = optional "domain:" prefix + registered string if present, else the inline text
+                WHEN d.name NOT NULL AND sid.value IS NOT NULL
+                    THEN d.name || ':' || sid.value
+                WHEN d.name NOT NULL AND sid.value IS NULL
+                    THEN d.name || ':' || ne.text
+                WHEN d.name IS NULL AND sid.value NOT NULL
+                    THEN sid.value
+                ELSE ne.text
+            END AS tag,
+            (ne.globalTid / 0x1000000 % 0x1000000) as Pid,  -- bits 24..47 of globalTid
+            (ne.globalTid % 0x1000000) as Tid  -- low 24 bits of globalTid
+        FROM
+            NVTX_EVENTS AS ne
+        LEFT OUTER JOIN
+            domains AS d
+            ON ne.domainId == d.id
+            AND (ne.globalTid & 0x0000FFFFFF000000) == (d.globalTid & 0x0000FFFFFF000000)  -- same Pid bits
+        LEFT OUTER JOIN
+            StringIds AS sid
+            ON ne.textId == sid.id
+        WHERE
+            ne.eventType == 60  -- presumably NvtxStartEndRange; confirm against the nsys schema
+            OR
+            ne.eventType == 71  -- presumably NvtxtStartEndRange; confirm against the nsys schema
+        )
+SELECT
+    *
+    FROM
+        nvtx
\ No newline at end of file