diff --git a/parse-nsys-stats.ipynb b/parse-nsys-stats.ipynb
index c4af5a0c8ac5705af2682debddcb0feeb3bca862..ae044a77ed39ec6d37a0d5440de9c7a8b0785eaa 100644
--- a/parse-nsys-stats.ipynb
+++ b/parse-nsys-stats.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"id": "7a6eb6bf-0de9-458d-bd77-d876b0219bd3",
"metadata": {},
"outputs": [],
@@ -13,7 +13,8 @@
"import subprocess\n",
"import os\n",
"import locale\n",
- "import sqlite3\n"
+ "import sqlite3\n",
+ "from sqlalchemy import create_engine\n"
]
},
{
@@ -26,7 +27,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"id": "69698964-097d-4cac-890a-25ea7d3beb11",
"metadata": {},
"outputs": [],
@@ -36,7 +37,8 @@
"PARAVER_HOME = os.getenv('PARAVER_HOME')\n",
"NVTX_RANGE=\"step53\"\n",
"#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/heka/profiles/mistral-mn5/heka-axolotl-Mistral7B0.1-4s_withmetrics-2432719.nsys-rep\")\n",
- "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n",
+ "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n",
+ "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.nsys-rep\")\n",
"REPORT_DIR = os.path.dirname(REPORT_FILE)\n",
"#REPORT_NAME=\"heka-step53+accum1-profile-2023.4-5721957\"\n",
"#REPORT_NAME=\"heka-axolotl-Mistral7B0.1-profile-2110598\"\n",
@@ -60,11 +62,27 @@
"comm_tag_memory = 55002\n",
"comm_tag_dependency = 55003\n",
"\n",
+ "event_type_openacc = 66000000\n",
+ "event_type_openacc_data = 66000001\n",
+ "event_type_openacc_launch = 66000002\n",
+ "\n",
+ "event_type_name_openacc = 66100000\n",
+ "event_type_name_openacc_data = 66100001\n",
+ "event_type_name_openacc_launch = 66100002\n",
+ "\n",
+ "event_type_func_openacc = 66200000\n",
+ "event_type_func_openacc_data = 66200001\n",
+ "event_type_func_openacc_launch = 66200002\n",
+ "\n",
+ "event_type_openacc_data_size = 66300001\n",
+ "\n",
"nvtx_select_frames = True\n",
"nvtx_stack_top = 1\n",
"nvtx_stack_bottom = 4\n",
"\n",
- "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\", \"mpi_event_trace\"]\n",
+ "t_openacc = True\n",
+ "\n",
+ "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\"]\n",
"\n",
"def build_nsys_stats_name(report_name):\n",
" base_name = os.path.splitext(os.path.basename(REPORT_FILE))[0]\n",
@@ -112,7 +130,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 15,
"id": "6189c237-74d3-4880-a81c-f4d1e1c76230",
"metadata": {},
"outputs": [
@@ -163,9 +181,9 @@
"
\n",
" \n",
" 0 | \n",
- " 5052824755 | \n",
- " 5792 | \n",
- " 760 | \n",
+ " 1924937486 | \n",
+ " 1056 | \n",
+ " 213 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -175,21 +193,21 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 0,183 | \n",
- " 31553,006 | \n",
+ " 0,000 | \n",
+ " 30,303 | \n",
" Pageable | \n",
" Device | \n",
- " NVIDIA H100 (2) | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Host-to-Device] | \n",
"
\n",
" \n",
" 1 | \n",
- " 5052881747 | \n",
- " 5792 | \n",
- " 761 | \n",
+ " 1926678138 | \n",
+ " 1024 | \n",
+ " 211 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -199,21 +217,21 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 0,183 | \n",
- " 31553,006 | \n",
+ " 0,000 | \n",
+ " 31,250 | \n",
" Pageable | \n",
" Device | \n",
- " NVIDIA H100 (2) | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Host-to-Device] | \n",
"
\n",
" \n",
" 2 | \n",
- " 5054622386 | \n",
- " 5856 | \n",
- " 760 | \n",
+ " 1928094871 | \n",
+ " 992 | \n",
+ " 213 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -223,21 +241,21 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 0,183 | \n",
- " 31208,328 | \n",
+ " 0,000 | \n",
+ " 32,258 | \n",
" Pageable | \n",
" Device | \n",
- " NVIDIA H100 (1) | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Host-to-Device] | \n",
"
\n",
" \n",
" 3 | \n",
- " 5054666386 | \n",
- " 6528 | \n",
- " 761 | \n",
+ " 1932409317 | \n",
+ " 992 | \n",
+ " 211 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -247,21 +265,21 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 0,183 | \n",
- " 27995,661 | \n",
+ " 0,000 | \n",
+ " 32,258 | \n",
" Pageable | \n",
" Device | \n",
- " NVIDIA H100 (1) | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Host-to-Device] | \n",
"
\n",
" \n",
" 4 | \n",
- " 5063403468 | \n",
- " 6592 | \n",
- " 760 | \n",
+ " 1980006677 | \n",
+ " 896 | \n",
+ " 684 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -271,14 +289,14 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 0,183 | \n",
- " 27723,902 | \n",
+ " 0,000 | \n",
+ " 142,857 | \n",
" Pageable | \n",
" Device | \n",
- " NVIDIA H100 (3) | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Host-to-Device] | \n",
"
\n",
" \n",
@@ -306,10 +324,10 @@
" ... | \n",
"
\n",
" \n",
- " 163375 | \n",
- " 35129053256 | \n",
- " 3674711 | \n",
- " 290520 | \n",
+ " 56599 | \n",
+ " 38878898445 | \n",
+ " 2208 | \n",
+ " 198835 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -319,45 +337,45 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 190,070 | \n",
- " 51699,149 | \n",
+ " 0,000 | \n",
+ " 1,812 | \n",
" Device | \n",
- " Pinned | \n",
- " NVIDIA H100 (3) | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Device-to-Host] | \n",
"
\n",
" \n",
- " 163376 | \n",
- " 35131445786 | \n",
- " 3464975 | \n",
- " 291246 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " 56600 | \n",
+ " 38878940548 | \n",
+ " 109920 | \n",
+ " 154364 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 256.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 18.0 | \n",
+ " ... | \n",
+ " 0,001 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " ... | \n",
" NaN | \n",
- " 190,070 | \n",
- " 54740,275 | \n",
- " Device | \n",
- " Pinned | \n",
" NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
- " [CUDA memcpy Device-to-Host] | \n",
+ " 13 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu__red | \n",
"
\n",
" \n",
- " 163377 | \n",
- " 35131883569 | \n",
- " 3598606 | \n",
- " 668376 | \n",
+ " 56601 | \n",
+ " 38879061123 | \n",
+ " 2304 | \n",
+ " 154366 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -367,21 +385,21 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 190,070 | \n",
- " 52649,501 | \n",
+ " 0,000 | \n",
+ " 1,736 | \n",
" Device | \n",
- " Pinned | \n",
- " NVIDIA H100 (1) | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Device-to-Host] | \n",
"
\n",
" \n",
- " 163378 | \n",
- " 35132333093 | \n",
- " 3628153 | \n",
- " 651725 | \n",
+ " 56602 | \n",
+ " 38879071844 | \n",
+ " 2304 | \n",
+ " 154367 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -391,21 +409,21 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 190,070 | \n",
- " 52269,360 | \n",
+ " 0,000 | \n",
+ " 1,736 | \n",
" Device | \n",
- " Pinned | \n",
- " NVIDIA H100 (2) | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Device-to-Host] | \n",
"
\n",
" \n",
- " 163379 | \n",
- " 35132730143 | \n",
- " 3688279 | \n",
- " 290521 | \n",
+ " 56603 | \n",
+ " 38879082499 | \n",
+ " 2304 | \n",
+ " 154368 | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -415,78 +433,78 @@
" NaN | \n",
" ... | \n",
" NaN | \n",
- " 190,070 | \n",
- " 51509,078 | \n",
+ " 0,000 | \n",
+ " 1,736 | \n",
" Device | \n",
- " Pinned | \n",
- " NVIDIA H100 (3) | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
" 1 | \n",
" NaN | \n",
- " 7 | \n",
+ " 13 | \n",
" [CUDA memcpy Device-to-Host] | \n",
"
\n",
" \n",
"\n",
- "163380 rows × 21 columns
\n",
+ "56604 rows × 21 columns
\n",
""
],
"text/plain": [
- " Start (ns) Duration (ns) CorrID GrdX GrdY GrdZ BlkX BlkY \\\n",
- "0 5052824755 5792 760 NaN NaN NaN NaN NaN \n",
- "1 5052881747 5792 761 NaN NaN NaN NaN NaN \n",
- "2 5054622386 5856 760 NaN NaN NaN NaN NaN \n",
- "3 5054666386 6528 761 NaN NaN NaN NaN NaN \n",
- "4 5063403468 6592 760 NaN NaN NaN NaN NaN \n",
- "... ... ... ... ... ... ... ... ... \n",
- "163375 35129053256 3674711 290520 NaN NaN NaN NaN NaN \n",
- "163376 35131445786 3464975 291246 NaN NaN NaN NaN NaN \n",
- "163377 35131883569 3598606 668376 NaN NaN NaN NaN NaN \n",
- "163378 35132333093 3628153 651725 NaN NaN NaN NaN NaN \n",
- "163379 35132730143 3688279 290521 NaN NaN NaN NaN NaN \n",
+ " Start (ns) Duration (ns) CorrID GrdX GrdY GrdZ BlkX BlkY \\\n",
+ "0 1924937486 1056 213 NaN NaN NaN NaN NaN \n",
+ "1 1926678138 1024 211 NaN NaN NaN NaN NaN \n",
+ "2 1928094871 992 213 NaN NaN NaN NaN NaN \n",
+ "3 1932409317 992 211 NaN NaN NaN NaN NaN \n",
+ "4 1980006677 896 684 NaN NaN NaN NaN NaN \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "56599 38878898445 2208 198835 NaN NaN NaN NaN NaN \n",
+ "56600 38878940548 109920 154364 3.0 1.0 1.0 256.0 1.0 \n",
+ "56601 38879061123 2304 154366 NaN NaN NaN NaN NaN \n",
+ "56602 38879071844 2304 154367 NaN NaN NaN NaN NaN \n",
+ "56603 38879082499 2304 154368 NaN NaN NaN NaN NaN \n",
"\n",
- " BlkZ Reg/Trd ... DymSMem (MB) Bytes (MB) Throughput (MB/s) \\\n",
- "0 NaN NaN ... NaN 0,183 31553,006 \n",
- "1 NaN NaN ... NaN 0,183 31553,006 \n",
- "2 NaN NaN ... NaN 0,183 31208,328 \n",
- "3 NaN NaN ... NaN 0,183 27995,661 \n",
- "4 NaN NaN ... NaN 0,183 27723,902 \n",
- "... ... ... ... ... ... ... \n",
- "163375 NaN NaN ... NaN 190,070 51699,149 \n",
- "163376 NaN NaN ... NaN 190,070 54740,275 \n",
- "163377 NaN NaN ... NaN 190,070 52649,501 \n",
- "163378 NaN NaN ... NaN 190,070 52269,360 \n",
- "163379 NaN NaN ... NaN 190,070 51509,078 \n",
+ " BlkZ Reg/Trd ... DymSMem (MB) Bytes (MB) Throughput (MB/s) SrcMemKd \\\n",
+ "0 NaN NaN ... NaN 0,000 30,303 Pageable \n",
+ "1 NaN NaN ... NaN 0,000 31,250 Pageable \n",
+ "2 NaN NaN ... NaN 0,000 32,258 Pageable \n",
+ "3 NaN NaN ... NaN 0,000 32,258 Pageable \n",
+ "4 NaN NaN ... NaN 0,000 142,857 Pageable \n",
+ "... ... ... ... ... ... ... ... \n",
+ "56599 NaN NaN ... NaN 0,000 1,812 Device \n",
+ "56600 1.0 18.0 ... 0,001 NaN NaN NaN \n",
+ "56601 NaN NaN ... NaN 0,000 1,736 Device \n",
+ "56602 NaN NaN ... NaN 0,000 1,736 Device \n",
+ "56603 NaN NaN ... NaN 0,000 1,736 Device \n",
"\n",
- " SrcMemKd DstMemKd Device Ctx GreenCtx Strm \\\n",
- "0 Pageable Device NVIDIA H100 (2) 1 NaN 7 \n",
- "1 Pageable Device NVIDIA H100 (2) 1 NaN 7 \n",
- "2 Pageable Device NVIDIA H100 (1) 1 NaN 7 \n",
- "3 Pageable Device NVIDIA H100 (1) 1 NaN 7 \n",
- "4 Pageable Device NVIDIA H100 (3) 1 NaN 7 \n",
- "... ... ... ... .. ... ... \n",
- "163375 Device Pinned NVIDIA H100 (3) 1 NaN 7 \n",
- "163376 Device Pinned NVIDIA H100 (0) 1 NaN 7 \n",
- "163377 Device Pinned NVIDIA H100 (1) 1 NaN 7 \n",
- "163378 Device Pinned NVIDIA H100 (2) 1 NaN 7 \n",
- "163379 Device Pinned NVIDIA H100 (3) 1 NaN 7 \n",
+ " DstMemKd Device Ctx GreenCtx Strm \\\n",
+ "0 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "1 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "2 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "3 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "4 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "... ... ... .. ... ... \n",
+ "56599 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "56600 NaN NVIDIA H100 (0) 1 NaN 13 \n",
+ "56601 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "56602 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "56603 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
"\n",
- " Name \n",
- "0 [CUDA memcpy Host-to-Device] \n",
- "1 [CUDA memcpy Host-to-Device] \n",
- "2 [CUDA memcpy Host-to-Device] \n",
- "3 [CUDA memcpy Host-to-Device] \n",
- "4 [CUDA memcpy Host-to-Device] \n",
- "... ... \n",
- "163375 [CUDA memcpy Device-to-Host] \n",
- "163376 [CUDA memcpy Device-to-Host] \n",
- "163377 [CUDA memcpy Device-to-Host] \n",
- "163378 [CUDA memcpy Device-to-Host] \n",
- "163379 [CUDA memcpy Device-to-Host] \n",
+ " Name \n",
+ "0 [CUDA memcpy Host-to-Device] \n",
+ "1 [CUDA memcpy Host-to-Device] \n",
+ "2 [CUDA memcpy Host-to-Device] \n",
+ "3 [CUDA memcpy Host-to-Device] \n",
+ "4 [CUDA memcpy Host-to-Device] \n",
+ "... ... \n",
+ "56599 [CUDA memcpy Device-to-Host] \n",
+ "56600 mod_time_ops_adapt_dt_cfl_32_gpu__red \n",
+ "56601 [CUDA memcpy Device-to-Host] \n",
+ "56602 [CUDA memcpy Device-to-Host] \n",
+ "56603 [CUDA memcpy Device-to-Host] \n",
"\n",
- "[163380 rows x 21 columns]"
+ "[56604 rows x 21 columns]"
]
},
- "execution_count": 4,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -499,7 +517,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"id": "5cc1c0a6-8631-44f4-8dcb-66f59ef78c17",
"metadata": {},
"outputs": [
@@ -538,85 +556,85 @@
" \n",
" \n",
" 0 | \n",
- " 3298388027 | \n",
- " 2015 | \n",
+ " 1175970387 | \n",
+ " 4550 | \n",
" cuModuleGetLoadingMode | \n",
" 0 | \n",
" 1 | \n",
- " 413821 | \n",
- " 413821 | \n",
+ " 585611 | \n",
+ " 585611 | \n",
" 0 | \n",
- " MPI Rank 1 | \n",
+ " MPI Rank 0 | \n",
"
\n",
" \n",
" 1 | \n",
- " 3466891507 | \n",
- " 1785 | \n",
- " cuModuleGetLoadingMode | \n",
+ " 1176034134 | \n",
+ " 271 | \n",
+ " cuDeviceGetCount | \n",
" 0 | \n",
- " 1 | \n",
- " 413818 | \n",
- " 413818 | \n",
+ " 2 | \n",
+ " 585611 | \n",
+ " 585611 | \n",
" 0 | \n",
- " MPI Rank 2 | \n",
+ " MPI Rank 0 | \n",
"
\n",
" \n",
" 2 | \n",
- " 3467659248 | \n",
- " 1680 | \n",
- " cuModuleGetLoadingMode | \n",
+ " 1176034842 | \n",
+ " 142 | \n",
+ " cuDeviceGet | \n",
" 0 | \n",
- " 1 | \n",
- " 413820 | \n",
- " 413820 | \n",
+ " 3 | \n",
+ " 585611 | \n",
+ " 585611 | \n",
" 0 | \n",
" MPI Rank 0 | \n",
"
\n",
" \n",
" 3 | \n",
- " 3467739356 | \n",
- " 1547 | \n",
- " cuModuleGetLoadingMode | \n",
+ " 1176035727 | \n",
+ " 9146 | \n",
+ " cuDeviceGetName | \n",
" 0 | \n",
- " 1 | \n",
- " 413819 | \n",
- " 413819 | \n",
+ " 4 | \n",
+ " 585611 | \n",
+ " 585611 | \n",
" 0 | \n",
- " MPI Rank 3 | \n",
+ " MPI Rank 0 | \n",
"
\n",
" \n",
" 4 | \n",
- " 4811123712 | \n",
- " 100325 | \n",
- " cudaStreamCreateWithPriority | \n",
+ " 1176045240 | \n",
+ " 6333 | \n",
+ " cuDeviceTotalMem_v2 | \n",
" 0 | \n",
- " 745 | \n",
- " 413818 | \n",
- " 413818 | \n",
+ " 5 | \n",
+ " 585611 | \n",
+ " 585611 | \n",
" 0 | \n",
- " MPI Rank 2 | \n",
+ " MPI Rank 0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " Start (ns) Duration (ns) Name Result CorrID \\\n",
- "0 3298388027 2015 cuModuleGetLoadingMode 0 1 \n",
- "1 3466891507 1785 cuModuleGetLoadingMode 0 1 \n",
- "2 3467659248 1680 cuModuleGetLoadingMode 0 1 \n",
- "3 3467739356 1547 cuModuleGetLoadingMode 0 1 \n",
- "4 4811123712 100325 cudaStreamCreateWithPriority 0 745 \n",
+ " Start (ns) Duration (ns) Name Result CorrID Pid \\\n",
+ "0 1175970387 4550 cuModuleGetLoadingMode 0 1 585611 \n",
+ "1 1176034134 271 cuDeviceGetCount 0 2 585611 \n",
+ "2 1176034842 142 cuDeviceGet 0 3 585611 \n",
+ "3 1176035727 9146 cuDeviceGetName 0 4 585611 \n",
+ "4 1176045240 6333 cuDeviceTotalMem_v2 0 5 585611 \n",
"\n",
- " Pid Tid T-Pri Thread Name \n",
- "0 413821 413821 0 MPI Rank 1 \n",
- "1 413818 413818 0 MPI Rank 2 \n",
- "2 413820 413820 0 MPI Rank 0 \n",
- "3 413819 413819 0 MPI Rank 3 \n",
- "4 413818 413818 0 MPI Rank 2 "
+ " Tid T-Pri Thread Name \n",
+ "0 585611 0 MPI Rank 0 \n",
+ "1 585611 0 MPI Rank 0 \n",
+ "2 585611 0 MPI Rank 0 \n",
+ "3 585611 0 MPI Rank 0 \n",
+ "4 585611 0 MPI Rank 0 "
]
},
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -628,7 +646,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 7,
"id": "05d0fa22-45f5-405b-8fe7-6a822b4ea64e",
"metadata": {},
"outputs": [
@@ -672,117 +690,117 @@
" \n",
" \n",
" 0 | \n",
- " 5614893 | \n",
- " 23874249117 | \n",
- " 23868634224 | \n",
- " 23868595137 | \n",
- " 39087 | \n",
- " gradient_range | \n",
- " 1336373 | \n",
- " 1336373 | \n",
+ " 1927980172 | \n",
+ " 8059828975 | \n",
+ " 6131848803 | \n",
" 0 | \n",
- " 1 | \n",
- " 27 | \n",
+ " 6131848803 | \n",
+ " Open mesh | \n",
+ " 585613 | \n",
+ " 585613 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 9 | \n",
" NaN | \n",
- " :27 | \n",
- " gradient_range | \n",
+ " :9 | \n",
+ " Open mesh | \n",
"
\n",
" \n",
" 1 | \n",
- " 5632294 | \n",
- " 23874227431 | \n",
- " 23868595137 | \n",
- " 23868427904 | \n",
- " 167233 | \n",
- " step10 | \n",
- " 1336373 | \n",
- " 1336373 | \n",
- " 1 | \n",
- " 1 | \n",
- " 28 | \n",
- " 27.0 | \n",
- " :27:28 | \n",
- " --step10 | \n",
+ " 1929174557 | \n",
+ " 8059831699 | \n",
+ " 6130657142 | \n",
+ " 0 | \n",
+ " 6130657142 | \n",
+ " Open mesh | \n",
+ " 585614 | \n",
+ " 585614 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 10 | \n",
+ " NaN | \n",
+ " :10 | \n",
+ " Open mesh | \n",
"
\n",
" \n",
" 2 | \n",
- " 5767713 | \n",
- " 23874195617 | \n",
- " 23868427904 | \n",
- " 23649727718 | \n",
- " 218700186 | \n",
- " gradient_range | \n",
- " 1336373 | \n",
- " 1336373 | \n",
- " 2 | \n",
- " 21 | \n",
- " 29 | \n",
- " 28.0 | \n",
- " :27:28:29 | \n",
- " ----gradient_range | \n",
+ " 1929371724 | \n",
+ " 8059831396 | \n",
+ " 6130459672 | \n",
+ " 0 | \n",
+ " 6130459672 | \n",
+ " Open mesh | \n",
+ " 585611 | \n",
+ " 585611 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 11 | \n",
+ " NaN | \n",
+ " :11 | \n",
+ " Open mesh | \n",
"
\n",
" \n",
" 3 | \n",
- " 5768678 | \n",
- " 12280877661 | \n",
- " 12275108983 | \n",
- " 12275047299 | \n",
- " 61684 | \n",
- " step10 | \n",
- " 1336373 | \n",
- " 1336373 | \n",
- " 3 | \n",
- " 1 | \n",
- " 30 | \n",
- " 29.0 | \n",
- " :27:28:29:30 | \n",
- " ------step10 | \n",
+ " 1933533501 | \n",
+ " 8059836496 | \n",
+ " 6126302995 | \n",
+ " 0 | \n",
+ " 6126302995 | \n",
+ " Open mesh | \n",
+ " 585612 | \n",
+ " 585612 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " NaN | \n",
+ " :12 | \n",
+ " Open mesh | \n",
"
\n",
" \n",
" 4 | \n",
- " 5807903 | \n",
- " 12280855202 | \n",
- " 12275047299 | \n",
- " 12275011872 | \n",
- " 35427 | \n",
- " gradient_range | \n",
- " 1336373 | \n",
- " 1336373 | \n",
- " 4 | \n",
- " 1 | \n",
- " 31 | \n",
- " 30.0 | \n",
- " :27:28:29:30:31 | \n",
- " --------gradient_range | \n",
+ " 8062014698 | \n",
+ " 8064261693 | \n",
+ " 2246995 | \n",
+ " 0 | \n",
+ " 2246995 | \n",
+ " Gaussian Quadrature | \n",
+ " 585614 | \n",
+ " 585614 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 13 | \n",
+ " NaN | \n",
+ " :13 | \n",
+ " Gaussian Quadrature | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " Start (ns) End (ns) Duration (ns) DurChild (ns) DurNonChild (ns) \\\n",
- "0 5614893 23874249117 23868634224 23868595137 39087 \n",
- "1 5632294 23874227431 23868595137 23868427904 167233 \n",
- "2 5767713 23874195617 23868427904 23649727718 218700186 \n",
- "3 5768678 12280877661 12275108983 12275047299 61684 \n",
- "4 5807903 12280855202 12275047299 12275011872 35427 \n",
+ " Start (ns) End (ns) Duration (ns) DurChild (ns) DurNonChild (ns) \\\n",
+ "0 1927980172 8059828975 6131848803 0 6131848803 \n",
+ "1 1929174557 8059831699 6130657142 0 6130657142 \n",
+ "2 1929371724 8059831396 6130459672 0 6130459672 \n",
+ "3 1933533501 8059836496 6126302995 0 6126302995 \n",
+ "4 8062014698 8064261693 2246995 0 2246995 \n",
"\n",
- " Name PID TID Lvl NumChild RangeId ParentId \\\n",
- "0 gradient_range 1336373 1336373 0 1 27 NaN \n",
- "1 step10 1336373 1336373 1 1 28 27.0 \n",
- "2 gradient_range 1336373 1336373 2 21 29 28.0 \n",
- "3 step10 1336373 1336373 3 1 30 29.0 \n",
- "4 gradient_range 1336373 1336373 4 1 31 30.0 \n",
+ " Name PID TID Lvl NumChild RangeId ParentId \\\n",
+ "0 Open mesh 585613 585613 0 0 9 NaN \n",
+ "1 Open mesh 585614 585614 0 0 10 NaN \n",
+ "2 Open mesh 585611 585611 0 0 11 NaN \n",
+ "3 Open mesh 585612 585612 0 0 12 NaN \n",
+ "4 Gaussian Quadrature 585614 585614 0 0 13 NaN \n",
"\n",
- " RangeStack NameTree \n",
- "0 :27 gradient_range \n",
- "1 :27:28 --step10 \n",
- "2 :27:28:29 ----gradient_range \n",
- "3 :27:28:29:30 ------step10 \n",
- "4 :27:28:29:30:31 --------gradient_range "
+ " RangeStack NameTree \n",
+ "0 :9 Open mesh \n",
+ "1 :10 Open mesh \n",
+ "2 :11 Open mesh \n",
+ "3 :12 Open mesh \n",
+ "4 :13 Gaussian Quadrature "
]
},
- "execution_count": 31,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -1035,6 +1053,105 @@
"mpi_df.iloc[30:40,]"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "27573ad8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "sql_openacc_other = \"\"\"\n",
+ "SELECT\n",
+ " CASE\n",
+ " WHEN srcFile NOT NULL\n",
+ " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n",
+ " ELSE nameIds.value\n",
+ " END AS name,\n",
+ " start,\n",
+ " end,\n",
+ " eventKind,\n",
+ "\tfuncIds.value as func,\n",
+ " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n",
+ "FROM\n",
+ " CUPTI_ACTIVITY_KIND_OPENACC_OTHER\n",
+ "LEFT JOIN\n",
+ " StringIds AS srcFileIds\n",
+ " ON srcFileIds.id == srcFile\n",
+ "LEFT JOIN\n",
+ " StringIds AS nameIds\n",
+ " ON nameIds.id == nameId\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS funcIds\n",
+ "\tON funcIds.id == funcName\n",
+ "\"\"\"\n",
+ "\n",
+ "sql_openacc_launch = \"\"\"\n",
+ "SELECT\n",
+ " CASE\n",
+ " WHEN srcFile NOT NULL\n",
+ " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n",
+ " ELSE nameIds.value\n",
+ " END AS name,\n",
+ " start,\n",
+ " end,\n",
+ " eventKind,\n",
+ " kernelIds.value as kernelName,\n",
+ "\tfuncIds.value as func,\n",
+ " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n",
+ "FROM\n",
+ " CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH\n",
+ "LEFT JOIN\n",
+ " StringIds AS srcFileIds\n",
+ " ON srcFileIds.id == srcFile\n",
+ "LEFT JOIN\n",
+ " StringIds AS nameIds\n",
+ " ON nameIds.id == nameId\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS funcIds\n",
+ "\tON funcIds.id == funcName\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS kernelIds\n",
+ "\tON kernelIds.id == kernelName\n",
+ "\"\"\"\n",
+ "\n",
+ "sql_openacc_data = \"\"\"\n",
+ "SELECT\n",
+ " CASE\n",
+ " WHEN srcFile NOT NULL\n",
+ " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n",
+ " ELSE nameIds.value\n",
+ " END AS name,\n",
+ " start,\n",
+ " end,\n",
+ " eventKind,\n",
+ " varIds.value as variableName,\n",
+ "\tfuncIds.value as func,\n",
+ "\tbytes,\n",
+ " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n",
+ "FROM\n",
+ " CUPTI_ACTIVITY_KIND_OPENACC_DATA\n",
+ "LEFT JOIN\n",
+ " StringIds AS srcFileIds\n",
+ " ON srcFileIds.id == srcFile\n",
+ "LEFT JOIN\n",
+ " StringIds AS nameIds\n",
+ " ON nameIds.id == nameId\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS funcIds\n",
+ "\tON funcIds.id == funcName\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS varIds\n",
+ "\tON varIds.id == varName\n",
+ "\"\"\"\n",
+ "\n",
+ "# SQLAlchemy engine for the SQLite export that shares the report's base name.\n",
+ "engine = create_engine(f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n",
+ "if t_openacc:\n",
+ "    # Read the three OpenACC activity queries and the event-kind enum table\n",
+ "    # over a single connection, then close the engine's pooled connections.\n",
+ "    try:\n",
+ "        with engine.connect() as conn, conn.begin():\n",
+ "            openacc_other_df = pd.read_sql_query(sql_openacc_other, conn)\n",
+ "            openacc_launch_df = pd.read_sql_query(sql_openacc_launch, conn)\n",
+ "            openacc_data_df = pd.read_sql_query(sql_openacc_data, conn)\n",
+ "            openacc_event_kind = pd.read_sql_table(\"ENUM_OPENACC_EVENT_KIND\", conn)\n",
+ "    finally:\n",
+ "        engine.dispose()\n",
+ "else:\n",
+ "    # OpenACC handling is switched off: do not query tables that the report\n",
+ "    # may not contain. Empty frames with the query columns keep cells that\n",
+ "    # reference these names unconditionally working.\n",
+ "    openacc_other_df = pd.DataFrame(columns=[\"name\", \"start\", \"end\", \"eventKind\", \"func\", \"Pid\", \"Tid\"])\n",
+ "    openacc_launch_df = pd.DataFrame(columns=[\"name\", \"start\", \"end\", \"eventKind\", \"kernelName\", \"func\", \"Pid\", \"Tid\"])\n",
+ "    openacc_data_df = pd.DataFrame(columns=[\"name\", \"start\", \"end\", \"eventKind\", \"variableName\", \"func\", \"bytes\", \"Pid\", \"Tid\"])\n",
+ "    openacc_event_kind = pd.DataFrame(columns=[\"id\"])"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 9,
@@ -1074,65 +1191,65 @@
" \n",
" \n",
" \n",
- " 3 | \n",
+ " 0 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
- " 413818 | \n",
- " 2 | \n",
+ " 585611 | \n",
+ " 0 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
+ " 0 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 1 | \n",
+ " 2 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
- " 413819 | \n",
- " 3 | \n",
+ " 585612 | \n",
+ " 0 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
+ " 0 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 0 | \n",
+ " 3 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
- " 413820 | \n",
+ " 585613 | \n",
" 0 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
+ " 0 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 2 | \n",
+ " 1 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
- " 413821 | \n",
- " 1 | \n",
+ " 585614 | \n",
+ " 0 | \n",
" 1 | \n",
- " None | \n",
- " None | \n",
+ " 0 | \n",
+ " 0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " nullStreamId hwId vmId processId deviceId contextId parentContextId \\\n",
- "3 7 0 1 413818 2 1 None \n",
- "1 7 0 1 413819 3 1 None \n",
- "0 7 0 1 413820 0 1 None \n",
- "2 7 0 1 413821 1 1 None \n",
+ " nullStreamId hwId vmId processId deviceId contextId parentContextId \\\n",
+ "0 7 0 1 585611 0 1 0 \n",
+ "2 7 0 1 585612 0 1 0 \n",
+ "3 7 0 1 585613 0 1 0 \n",
+ "1 7 0 1 585614 0 1 0 \n",
"\n",
- " isGreenContext \n",
- "3 None \n",
- "1 None \n",
- "0 None \n",
- "2 None "
+ " isGreenContext \n",
+ "0 0 \n",
+ "2 0 \n",
+ "3 0 \n",
+ "1 0 "
]
},
"execution_count": 9,
@@ -1151,7 +1268,20 @@
"execution_count": 10,
"id": "90cbe0f8",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "ename": "ValueError",
+ "evalue": "Table GPU_METRICS not found",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m gpu_metrics \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_sql_table\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGPU_METRICS\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msqlite:///\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplitext\u001b[49m\u001b[43m(\u001b[49m\u001b[43mREPORT_FILE\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.sqlite\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m metrics_description \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_sql_table(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTARGET_INFO_GPU_METRICS\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msqlite:///\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mos\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39msplitext(REPORT_FILE)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.sqlite\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "File \u001b[0;32m~/Documents/BePPP/heka/tooling/env/lib/python3.10/site-packages/pandas/io/sql.py:386\u001b[0m, in \u001b[0;36mread_sql_table\u001b[0;34m(table_name, con, schema, index_col, coerce_float, parse_dates, columns, chunksize, dtype_backend)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m pandasSQL_builder(con, schema\u001b[38;5;241m=\u001b[39mschema, need_transaction\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m pandas_sql:\n\u001b[1;32m 385\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m pandas_sql\u001b[38;5;241m.\u001b[39mhas_table(table_name):\n\u001b[0;32m--> 386\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtable_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 388\u001b[0m table \u001b[38;5;241m=\u001b[39m pandas_sql\u001b[38;5;241m.\u001b[39mread_table(\n\u001b[1;32m 389\u001b[0m table_name,\n\u001b[1;32m 390\u001b[0m index_col\u001b[38;5;241m=\u001b[39mindex_col,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 395\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 396\u001b[0m )\n\u001b[1;32m 398\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m table \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+ "\u001b[0;31mValueError\u001b[0m: Table GPU_METRICS not found"
+ ]
+ }
+ ],
"source": [
"# Both tables are read from the SQLite export that shares the report's base name.\n",
"_metrics_db_url = f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\"\n",
"gpu_metrics = pd.read_sql_table(\"GPU_METRICS\", _metrics_db_url)\n",
"metrics_description = pd.read_sql_table(\"TARGET_INFO_GPU_METRICS\", _metrics_db_url)"
@@ -1709,7 +1839,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 11,
"id": "69bf520f-cbe4-4c34-b100-7b1b6ede4f17",
"metadata": {},
"outputs": [
@@ -1717,7 +1847,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Unique processes: [413821 413818 413820 413819], and unique threads: [413821 413818 413820 413819]\n"
+ "Unique processes: [585611 585612 585613 585614], and unique threads: [585611 585612 585613 585614]\n"
]
}
],
@@ -1837,7 +1967,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 12,
"id": "5664ba75-7e11-42c6-867b-7ea1a7acd7b7",
"metadata": {},
"outputs": [
@@ -1871,50 +2001,50 @@
" \n",
" \n",
" \n",
- " 1 | \n",
- " 413818 | \n",
- " 413818 | \n",
+ " 0 | \n",
+ " 585611 | \n",
+ " 585611 | \n",
" 1 | \n",
" 1 | \n",
- " 2 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 3 | \n",
- " 413819 | \n",
- " 413819 | \n",
+ " 146 | \n",
+ " 585612 | \n",
+ " 585612 | \n",
" 1 | \n",
" 2 | \n",
- " 3 | \n",
+ " 0 | \n",
"
\n",
" \n",
- " 2 | \n",
- " 413820 | \n",
- " 413820 | \n",
+ " 292 | \n",
+ " 585613 | \n",
+ " 585613 | \n",
" 1 | \n",
" 3 | \n",
" 0 | \n",
"
\n",
" \n",
- " 0 | \n",
- " 413821 | \n",
- " 413821 | \n",
+ " 421 | \n",
+ " 585614 | \n",
+ " 585614 | \n",
" 1 | \n",
" 4 | \n",
- " 1 | \n",
+ " 0 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " Pid Tid thread task device\n",
- "1 413818 413818 1 1 2\n",
- "3 413819 413819 1 2 3\n",
- "2 413820 413820 1 3 0\n",
- "0 413821 413821 1 4 1"
+ " Pid Tid thread task device\n",
+ "0 585611 585611 1 1 0\n",
+ "146 585612 585612 1 2 0\n",
+ "292 585613 585613 1 3 0\n",
+ "421 585614 585614 1 4 0"
]
},
- "execution_count": 19,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -1934,7 +2064,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 13,
"id": "8f6ca36c",
"metadata": {},
"outputs": [
@@ -1975,31 +2105,31 @@
" \n",
" \n",
" 1 | \n",
- " 413818 | \n",
- " {413818} | \n",
+ " 585611 | \n",
+ " {585611} | \n",
" 1 | \n",
- " 2 | \n",
+ " 0 | \n",
"
\n",
" \n",
" 2 | \n",
- " 413819 | \n",
- " {413819} | \n",
+ " 585612 | \n",
+ " {585612} | \n",
" 1 | \n",
- " 3 | \n",
+ " 0 | \n",
"
\n",
" \n",
" 3 | \n",
- " 413820 | \n",
- " {413820} | \n",
+ " 585613 | \n",
+ " {585613} | \n",
" 1 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
- " 413821 | \n",
- " {413821} | \n",
- " 1 | \n",
+ " 585614 | \n",
+ " {585614} | \n",
" 1 | \n",
+ " 0 | \n",
"
\n",
" \n",
"\n",
@@ -2008,13 +2138,13 @@
"text/plain": [
" Pid Tid thread device\n",
"task \n",
- "1 413818 {413818} 1 2\n",
- "2 413819 {413819} 1 3\n",
- "3 413820 {413820} 1 0\n",
- "4 413821 {413821} 1 1"
+ "1 585611 {585611} 1 0\n",
+ "2 585612 {585612} 1 0\n",
+ "3 585613 {585613} 1 0\n",
+ "4 585614 {585614} 1 0"
]
},
- "execution_count": 20,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -2038,6 +2168,12 @@
"cuda_api_df[\"task\"] = 0\n",
"#nvtx_df[\"thread\"] = 0\n",
"#nvtx_df[\"task\"] = 0\n",
+ "openacc_other_df[\"thread\"] = 0\n",
+ "openacc_other_df[\"task\"] = 0\n",
+ "openacc_launch_df[\"thread\"] = 0\n",
+ "openacc_launch_df[\"task\"] = 0\n",
+ "openacc_data_df[\"thread\"] = 0\n",
+ "openacc_data_df[\"task\"] = 0\n",
"\n",
"threads['row_name'] = \"THREAD 1.\" + threads['task'].astype(str) + '.' + threads['thread'].astype(str)\n",
"\n",
@@ -2048,6 +2184,22 @@
"#nvtx_df[\"task\"] = nvtx_df[\"Tid\"].map(threads.set_index('Tid')[\"task\"])\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dca9eb26",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if t_openacc:\n",
+ "    # Index `threads` by Tid once, then fill thread/task on each OpenACC table from it.\n",
+ "    acc_tid_lookup = threads.set_index('Tid')\n",
+ "    for acc_table in (openacc_other_df, openacc_launch_df, openacc_data_df):\n",
+ "        for acc_col in (\"thread\", \"task\"):\n",
+ "            acc_table[acc_col] = acc_table[\"Tid\"].map(acc_tid_lookup[acc_col])"
+ ]
+ },
{
"cell_type": "markdown",
"id": "eac096f4",
@@ -3776,6 +3928,44 @@
"ranges_names.sort_values(\"event_value\", inplace=True)"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "43e05db4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if t_openacc:\n",
+ "    # Shift every event-kind id by one, on the enum table and on the three event\n",
+ "    # tables alike, so they stay aligned. Presumably this keeps value 0 free, since\n",
+ "    # the PCF sections for these event types label 0 as \"End\" (confirm).\n",
+ "    openacc_event_kind[\"id\"] += 1\n",
+ "    openacc_launch_df[\"eventKind\"] += 1\n",
+ "    openacc_data_df[\"eventKind\"] += 1\n",
+ "    openacc_other_df[\"eventKind\"] += 1\n",
+ "\n",
+ "    # Number the distinct `name` strings of each table. Data names start at 1,\n",
+ "    # launch names continue after the data names, and other names after both, so\n",
+ "    # the three tables use disjoint value ranges. dropna=False keeps a missing\n",
+ "    # name as its own group.\n",
+ "    openacc_data_df[\"name_value\"] = openacc_data_df.groupby([\"name\"], dropna=False).ngroup() + 1\n",
+ "    openacc_full_data_names = openacc_data_df[['name_value', 'name']].drop_duplicates()\n",
+ "    openacc_full_data_names.sort_values([\"name_value\"], inplace=True)\n",
+ "\n",
+ "    # count().iloc[0] is the non-null count of the first column (name_value).\n",
+ "    openacc_launch_df[\"name_value\"] = openacc_launch_df.groupby([\"name\"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0]\n",
+ "    openacc_full_launch_names = openacc_launch_df[['name_value', 'name']].drop_duplicates()\n",
+ "    openacc_full_launch_names.sort_values([\"name_value\"], inplace=True)\n",
+ "\n",
+ "    openacc_other_df[\"name_value\"] = openacc_other_df.groupby([\"name\"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0] + openacc_full_launch_names.count().iloc[0]\n",
+ "    openacc_full_other_names = openacc_other_df[['name_value', 'name']].drop_duplicates()\n",
+ "    openacc_full_other_names.sort_values([\"name_value\"], inplace=True)\n",
+ "\n",
+ "    # Same numbering scheme for the `func` column: data, then launch, then other.\n",
+ "    openacc_data_df[\"func_value\"] = openacc_data_df.groupby([\"func\"], dropna=False).ngroup() + 1\n",
+ "    openacc_full_data_funcs = openacc_data_df[['func_value', 'func']].drop_duplicates()\n",
+ "    openacc_full_data_funcs.sort_values([\"func_value\"], inplace=True)\n",
+ "\n",
+ "    openacc_launch_df[\"func_value\"] = openacc_launch_df.groupby([\"func\"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0]\n",
+ "    openacc_full_launch_funcs = openacc_launch_df[['func_value', 'func']].drop_duplicates()\n",
+ "    openacc_full_launch_funcs.sort_values([\"func_value\"], inplace=True)\n",
+ "\n",
+ "    openacc_other_df[\"func_value\"] = openacc_other_df.groupby([\"func\"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0] + openacc_full_launch_funcs.count().iloc[0]\n",
+ "    openacc_full_other_funcs = openacc_other_df[['func_value', 'func']].drop_duplicates()\n",
+ "    openacc_full_other_funcs.sort_values([\"func_value\"], inplace=True)"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 90,
@@ -3875,6 +4065,79 @@
" pcf_file.write(\"{} {}\\n\".format(row[\"event_value\"], row[\"Name\"]))\n",
"pcf_file.write(\"\\n\")\n",
"\n",
+ "if t_openacc:\n",
+ "    # One entry per PCF EVENT_TYPE section, in output order:\n",
+ "    # (header format, event type id, value table, value column, label column).\n",
+ "    # The event-kind table is keyed by \"id\" (the column incremented earlier);\n",
+ "    # it has no \"event_kind\" column.\n",
+ "    openacc_pcf_sections = (\n",
+ "        (\"0 {} OpenACC Data Events\\n\", event_type_openacc_data, openacc_event_kind, \"id\", \"label\"),\n",
+ "        (\"0 {} OpenACC Launch Events\\n\", event_type_openacc_launch, openacc_event_kind, \"id\", \"label\"),\n",
+ "        (\"0 {} OpenACC Other Events\\n\", event_type_openacc, openacc_event_kind, \"id\", \"label\"),\n",
+ "        (\"0 {} OpenACC data region source\\n\", event_type_name_openacc_data, openacc_full_data_names, \"name_value\", \"name\"),\n",
+ "        (\"0 {} OpenACC launch region source\\n\", event_type_name_openacc_launch, openacc_full_launch_names, \"name_value\", \"name\"),\n",
+ "        (\"0 {} OpenACC other region source\\n\", event_type_name_openacc, openacc_full_other_names, \"name_value\", \"name\"),\n",
+ "        (\"0 {} OpenACC data function name\\n\", event_type_func_openacc_data, openacc_full_data_funcs, \"func_value\", \"func\"),\n",
+ "        (\"0 {} OpenACC launch function name\\n\", event_type_func_openacc_launch, openacc_full_launch_funcs, \"func_value\", \"func\"),\n",
+ "        (\"0 {} OpenACC other function name\\n\", event_type_func_openacc, openacc_full_other_funcs, \"func_value\", \"func\"),\n",
+ "    )\n",
+ "    for acc_header, acc_type, acc_table, acc_value_col, acc_label_col in openacc_pcf_sections:\n",
+ "        pcf_file.write(\"EVENT_TYPE\\n\")\n",
+ "        pcf_file.write(acc_header.format(acc_type))\n",
+ "        pcf_file.write(\"VALUES\\n\")\n",
+ "        pcf_file.write(\"0 End\\n\")\n",
+ "        for acc_idx, acc_row in acc_table.iterrows():\n",
+ "            pcf_file.write(\"{} {}\\n\".format(acc_row[acc_value_col], acc_row[acc_label_col]))\n",
+ "        pcf_file.write(\"\\n\")\n",
+ "\n",
"pcf_file.close()"
]
},
@@ -4396,6 +4659,26 @@
"prv_file.write(chunk)\n",
"\n",
"chunk = \"\"\n",
+ "# Emit the OpenACC records only when OpenACC tracing is enabled; the frames and\n",
+ "# value columns used here are prepared under the same `if t_openacc:` guard.\n",
+ "if t_openacc:\n",
+ "    # Data records carry a fourth value: the transfer size in the `bytes` column.\n",
+ "    t_acc_d = [event_type_openacc_data, event_type_name_openacc_data, event_type_func_openacc_data, event_type_openacc_data_size]\n",
+ "    for index, r in openacc_data_df.iterrows():\n",
+ "        values = [r[\"eventKind\"], r[\"name_value\"], r[\"func_value\"], r[\"bytes\"]]\n",
+ "        chunk += create_combined_events_record(r[\"start\"], r[\"end\"] - r[\"start\"], r[\"thread\"], r[\"task\"], t_acc_d, values)\n",
+ "    prv_file.write(chunk)\n",
+ "    chunk = \"\"\n",
+ "    t_acc_l = [event_type_openacc_launch, event_type_name_openacc_launch, event_type_func_openacc_launch]\n",
+ "    for index, r in openacc_launch_df.iterrows():\n",
+ "        values = [r[\"eventKind\"], r[\"name_value\"], r[\"func_value\"]]\n",
+ "        chunk += create_combined_events_record(r[\"start\"], r[\"end\"] - r[\"start\"], r[\"thread\"], r[\"task\"], t_acc_l, values)\n",
+ "    prv_file.write(chunk)\n",
+ "    chunk = \"\"\n",
+ "    t_acc_o = [event_type_openacc, event_type_name_openacc, event_type_func_openacc]\n",
+ "    for index, r in openacc_other_df.iterrows():\n",
+ "        values = [r[\"eventKind\"], r[\"name_value\"], r[\"func_value\"]]\n",
+ "        chunk += create_combined_events_record(r[\"start\"], r[\"end\"] - r[\"start\"], r[\"thread\"], r[\"task\"], t_acc_o, values)\n",
+ "    prv_file.write(chunk)\n",
+ "\n",
+ "\n",
+ "chunk = \"\"\n",
"for index, row in comm_kernel_df.iterrows():\n",
" chunk += create_communication_record(row[\"task\"], row[\"thread_call\"], row[\"task\"], row[\"thread_k\"], (row[\"Start (ns)_call\"] + row[\"Duration (ns)_call\"]), row[\"Start (ns)_k\"], 0, comm_tag_launch)\n",
"prv_file.write(chunk)\n",
diff --git a/parse-nsys-stats.py b/parse-nsys-stats.py
index 651a4476a669022f38be734330b6e3428c108e61..29ee79a594fc0ba3e70a9ae99429d5983d489211 100755
--- a/parse-nsys-stats.py
+++ b/parse-nsys-stats.py
@@ -11,6 +11,7 @@ import subprocess
import os
import locale
import sqlite3
+from sqlalchemy import create_engine
locale.setlocale(locale.LC_ALL, '')
@@ -19,7 +20,7 @@ parser = argparse.ArgumentParser(description="Convert a NVIDIA Nsight System tra
epilog="The environment variables NSIGHT_HOME and PARAVER_HOME are needed")
parser.add_argument("-f", "--filter-nvtx", help="Filter by this NVTX range")
-parser.add_argument("-t", "--trace", required=True, help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, cuda_api_trace, cuda_gpu_trace, gpu_metrics]")
+parser.add_argument("-t", "--trace", required=True, help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, cuda_api_trace, cuda_gpu_trace, gpu_metrics, openacc]")
parser.add_argument("--force-sqlite", action="store_true", help="Force Nsight System to export SQLite database")
@@ -36,6 +37,89 @@ args = parser.parse_args()
# # Trace configuration and setup
+# Query over CUPTI_ACTIVITY_KIND_OPENACC_OTHER. The nameId, srcFile and funcName ids
+# are resolved through StringIds; `name` is "<name>@<file>:<line>" when srcFile is
+# set and the bare name otherwise. Pid and Tid are unpacked from globalTid.
+sql_openacc_other = """
+SELECT
+ CASE
+ WHEN srcFile NOT NULL
+ THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo
+ ELSE nameIds.value
+ END AS name,
+ start,
+ end,
+ eventKind,
+ funcIds.value as func,
+ globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid
+FROM
+ CUPTI_ACTIVITY_KIND_OPENACC_OTHER
+LEFT JOIN
+ StringIds AS srcFileIds
+ ON srcFileIds.id == srcFile
+LEFT JOIN
+ StringIds AS nameIds
+ ON nameIds.id == nameId
+LEFT JOIN
+ StringIds AS funcIds
+ ON funcIds.id == funcName
+"""
+
+# Same shape over CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH, with the kernelName id
+# additionally resolved through StringIds.
+sql_openacc_launch = """
+SELECT
+ CASE
+ WHEN srcFile NOT NULL
+ THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo
+ ELSE nameIds.value
+ END AS name,
+ start,
+ end,
+ eventKind,
+ kernelIds.value as kernelName,
+ funcIds.value as func,
+ globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid
+FROM
+ CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH
+LEFT JOIN
+ StringIds AS srcFileIds
+ ON srcFileIds.id == srcFile
+LEFT JOIN
+ StringIds AS nameIds
+ ON nameIds.id == nameId
+LEFT JOIN
+ StringIds AS funcIds
+ ON funcIds.id == funcName
+LEFT JOIN
+ StringIds AS kernelIds
+ ON kernelIds.id == kernelName
+"""
+
+# Same shape over CUPTI_ACTIVITY_KIND_OPENACC_DATA, with the varName id resolved
+# through StringIds and the `bytes` column selected as well.
+sql_openacc_data = """
+SELECT
+ CASE
+ WHEN srcFile NOT NULL
+ THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo
+ ELSE nameIds.value
+ END AS name,
+ start,
+ end,
+ eventKind,
+ varIds.value as variableName,
+ funcIds.value as func,
+ bytes,
+ globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid
+FROM
+ CUPTI_ACTIVITY_KIND_OPENACC_DATA
+LEFT JOIN
+ StringIds AS srcFileIds
+ ON srcFileIds.id == srcFile
+LEFT JOIN
+ StringIds AS nameIds
+ ON nameIds.id == nameId
+LEFT JOIN
+ StringIds AS funcIds
+ ON funcIds.id == funcName
+LEFT JOIN
+ StringIds AS varIds
+ ON varIds.id == varName
+"""
#NSIGHT_HOME="/home/mclasca/Apps/nsight-system/2024.1"
NSIGHT_HOME = os.getenv('NSIGHT_HOME')
@@ -54,12 +138,14 @@ NVTX_FILTER = args.filter_nvtx != None
NVTX_RANGE = args.filter_nvtx
reports = args.trace.split(",")
+reports_og = reports.copy()
t_nvtx = False
t_kernels = False
t_apicalls = False
t_mpi = False
t_metrics = False
+t_openacc = False
if "nvtx_pushpop_trace" in reports: t_nvtx = True
if "cuda_gpu_trace" in reports: t_kernels = True
@@ -68,6 +154,9 @@ if "mpi_event_trace" in reports: t_mpi = True
if "gpu_metrics" in reports:
t_metrics = True
reports.remove("gpu_metrics")
+if "openacc" in reports:
+ t_openacc = True
+ reports.remove("openacc")
#trace_name = "llava_cesga"
event_type_kernels = 63000006
@@ -82,6 +171,20 @@ event_type_correlation = 9200
event_type_mpi = 9300
event_type_metrics_base = 9400
+event_type_openacc = 66000000
+event_type_openacc_data = 66000001
+event_type_openacc_launch = 66000002
+
+event_type_name_openacc = 66100000
+event_type_name_openacc_data = 66100001
+event_type_name_openacc_launch = 66100002
+
+event_type_func_openacc = 66200000
+event_type_func_openacc_data = 66200001
+event_type_func_openacc_launch = 66200002
+
+event_type_openacc_data_size = 66300001
+
comm_tag_launch = 55001
comm_tag_memory = 55002
comm_tag_dependency = 55003
@@ -102,7 +205,7 @@ def build_nsys_stats_name(report_name):
return os.path.join(REPORT_DIR, base_name+"_{}.csv".format(report_name))
-print("Extracting reports for: {}".format(reports))
+print("Extracting reports for: {}".format(reports_og))
nsys_call = (NSIGHT_HOME+"/bin/nsys", "stats", "-r", ",".join(reports),
"--timeunit", "nsec", "-f", "csv",
"--force-overwrite", "true", "-o", ".")
@@ -163,6 +266,14 @@ if t_metrics:
gpu_metrics_agg.reset_index(inplace=True)
+if t_openacc:
+    # The OpenACC tables are read directly from the SQLite database that shares the
+    # report's path and base name, with the extension replaced by .sqlite.
+    # NOTE(review): presumably that file is left behind by the earlier nsys call;
+    # confirm it exists whenever "openacc" is requested.
+    engine = create_engine(f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite")
+    with engine.connect() as conn, conn.begin():
+        # Three activity tables through the sql_openacc_* queries, plus the whole
+        # ENUM_OPENACC_EVENT_KIND table.
+        openacc_other_df = pd.read_sql_query(sql_openacc_other, conn)
+        openacc_launch_df = pd.read_sql_query(sql_openacc_launch, conn)
+        openacc_data_df = pd.read_sql_query(sql_openacc_data, conn)
+        openacc_event_kind = pd.read_sql_table("ENUM_OPENACC_EVENT_KIND", conn)
+
# # Building object model
@@ -171,7 +282,8 @@ if t_metrics:
if t_apicalls: print("CUDA calls unique processes: {}, and unique threads: {}".format(cuda_api_df["Pid"].unique(), cuda_api_df["Tid"].unique()))
if t_nvtx: print("NVTX ranges unique processes: {}, and unique threads: {}".format(nvtx_df["PID"].unique(), nvtx_df["TID"].unique()))
-if t_mpi: print("MPI ranges unique processes: {}, and unique threads: {}".format(mpi_df["Pid"].unique(), mpi_df["Tid"].unique()))
+if t_mpi: print("MPI calls unique processes: {}, and unique threads: {}".format(mpi_df["Pid"].unique(), mpi_df["Tid"].unique()))
+if t_openacc: print("OpenACC calls unique processes: {}, and unique threads: {}".format(openacc_other_df["Pid"].unique(), openacc_other_df["Tid"].unique()))
if t_nvtx: nvtx_df.rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True)
@@ -179,6 +291,7 @@ compute_threads_with = []
if t_apicalls: compute_threads_with.append(cuda_api_df[['Pid', 'Tid']])
if t_nvtx: compute_threads_with.append(nvtx_df[["Pid", "Tid"]])
if t_mpi: compute_threads_with.append(mpi_df[["Pid", "Tid"]])
+# Gather Pid/Tid pairs from all three OpenACC tables: a thread seen only in the
+# launch or data table would otherwise be absent from `threads`, and its rows
+# would get NaN thread/task when the Tid mapping is applied later.
+if t_openacc: compute_threads_with.append(pd.concat([acc_df[["Pid", "Tid"]] for acc_df in (openacc_other_df, openacc_launch_df, openacc_data_df)]))
threads = pd.concat(compute_threads_with).drop_duplicates()
threads.sort_values(["Pid"], inplace=True)
@@ -199,6 +312,12 @@ nvtx_df["thread"] = 0
nvtx_df["task"] = 0
mpi_df["thread"] = 0
mpi_df["task"] = 0
+# The OpenACC frames only exist when "openacc" was requested with -t, so the
+# default placement columns must be set under the same guard.
+if t_openacc:
+    openacc_other_df["thread"] = 0
+    openacc_other_df["task"] = 0
+    openacc_launch_df["thread"] = 0
+    openacc_launch_df["task"] = 0
+    openacc_data_df["thread"] = 0
+    openacc_data_df["task"] = 0
threads['row_name'] = "THREAD 1." + threads['task'].astype(str) + '.' + threads['thread'].astype(str)
@@ -222,6 +341,15 @@ if t_mpi:
mpi_df["thread"] = mpi_df["Tid"].map(threads.set_index('Tid')["thread"])
mpi_df["task"] = mpi_df["Tid"].map(threads.set_index('Tid')["task"])
+if t_openacc:
+    # Index `threads` by Tid once, then fill thread/task on each OpenACC table from it.
+    acc_tid_lookup = threads.set_index('Tid')
+    for acc_table in (openacc_other_df, openacc_launch_df, openacc_data_df):
+        for acc_col in ("thread", "task"):
+            acc_table[acc_col] = acc_table["Tid"].map(acc_tid_lookup[acc_col])
+
+
#
# ## GPU devices
# First, detect number of devices and streams. To respect Paraver's resource model, we will create a THREAD for each stream. To do that, select each unique pair of Device and Stream and assign an incremental ID.
@@ -335,6 +463,37 @@ if t_nvtx:
ranges_names = nvtx_df_subset[['event_value', 'Name']].drop_duplicates()
ranges_names.sort_values("event_value", inplace=True)
+if t_openacc:
+    # Shift every event-kind id by one, on the enum table and on the three event
+    # tables alike, so they stay aligned. Presumably this keeps value 0 free, since
+    # the PCF sections for these event types label 0 as "End" (confirm).
+    openacc_event_kind["id"] += 1
+    openacc_launch_df["eventKind"] += 1
+    openacc_data_df["eventKind"] += 1
+    openacc_other_df["eventKind"] += 1
+
+    # Number the distinct `name` strings of each table. Data names start at 1,
+    # launch names continue after the data names, and other names after both, so
+    # the three tables use disjoint value ranges. dropna=False keeps a missing
+    # name as its own group.
+    openacc_data_df["name_value"] = openacc_data_df.groupby(["name"], dropna=False).ngroup() + 1
+    openacc_full_data_names = openacc_data_df[['name_value', 'name']].drop_duplicates()
+    openacc_full_data_names.sort_values(["name_value"], inplace=True)
+
+    # count().iloc[0] is the non-null count of the first column (name_value).
+    openacc_launch_df["name_value"] = openacc_launch_df.groupby(["name"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0]
+    openacc_full_launch_names = openacc_launch_df[['name_value', 'name']].drop_duplicates()
+    openacc_full_launch_names.sort_values(["name_value"], inplace=True)
+
+    openacc_other_df["name_value"] = openacc_other_df.groupby(["name"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0] + openacc_full_launch_names.count().iloc[0]
+    openacc_full_other_names = openacc_other_df[['name_value', 'name']].drop_duplicates()
+    openacc_full_other_names.sort_values(["name_value"], inplace=True)
+
+    # Same numbering scheme for the `func` column: data, then launch, then other.
+    openacc_data_df["func_value"] = openacc_data_df.groupby(["func"], dropna=False).ngroup() + 1
+    openacc_full_data_funcs = openacc_data_df[['func_value', 'func']].drop_duplicates()
+    openacc_full_data_funcs.sort_values(["func_value"], inplace=True)
+
+    openacc_launch_df["func_value"] = openacc_launch_df.groupby(["func"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0]
+    openacc_full_launch_funcs = openacc_launch_df[['func_value', 'func']].drop_duplicates()
+    openacc_full_launch_funcs.sort_values(["func_value"], inplace=True)
+
+    openacc_other_df["func_value"] = openacc_other_df.groupby(["func"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0] + openacc_full_launch_funcs.count().iloc[0]
+    openacc_full_other_funcs = openacc_other_df[['func_value', 'func']].drop_duplicates()
+    openacc_full_other_funcs.sort_values(["func_value"], inplace=True)
+
+
print("-\tWriting pcf file...")
@@ -444,6 +603,79 @@ if t_nvtx:
pcf_file.write("{} {}\n".format(row["event_value"], row["Name"]))
pcf_file.write("\n")
+if t_openacc:
+    # One entry per PCF EVENT_TYPE section, in output order:
+    # (header format, event type id, value table, value column, label column).
+    openacc_pcf_sections = (
+        ("0 {} OpenACC Data Events\n", event_type_openacc_data, openacc_event_kind, "id", "label"),
+        ("0 {} OpenACC Launch Events\n", event_type_openacc_launch, openacc_event_kind, "id", "label"),
+        ("0 {} OpenACC Other Events\n", event_type_openacc, openacc_event_kind, "id", "label"),
+        ("0 {} OpenACC data region source\n", event_type_name_openacc_data, openacc_full_data_names, "name_value", "name"),
+        ("0 {} OpenACC launch region source\n", event_type_name_openacc_launch, openacc_full_launch_names, "name_value", "name"),
+        ("0 {} OpenACC other region source\n", event_type_name_openacc, openacc_full_other_names, "name_value", "name"),
+        ("0 {} OpenACC data function name\n", event_type_func_openacc_data, openacc_full_data_funcs, "func_value", "func"),
+        ("0 {} OpenACC launch function name\n", event_type_func_openacc_launch, openacc_full_launch_funcs, "func_value", "func"),
+        ("0 {} OpenACC other function name\n", event_type_func_openacc, openacc_full_other_funcs, "func_value", "func"),
+    )
+    for acc_header, acc_type, acc_table, acc_value_col, acc_label_col in openacc_pcf_sections:
+        pcf_file.write("EVENT_TYPE\n")
+        pcf_file.write(acc_header.format(acc_type))
+        pcf_file.write("VALUES\n")
+        pcf_file.write("0 End\n")
+        for acc_idx, acc_row in acc_table.iterrows():
+            pcf_file.write("{} {}\n".format(acc_row[acc_value_col], acc_row[acc_label_col]))
+        pcf_file.write("\n")
+
pcf_file.close()
# # Split of kernel execution between compute and memory
@@ -560,6 +792,27 @@ if t_mpi:
prv_file.write(chunk)
chunk = ""
+if t_openacc:
+    print("-\tWriting OpenACC events...")
+    t_acc_d = [event_type_openacc_data, event_type_name_openacc_data, event_type_func_openacc_data, event_type_openacc_data_size]
+    t_acc_l = [event_type_openacc_launch, event_type_name_openacc_launch, event_type_func_openacc_launch]
+    t_acc_o = [event_type_openacc, event_type_name_openacc, event_type_func_openacc]
+    # (table, event types, value columns) in the order the records are written:
+    # data, launch, other. Only data rows carry the extra `bytes` value.
+    openacc_prv_batches = (
+        (openacc_data_df, t_acc_d, ("eventKind", "name_value", "func_value", "bytes")),
+        (openacc_launch_df, t_acc_l, ("eventKind", "name_value", "func_value")),
+        (openacc_other_df, t_acc_o, ("eventKind", "name_value", "func_value")),
+    )
+    for acc_table, acc_types, acc_cols in openacc_prv_batches:
+        for index, r in acc_table.iterrows():
+            values = [r[acc_col] for acc_col in acc_cols]
+            chunk += create_combined_events_record(r["start"], r["end"] - r["start"], r["thread"], r["task"], acc_types, values)
+        # Flush each table's records on their own and start the next batch empty.
+        prv_file.write(chunk)
+        chunk = ""
+
if t_metrics:
print("-\tWriting GPU metrics...")
for index, row in gpu_metrics_agg.iterrows():
diff --git a/parser-playground.ipynb b/parser-playground.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..3a2169c3b1ec318333e91b25ec94d3f9044f4ff1
--- /dev/null
+++ b/parser-playground.ipynb
@@ -0,0 +1,1466 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import time\n",
+ "import subprocess\n",
+ "import os\n",
+ "import locale\n",
+ "import sqlite3\n",
+ "from sqlalchemy import create_engine\n",
+ "\n",
+ "NSIGHT_HOME=\"/home/mclasca/Apps/nsight-system/2024.1\"\n",
+ "#NSIGHT_HOME = os.getenv('NSIGHT_HOME')\n",
+ "PARAVER_HOME = os.getenv('PARAVER_HOME')\n",
+ "NVTX_RANGE=\"step53\"\n",
+ "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/heka/profiles/mistral-mn5/heka-axolotl-Mistral7B0.1-4s_withmetrics-2432719.nsys-rep\")\n",
+ "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n",
+ "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.nsys-rep\")\n",
+ "REPORT_DIR = os.path.dirname(REPORT_FILE)\n",
+ "#REPORT_NAME=\"heka-step53+accum1-profile-2023.4-5721957\"\n",
+ "#REPORT_NAME=\"heka-axolotl-Mistral7B0.1-profile-2110598\"\n",
+ "\n",
+ "locale.setlocale(locale.LC_ALL, '')\n",
+ "\n",
+ "trace_name = \"test-sod2d-openacc\"\n",
+ "event_type_kernels = 63000006\n",
+ "event_type_memcopy_size = 63000002\n",
+ "event_type_api = 63000000\n",
+ "event_type_nvtx = 9003\n",
+ "event_type_blkgrd_name = 9100\n",
+ "event_types_block_grid_values = [9101, 9102, 9103, 9104, 9105, 9106]\n",
+ "event_types_block_grid_values_names = ['GrdX', 'GrdY', 'GrdZ', 'BlkX', 'BlkY', 'BlkZ']\n",
+ "event_type_registers_thread = 9107\n",
+ "event_type_correlation = 9200\n",
+ "event_type_mpi = 9300\n",
+ "event_type_metrics_base = 9400\n",
+ "\n",
+ "event_type_openacc = 66000000\n",
+ "event_type_openacc_data = 66000001\n",
+ "event_type_openacc_launch = 66000002\n",
+ "\n",
+ "event_type_name_openacc = 66100000\n",
+ "event_type_name_openacc_data = 66100001\n",
+ "event_type_name_openacc_launch = 66100002\n",
+ "\n",
+ "event_type_func_openacc = 66200000\n",
+ "event_type_func_openacc_data = 66200001\n",
+ "event_type_func_openacc_launch = 66200002\n",
+ "\n",
+ "event_type_openacc_data_size = 66300001\n",
+ "\n",
+ "comm_tag_launch = 55001\n",
+ "comm_tag_memory = 55002\n",
+ "comm_tag_dependency = 55003\n",
+ "\n",
+ "nvtx_select_frames = True\n",
+ "nvtx_stack_top = 1\n",
+ "nvtx_stack_bottom = 4\n",
+ "\n",
+ "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\"]\n",
+ "\n",
+ "def build_nsys_stats_name(report_name):\n",
+ "    \"\"\"Return REPORT_DIR/<report file stem>_<report_name>.csv.\"\"\"\n",
+ "    report_stem, _report_ext = os.path.splitext(os.path.basename(REPORT_FILE))\n",
+ "    csv_name = report_stem + \"_{}.csv\".format(report_name)\n",
+ "    return os.path.join(REPORT_DIR, csv_name)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Extracting reports for: ['nvtx_pushpop_trace', 'cuda_api_trace', 'cuda_gpu_trace']\n",
+ "\n",
+ "NOTICE: Existing SQLite export found: /home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite\n",
+ " It is assumed file was previously exported from: /home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.nsys-rep\n",
+ " Consider using --force-export=true if needed.\n",
+ "\n",
+ "Processing [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite] with [/home/mclasca/Apps/nsight-system/2024.1/host-linux-x64/reports/nvtx_pushpop_trace.py] to [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d_nvtx_pushpop_trace.csv]... PROCESSED\n",
+ "\n",
+ "Processing [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite] with [/home/mclasca/Apps/nsight-system/2024.1/host-linux-x64/reports/cuda_api_trace.py] to [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d_cuda_api_trace.csv]... PROCESSED\n",
+ "\n",
+ "Processing [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite] with [/home/mclasca/Apps/nsight-system/2024.1/host-linux-x64/reports/cuda_gpu_trace.py] to [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d_cuda_gpu_trace.csv]... PROCESSED\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Run `nsys stats` for the selected reports and echo its combined stdout/stderr.\n",
+ "print(\"Extracting reports for: {}\".format(reports))\n",
+ "args = (NSIGHT_HOME+\"/bin/nsys\", \"stats\", \"-r\", \",\".join(reports), \n",
+ "        \"--timeunit\", \"nsec\", \"-f\", \"csv\", \n",
+ "        \"--force-overwrite\", \"true\", \"-o\", \".\", REPORT_FILE)\n",
+ "\n",
+ "with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:\n",
+ "    for line in p.stdout:\n",
+ "        print(line.decode(), end='')\n",
+ "\n",
+ "if p.returncode != 0:\n",
+ "    # Only the `subprocess` module is imported, so the exception class must be\n",
+ "    # qualified; the bare name would raise NameError instead of reporting the failure.\n",
+ "    raise subprocess.CalledProcessError(p.returncode, p.args)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Start (ns) | \n",
+ " Duration (ns) | \n",
+ " CorrID | \n",
+ " GrdX | \n",
+ " GrdY | \n",
+ " GrdZ | \n",
+ " BlkX | \n",
+ " BlkY | \n",
+ " BlkZ | \n",
+ " Reg/Trd | \n",
+ " ... | \n",
+ " DymSMem (MB) | \n",
+ " Bytes (MB) | \n",
+ " Throughput (MB/s) | \n",
+ " SrcMemKd | \n",
+ " DstMemKd | \n",
+ " Device | \n",
+ " Ctx | \n",
+ " GreenCtx | \n",
+ " Strm | \n",
+ " Name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1924937486 | \n",
+ " 1056 | \n",
+ " 213 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 30,303 | \n",
+ " Pageable | \n",
+ " Device | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Host-to-Device] | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1926678138 | \n",
+ " 1024 | \n",
+ " 211 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 31,250 | \n",
+ " Pageable | \n",
+ " Device | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Host-to-Device] | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1928094871 | \n",
+ " 992 | \n",
+ " 213 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 32,258 | \n",
+ " Pageable | \n",
+ " Device | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Host-to-Device] | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1932409317 | \n",
+ " 992 | \n",
+ " 211 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 32,258 | \n",
+ " Pageable | \n",
+ " Device | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Host-to-Device] | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1980006677 | \n",
+ " 896 | \n",
+ " 684 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 142,857 | \n",
+ " Pageable | \n",
+ " Device | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Host-to-Device] | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 56599 | \n",
+ " 38878898445 | \n",
+ " 2208 | \n",
+ " 198835 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 1,812 | \n",
+ " Device | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Device-to-Host] | \n",
+ "
\n",
+ " \n",
+ " 56600 | \n",
+ " 38878940548 | \n",
+ " 109920 | \n",
+ " 154364 | \n",
+ " 3.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 256.0 | \n",
+ " 1.0 | \n",
+ " 1.0 | \n",
+ " 18.0 | \n",
+ " ... | \n",
+ " 0,001 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu__red | \n",
+ "
\n",
+ " \n",
+ " 56601 | \n",
+ " 38879061123 | \n",
+ " 2304 | \n",
+ " 154366 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 1,736 | \n",
+ " Device | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Device-to-Host] | \n",
+ "
\n",
+ " \n",
+ " 56602 | \n",
+ " 38879071844 | \n",
+ " 2304 | \n",
+ " 154367 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 1,736 | \n",
+ " Device | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Device-to-Host] | \n",
+ "
\n",
+ " \n",
+ " 56603 | \n",
+ " 38879082499 | \n",
+ " 2304 | \n",
+ " 154368 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0,000 | \n",
+ " 1,736 | \n",
+ " Device | \n",
+ " Pageable | \n",
+ " NVIDIA H100 (0) | \n",
+ " 1 | \n",
+ " NaN | \n",
+ " 13 | \n",
+ " [CUDA memcpy Device-to-Host] | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
56604 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Start (ns) Duration (ns) CorrID GrdX GrdY GrdZ BlkX BlkY \\\n",
+ "0 1924937486 1056 213 NaN NaN NaN NaN NaN \n",
+ "1 1926678138 1024 211 NaN NaN NaN NaN NaN \n",
+ "2 1928094871 992 213 NaN NaN NaN NaN NaN \n",
+ "3 1932409317 992 211 NaN NaN NaN NaN NaN \n",
+ "4 1980006677 896 684 NaN NaN NaN NaN NaN \n",
+ "... ... ... ... ... ... ... ... ... \n",
+ "56599 38878898445 2208 198835 NaN NaN NaN NaN NaN \n",
+ "56600 38878940548 109920 154364 3.0 1.0 1.0 256.0 1.0 \n",
+ "56601 38879061123 2304 154366 NaN NaN NaN NaN NaN \n",
+ "56602 38879071844 2304 154367 NaN NaN NaN NaN NaN \n",
+ "56603 38879082499 2304 154368 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " BlkZ Reg/Trd ... DymSMem (MB) Bytes (MB) Throughput (MB/s) SrcMemKd \\\n",
+ "0 NaN NaN ... NaN 0,000 30,303 Pageable \n",
+ "1 NaN NaN ... NaN 0,000 31,250 Pageable \n",
+ "2 NaN NaN ... NaN 0,000 32,258 Pageable \n",
+ "3 NaN NaN ... NaN 0,000 32,258 Pageable \n",
+ "4 NaN NaN ... NaN 0,000 142,857 Pageable \n",
+ "... ... ... ... ... ... ... ... \n",
+ "56599 NaN NaN ... NaN 0,000 1,812 Device \n",
+ "56600 1.0 18.0 ... 0,001 NaN NaN NaN \n",
+ "56601 NaN NaN ... NaN 0,000 1,736 Device \n",
+ "56602 NaN NaN ... NaN 0,000 1,736 Device \n",
+ "56603 NaN NaN ... NaN 0,000 1,736 Device \n",
+ "\n",
+ " DstMemKd Device Ctx GreenCtx Strm \\\n",
+ "0 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "1 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "2 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "3 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "4 Device NVIDIA H100 (0) 1 NaN 13 \n",
+ "... ... ... .. ... ... \n",
+ "56599 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "56600 NaN NVIDIA H100 (0) 1 NaN 13 \n",
+ "56601 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "56602 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "56603 Pageable NVIDIA H100 (0) 1 NaN 13 \n",
+ "\n",
+ " Name \n",
+ "0 [CUDA memcpy Host-to-Device] \n",
+ "1 [CUDA memcpy Host-to-Device] \n",
+ "2 [CUDA memcpy Host-to-Device] \n",
+ "3 [CUDA memcpy Host-to-Device] \n",
+ "4 [CUDA memcpy Host-to-Device] \n",
+ "... ... \n",
+ "56599 [CUDA memcpy Device-to-Host] \n",
+ "56600 mod_time_ops_adapt_dt_cfl_32_gpu__red \n",
+ "56601 [CUDA memcpy Device-to-Host] \n",
+ "56602 [CUDA memcpy Device-to-Host] \n",
+ "56603 [CUDA memcpy Device-to-Host] \n",
+ "\n",
+ "[56604 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read the cuda_gpu_trace CSV and spell the correlation-id column as \"CorrID\".\n",
+ "kernels_df = pd.read_csv(build_nsys_stats_name(\"cuda_gpu_trace\")).rename(\n",
+ "    columns={\"CorrId\": \"CorrID\"}\n",
+ ")\n",
+ "kernels_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The three queries below share one shape: the displayed name is '<name>@<file>:<line>' when\n",
+ "# a source file is recorded (plain name otherwise), string ids are resolved through StringIds,\n",
+ "# and Pid/Tid are two 24-bit fields unpacked from globalTid.\n",
+ "\n",
+ "# OpenACC activities from CUPTI_ACTIVITY_KIND_OPENACC_OTHER.\n",
+ "sql_openacc_other = \"\"\"\n",
+ "SELECT\n",
+ " CASE\n",
+ " WHEN srcFile NOT NULL\n",
+ " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n",
+ " ELSE nameIds.value\n",
+ " END AS name,\n",
+ " start,\n",
+ " end,\n",
+ " eventKind,\n",
+ "\tfuncIds.value as func,\n",
+ " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n",
+ "FROM\n",
+ " CUPTI_ACTIVITY_KIND_OPENACC_OTHER\n",
+ "LEFT JOIN\n",
+ " StringIds AS srcFileIds\n",
+ " ON srcFileIds.id == srcFile\n",
+ "LEFT JOIN\n",
+ " StringIds AS nameIds\n",
+ " ON nameIds.id == nameId\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS funcIds\n",
+ "\tON funcIds.id == funcName\n",
+ "\"\"\"\n",
+ "\n",
+ "# OpenACC launch activities; additionally resolves the launched kernel's name.\n",
+ "sql_openacc_launch = \"\"\"\n",
+ "SELECT\n",
+ " CASE\n",
+ " WHEN srcFile NOT NULL\n",
+ " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n",
+ " ELSE nameIds.value\n",
+ " END AS name,\n",
+ " start,\n",
+ " end,\n",
+ " eventKind,\n",
+ " kernelIds.value as kernelName,\n",
+ "\tfuncIds.value as func,\n",
+ " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n",
+ "FROM\n",
+ " CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH\n",
+ "LEFT JOIN\n",
+ " StringIds AS srcFileIds\n",
+ " ON srcFileIds.id == srcFile\n",
+ "LEFT JOIN\n",
+ " StringIds AS nameIds\n",
+ " ON nameIds.id == nameId\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS funcIds\n",
+ "\tON funcIds.id == funcName\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS kernelIds\n",
+ "\tON kernelIds.id == kernelName\n",
+ "\"\"\"\n",
+ "\n",
+ "# OpenACC data activities; additionally resolves the variable name and keeps the byte count.\n",
+ "sql_openacc_data = \"\"\"\n",
+ "SELECT\n",
+ " CASE\n",
+ " WHEN srcFile NOT NULL\n",
+ " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n",
+ " ELSE nameIds.value\n",
+ " END AS name,\n",
+ " start,\n",
+ " end,\n",
+ " eventKind,\n",
+ " varIds.value as variableName,\n",
+ "\tfuncIds.value as func,\n",
+ "\tbytes,\n",
+ " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n",
+ "FROM\n",
+ " CUPTI_ACTIVITY_KIND_OPENACC_DATA\n",
+ "LEFT JOIN\n",
+ " StringIds AS srcFileIds\n",
+ " ON srcFileIds.id == srcFile\n",
+ "LEFT JOIN\n",
+ " StringIds AS nameIds\n",
+ " ON nameIds.id == nameId\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS funcIds\n",
+ "\tON funcIds.id == funcName\n",
+ "LEFT JOIN\n",
+ "\tStringIds AS varIds\n",
+ "\tON varIds.id == varName\n",
+ "\"\"\"\n",
+ "\n",
+ "# The SQLite database is looked up next to the report file, with the same base name.\n",
+ "sqlite_path = f\"{os.path.splitext(REPORT_FILE)[0]}.sqlite\"\n",
+ "# SQLite silently creates an empty database when the file does not exist, which would only\n",
+ "# surface below as a confusing 'no such table' error (and leave a stray file), so fail early.\n",
+ "if not os.path.isfile(sqlite_path):\n",
+ "    raise FileNotFoundError(f\"SQLite export not found: {sqlite_path}\")\n",
+ "\n",
+ "engine = create_engine(f\"sqlite:///{sqlite_path}\")\n",
+ "try:\n",
+ "    with engine.connect() as conn, conn.begin():\n",
+ "        openacc_other_df = pd.read_sql_query(sql_openacc_other, conn)\n",
+ "        openacc_launch_df = pd.read_sql_query(sql_openacc_launch, conn)\n",
+ "        openacc_data_df = pd.read_sql_query(sql_openacc_data, conn)\n",
+ "        openacc_event_kind = pd.read_sql_table(\"ENUM_OPENACC_EVENT_KIND\", conn)\n",
+ "finally:\n",
+ "    # Release the pooled connection once everything is loaded; the engine stays usable.\n",
+ "    engine.dispose()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " name | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_INVALID | \n",
+ " Invalid | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_DEVICE_INIT | \n",
+ " Device init | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_DEVICE_SHUTDOWN | \n",
+ " Device shutdown | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 3 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_RUNTIME_SHUTDOWN | \n",
+ " Runtime shutdown | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 4 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_ENQUEUE_LAUNCH | \n",
+ " Enqueue launch | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 5 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_ENQUEUE_UPLOAD | \n",
+ " Enqueue upload | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 6 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_ENQUEUE_DOWNLOAD | \n",
+ " Enqueue download | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 7 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_WAIT | \n",
+ " Wait | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 8 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_IMPLICIT_WAIT | \n",
+ " Implicit wait | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 9 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_COMPUTE_CONSTRUCT | \n",
+ " Compute construct | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 10 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_UPDATE | \n",
+ " UPDATE | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 11 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_ENTER_DATA | \n",
+ " Enter data | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 12 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_EXIT_DATA | \n",
+ " Exit data | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 13 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_CREATE | \n",
+ " Create | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 14 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_DELETE | \n",
+ " Delete | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 15 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_ALLOC | \n",
+ " Alloc | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 16 | \n",
+ " CUPTI_OPENACC_EVENT_KIND_FREE | \n",
+ " Free | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id name label\n",
+ "0 0 CUPTI_OPENACC_EVENT_KIND_INVALID Invalid\n",
+ "1 1 CUPTI_OPENACC_EVENT_KIND_DEVICE_INIT Device init\n",
+ "2 2 CUPTI_OPENACC_EVENT_KIND_DEVICE_SHUTDOWN Device shutdown\n",
+ "3 3 CUPTI_OPENACC_EVENT_KIND_RUNTIME_SHUTDOWN Runtime shutdown\n",
+ "4 4 CUPTI_OPENACC_EVENT_KIND_ENQUEUE_LAUNCH Enqueue launch\n",
+ "5 5 CUPTI_OPENACC_EVENT_KIND_ENQUEUE_UPLOAD Enqueue upload\n",
+ "6 6 CUPTI_OPENACC_EVENT_KIND_ENQUEUE_DOWNLOAD Enqueue download\n",
+ "7 7 CUPTI_OPENACC_EVENT_KIND_WAIT Wait\n",
+ "8 8 CUPTI_OPENACC_EVENT_KIND_IMPLICIT_WAIT Implicit wait\n",
+ "9 9 CUPTI_OPENACC_EVENT_KIND_COMPUTE_CONSTRUCT Compute construct\n",
+ "10 10 CUPTI_OPENACC_EVENT_KIND_UPDATE UPDATE\n",
+ "11 11 CUPTI_OPENACC_EVENT_KIND_ENTER_DATA Enter data\n",
+ "12 12 CUPTI_OPENACC_EVENT_KIND_EXIT_DATA Exit data\n",
+ "13 13 CUPTI_OPENACC_EVENT_KIND_CREATE Create\n",
+ "14 14 CUPTI_OPENACC_EVENT_KIND_DELETE Delete\n",
+ "15 15 CUPTI_OPENACC_EVENT_KIND_ALLOC Alloc\n",
+ "16 16 CUPTI_OPENACC_EVENT_KIND_FREE Free"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "openacc_event_kind"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Event types and values preparation\n",
+ "### Add 1 to every event-kind value (the `eventKind` columns and the enum `id`)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Shift every event-kind value up by one (the enum ids start at 0 when freshly loaded).\n",
+ "# NOTE(review): presumably because value 0 is reserved in the generated trace - confirm.\n",
+ "# The frames are modified in place, so guard on the enum table to keep the shift from\n",
+ "# being applied a second time when this cell is re-run without reloading the data.\n",
+ "if openacc_event_kind[\"id\"].min() == 0:\n",
+ "    openacc_event_kind[\"id\"] += 1\n",
+ "    for acc_df in (openacc_launch_df, openacc_data_df, openacc_other_df):\n",
+ "        acc_df[\"eventKind\"] += 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Assign event values to construct names and construct functions, numbered consecutively across the data, launch and other tables"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def _number_groups(frame, key, value_col, offset=0):\n",
+ "    \"\"\"Tag each row of *frame* with the 1-based group number of its *key*, plus *offset*.\n",
+ "\n",
+ "    The numbers are written to ``frame[value_col]`` in place; rows with a missing *key*\n",
+ "    form a group of their own (``dropna=False``). Returns the distinct\n",
+ "    ``(value_col, key)`` pairs ordered by value.\n",
+ "    \"\"\"\n",
+ "    frame[value_col] = frame.groupby([key], dropna=False).ngroup() + 1 + offset\n",
+ "    return frame[[value_col, key]].drop_duplicates().sort_values([value_col])\n",
+ "\n",
+ "\n",
+ "# Construct names: data first, then launch, then other; each table continues numbering\n",
+ "# where the previous one stopped, so the value ranges do not overlap.\n",
+ "openacc_full_data_names = _number_groups(openacc_data_df, \"name\", \"name_value\")\n",
+ "name_offset = len(openacc_full_data_names)\n",
+ "openacc_full_launch_names = _number_groups(openacc_launch_df, \"name\", \"name_value\", name_offset)\n",
+ "name_offset += len(openacc_full_launch_names)\n",
+ "openacc_full_other_names = _number_groups(openacc_other_df, \"name\", \"name_value\", name_offset)\n",
+ "\n",
+ "# Construct functions: same scheme, numbered independently of the names.\n",
+ "openacc_full_data_funcs = _number_groups(openacc_data_df, \"func\", \"func_value\")\n",
+ "func_offset = len(openacc_full_data_funcs)\n",
+ "openacc_full_launch_funcs = _number_groups(openacc_launch_df, \"func\", \"func_value\", func_offset)\n",
+ "func_offset += len(openacc_full_launch_funcs)\n",
+ "openacc_full_other_funcs = _number_groups(openacc_other_df, \"func\", \"func_value\", func_offset)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " name | \n",
+ " start | \n",
+ " end | \n",
+ " eventKind | \n",
+ " kernelName | \n",
+ " func | \n",
+ " Pid | \n",
+ " Tid | \n",
+ " name_value | \n",
+ " func_value | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Enqueue Launch@CFDSolverBase.f90:1104 | \n",
+ " 8081370411 | \n",
+ " 8081461812 | \n",
+ " 5 | \n",
+ " cfdsolverbase_mod_cfdsolverbase_allocatevariab... | \n",
+ " cfdsolverbase_allocatevariables | \n",
+ " 585611 | \n",
+ " 585611 | \n",
+ " 770 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Enqueue Launch@CFDSolverBase.f90:1104 | \n",
+ " 8081372953 | \n",
+ " 8081419924 | \n",
+ " 5 | \n",
+ " cfdsolverbase_mod_cfdsolverbase_allocatevariab... | \n",
+ " cfdsolverbase_allocatevariables | \n",
+ " 585612 | \n",
+ " 585612 | \n",
+ " 770 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Enqueue Launch@CFDSolverBase.f90:1104 | \n",
+ " 8081375145 | \n",
+ " 8081473336 | \n",
+ " 5 | \n",
+ " cfdsolverbase_mod_cfdsolverbase_allocatevariab... | \n",
+ " cfdsolverbase_allocatevariables | \n",
+ " 585614 | \n",
+ " 585614 | \n",
+ " 770 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Enqueue Launch@CFDSolverBase.f90:1104 | \n",
+ " 8081403759 | \n",
+ " 8081436137 | \n",
+ " 5 | \n",
+ " cfdsolverbase_mod_cfdsolverbase_allocatevariab... | \n",
+ " cfdsolverbase_allocatevariables | \n",
+ " 585613 | \n",
+ " 585613 | \n",
+ " 770 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Enqueue Launch@CFDSolverBase.f90:1105 | \n",
+ " 8081420843 | \n",
+ " 8081426872 | \n",
+ " 5 | \n",
+ " cfdsolverbase_mod_cfdsolverbase_allocatevariab... | \n",
+ " cfdsolverbase_allocatevariables | \n",
+ " 585612 | \n",
+ " 585612 | \n",
+ " 771 | \n",
+ " 58 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 38287 | \n",
+ " Enqueue Launch@mod_time_ops.f90:32 | \n",
+ " 38877372018 | \n",
+ " 38877374971 | \n",
+ " 5 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu__red | \n",
+ " adapt_dt_cfl | \n",
+ " 585614 | \n",
+ " 585614 | \n",
+ " 883 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " 38288 | \n",
+ " Enqueue Launch@mod_time_ops.f90:32 | \n",
+ " 38877372667 | \n",
+ " 38877376233 | \n",
+ " 5 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu__red | \n",
+ " adapt_dt_cfl | \n",
+ " 585613 | \n",
+ " 585613 | \n",
+ " 883 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " 38289 | \n",
+ " Enqueue Launch@mod_time_ops.f90:32 | \n",
+ " 38877372863 | \n",
+ " 38877375700 | \n",
+ " 5 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu__red | \n",
+ " adapt_dt_cfl | \n",
+ " 585612 | \n",
+ " 585612 | \n",
+ " 883 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " 38290 | \n",
+ " Enqueue Launch@mod_time_ops.f90:32 | \n",
+ " 38877384375 | \n",
+ " 38877387928 | \n",
+ " 5 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu | \n",
+ " adapt_dt_cfl | \n",
+ " 585611 | \n",
+ " 585611 | \n",
+ " 883 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ " 38291 | \n",
+ " Enqueue Launch@mod_time_ops.f90:32 | \n",
+ " 38877388123 | \n",
+ " 38877390998 | \n",
+ " 5 | \n",
+ " mod_time_ops_adapt_dt_cfl_32_gpu__red | \n",
+ " adapt_dt_cfl | \n",
+ " 585611 | \n",
+ " 585611 | \n",
+ " 883 | \n",
+ " 57 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
38292 rows × 10 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " name start end \\\n",
+ "0 Enqueue Launch@CFDSolverBase.f90:1104 8081370411 8081461812 \n",
+ "1 Enqueue Launch@CFDSolverBase.f90:1104 8081372953 8081419924 \n",
+ "2 Enqueue Launch@CFDSolverBase.f90:1104 8081375145 8081473336 \n",
+ "3 Enqueue Launch@CFDSolverBase.f90:1104 8081403759 8081436137 \n",
+ "4 Enqueue Launch@CFDSolverBase.f90:1105 8081420843 8081426872 \n",
+ "... ... ... ... \n",
+ "38287 Enqueue Launch@mod_time_ops.f90:32 38877372018 38877374971 \n",
+ "38288 Enqueue Launch@mod_time_ops.f90:32 38877372667 38877376233 \n",
+ "38289 Enqueue Launch@mod_time_ops.f90:32 38877372863 38877375700 \n",
+ "38290 Enqueue Launch@mod_time_ops.f90:32 38877384375 38877387928 \n",
+ "38291 Enqueue Launch@mod_time_ops.f90:32 38877388123 38877390998 \n",
+ "\n",
+ " eventKind kernelName \\\n",
+ "0 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n",
+ "1 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n",
+ "2 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n",
+ "3 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n",
+ "4 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n",
+ "... ... ... \n",
+ "38287 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n",
+ "38288 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n",
+ "38289 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n",
+ "38290 5 mod_time_ops_adapt_dt_cfl_32_gpu \n",
+ "38291 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n",
+ "\n",
+ " func Pid Tid name_value func_value \n",
+ "0 cfdsolverbase_allocatevariables 585611 585611 770 58 \n",
+ "1 cfdsolverbase_allocatevariables 585612 585612 770 58 \n",
+ "2 cfdsolverbase_allocatevariables 585614 585614 770 58 \n",
+ "3 cfdsolverbase_allocatevariables 585613 585613 770 58 \n",
+ "4 cfdsolverbase_allocatevariables 585612 585612 771 58 \n",
+ "... ... ... ... ... ... \n",
+ "38287 adapt_dt_cfl 585614 585614 883 57 \n",
+ "38288 adapt_dt_cfl 585613 585613 883 57 \n",
+ "38289 adapt_dt_cfl 585612 585612 883 57 \n",
+ "38290 adapt_dt_cfl 585611 585611 883 57 \n",
+ "38291 adapt_dt_cfl 585611 585611 883 57 \n",
+ "\n",
+ "[38292 rows x 10 columns]"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "openacc_launch_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " func_value | \n",
+ " func | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3264 | \n",
+ " 1 | \n",
+ " adapt_dt_cfl | \n",
+ "
\n",
+ " \n",
+ " 788 | \n",
+ " 2 | \n",
+ " cfdsolverbase_allocatevariables | \n",
+ "
\n",
+ " \n",
+ " 2148 | \n",
+ " 3 | \n",
+ " cfdsolverbase_boundaryfacestonodes | \n",
+ "
\n",
+ " \n",
+ " 2248 | \n",
+ " 4 | \n",
+ " cfdsolverbase_eval_elempernode_and_nearboundar... | \n",
+ "
\n",
+ " \n",
+ " 2040 | \n",
+ " 5 | \n",
+ " cfdsolverbase_evalatoijkinverse | \n",
+ "
\n",
+ " \n",
+ " 1920 | \n",
+ " 6 | \n",
+ " cfdsolverbase_evalcharlength | \n",
+ "
\n",
+ " \n",
+ " 1948 | \n",
+ " 7 | \n",
+ " cfdsolverbase_evaljacobians | \n",
+ "
\n",
+ " \n",
+ " 2380 | \n",
+ " 8 | \n",
+ " cfdsolverbase_evalmass | \n",
+ "
\n",
+ " \n",
+ " 484 | \n",
+ " 9 | \n",
+ " cfdsolverbase_evalshapefunctions | \n",
+ "
\n",
+ " \n",
+ " 3824 | \n",
+ " 10 | \n",
+ " cfdsolverbase_evaltimeiteration | \n",
+ "
\n",
+ " \n",
+ " 1901 | \n",
+ " 11 | \n",
+ " cfdsolverbase_evalviscosityfactor | \n",
+ "
\n",
+ " \n",
+ " 3336 | \n",
+ " 12 | \n",
+ " cfdsolverbase_initialbuffer | \n",
+ "
\n",
+ " \n",
+ " 1876 | \n",
+ " 13 | \n",
+ " cfdsolverbase_initializesourceterms | \n",
+ "
\n",
+ " \n",
+ " 2221 | \n",
+ " 14 | \n",
+ " checkifwallmodelon | \n",
+ "
\n",
+ " \n",
+ " 2924 | \n",
+ " 15 | \n",
+ " compute_fieldderivs | \n",
+ "
\n",
+ " \n",
+ " 2860 | \n",
+ " 16 | \n",
+ " copy_elemgpscalarfield_in_nodes_for_inst | \n",
+ "
\n",
+ " \n",
+ " 2324 | \n",
+ " 17 | \n",
+ " copy_from_rcvbuffer_int | \n",
+ "
\n",
+ " \n",
+ " 2432 | \n",
+ " 18 | \n",
+ " copy_from_rcvbuffer_real | \n",
+ "
\n",
+ " \n",
+ " 2472 | \n",
+ " 19 | \n",
+ " copy_nodescalarfield2save_in_aux_for_inst | \n",
+ "
\n",
+ " \n",
+ " 2764 | \n",
+ " 20 | \n",
+ " copy_nodevectorfield2save_in_aux_for_inst | \n",
+ "
\n",
+ " \n",
+ " 2478 | \n",
+ " 21 | \n",
+ " copyperiodicnodes_scalarfield | \n",
+ "
\n",
+ " \n",
+ " 2770 | \n",
+ " 22 | \n",
+ " copyperiodicnodes_vectorfield | \n",
+ "
\n",
+ " \n",
+ " 72247 | \n",
+ " 23 | \n",
+ " deallocate_filters | \n",
+ "
\n",
+ " \n",
+ " 1996 | \n",
+ " 24 | \n",
+ " elem_jacobian | \n",
+ "
\n",
+ " \n",
+ " 2292 | \n",
+ " 25 | \n",
+ " elempernode | \n",
+ "
\n",
+ " \n",
+ " 72484 | \n",
+ " 26 | \n",
+ " end_comms | \n",
+ "
\n",
+ " \n",
+ " 72532 | \n",
+ " 27 | \n",
+ " end_comms_bnd | \n",
+ "
\n",
+ " \n",
+ " 72334 | \n",
+ " 28 | \n",
+ " end_hdf5_auxiliar_saving_arrays | \n",
+ "
\n",
+ " \n",
+ " 72100 | \n",
+ " 29 | \n",
+ " end_rk4_solver | \n",
+ "
\n",
+ " \n",
+ " 2305 | \n",
+ " 30 | \n",
+ " fill_sendbuffer_int | \n",
+ "
\n",
+ " \n",
+ " 2413 | \n",
+ " 31 | \n",
+ " fill_sendbuffer_real | \n",
+ "
\n",
+ " \n",
+ " 4143 | \n",
+ " 32 | \n",
+ " full_convec_ijk | \n",
+ "
\n",
+ " \n",
+ " 4128 | \n",
+ " 33 | \n",
+ " full_diffusion_ijk | \n",
+ "
\n",
+ " \n",
+ " 3927 | \n",
+ " 34 | \n",
+ " generic_scalar_convec_ijk | \n",
+ "
\n",
+ " \n",
+ " 228 | \n",
+ " 35 | \n",
+ " init_comms | \n",
+ "
\n",
+ " \n",
+ " 324 | \n",
+ " 36 | \n",
+ " init_comms_bnd | \n",
+ "
\n",
+ " \n",
+ " 1668 | \n",
+ " 37 | \n",
+ " init_filters | \n",
+ "
\n",
+ " \n",
+ " 388 | \n",
+ " 38 | \n",
+ " init_hdf5_auxiliar_saving_arrays | \n",
+ "
\n",
+ " \n",
+ " 3344 | \n",
+ " 39 | \n",
+ " init_rk4_solver | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " 40 | \n",
+ " load_connectivity_hdf5 | \n",
+ "
\n",
+ " \n",
+ " 116 | \n",
+ " 41 | \n",
+ " load_coordinates_hdf5 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 42 | \n",
+ " load_parallel_data_hdf5 | \n",
+ "
\n",
+ " \n",
+ " 2400 | \n",
+ " 43 | \n",
+ " lumped_mass_spectral | \n",
+ "
\n",
+ " \n",
+ " 3972 | \n",
+ " 44 | \n",
+ " lumped_solver_scal | \n",
+ "
\n",
+ " \n",
+ " 4325 | \n",
+ " 45 | \n",
+ " lumped_solver_vect | \n",
+ "
\n",
+ " \n",
+ " 3240 | \n",
+ " 46 | \n",
+ " maxmach | \n",
+ "
\n",
+ " \n",
+ " 2346 | \n",
+ " 47 | \n",
+ " nearboundarynode | \n",
+ "
\n",
+ " \n",
+ " 3922 | \n",
+ " 48 | \n",
+ " rk_4_main | \n",
+ "
\n",
+ " \n",
+ " 5468 | \n",
+ " 49 | \n",
+ " save_hdf5_restartfile | \n",
+ "
\n",
+ " \n",
+ " 2492 | \n",
+ " 50 | \n",
+ " save_hdf5_resultsfile_basefunc | \n",
+ "
\n",
+ " \n",
+ " 3980 | \n",
+ " 51 | \n",
+ " smart_visc_spectral | \n",
+ "
\n",
+ " \n",
+ " 1912 | \n",
+ " 52 | \n",
+ " sutherland_viscosity | \n",
+ "
\n",
+ " \n",
+ " 1780 | \n",
+ " 53 | \n",
+ " tgvsolver_evalinitialconditions | \n",
+ "
\n",
+ " \n",
+ " 3180 | \n",
+ " 54 | \n",
+ " visc_dissipationrate | \n",
+ "
\n",
+ " \n",
+ " 3156 | \n",
+ " 55 | \n",
+ " volavg_ek | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 56 | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " func_value func\n",
+ "3264 1 adapt_dt_cfl\n",
+ "788 2 cfdsolverbase_allocatevariables\n",
+ "2148 3 cfdsolverbase_boundaryfacestonodes\n",
+ "2248 4 cfdsolverbase_eval_elempernode_and_nearboundar...\n",
+ "2040 5 cfdsolverbase_evalatoijkinverse\n",
+ "1920 6 cfdsolverbase_evalcharlength\n",
+ "1948 7 cfdsolverbase_evaljacobians\n",
+ "2380 8 cfdsolverbase_evalmass\n",
+ "484 9 cfdsolverbase_evalshapefunctions\n",
+ "3824 10 cfdsolverbase_evaltimeiteration\n",
+ "1901 11 cfdsolverbase_evalviscosityfactor\n",
+ "3336 12 cfdsolverbase_initialbuffer\n",
+ "1876 13 cfdsolverbase_initializesourceterms\n",
+ "2221 14 checkifwallmodelon\n",
+ "2924 15 compute_fieldderivs\n",
+ "2860 16 copy_elemgpscalarfield_in_nodes_for_inst\n",
+ "2324 17 copy_from_rcvbuffer_int\n",
+ "2432 18 copy_from_rcvbuffer_real\n",
+ "2472 19 copy_nodescalarfield2save_in_aux_for_inst\n",
+ "2764 20 copy_nodevectorfield2save_in_aux_for_inst\n",
+ "2478 21 copyperiodicnodes_scalarfield\n",
+ "2770 22 copyperiodicnodes_vectorfield\n",
+ "72247 23 deallocate_filters\n",
+ "1996 24 elem_jacobian\n",
+ "2292 25 elempernode\n",
+ "72484 26 end_comms\n",
+ "72532 27 end_comms_bnd\n",
+ "72334 28 end_hdf5_auxiliar_saving_arrays\n",
+ "72100 29 end_rk4_solver\n",
+ "2305 30 fill_sendbuffer_int\n",
+ "2413 31 fill_sendbuffer_real\n",
+ "4143 32 full_convec_ijk\n",
+ "4128 33 full_diffusion_ijk\n",
+ "3927 34 generic_scalar_convec_ijk\n",
+ "228 35 init_comms\n",
+ "324 36 init_comms_bnd\n",
+ "1668 37 init_filters\n",
+ "388 38 init_hdf5_auxiliar_saving_arrays\n",
+ "3344 39 init_rk4_solver\n",
+ "144 40 load_connectivity_hdf5\n",
+ "116 41 load_coordinates_hdf5\n",
+ "4 42 load_parallel_data_hdf5\n",
+ "2400 43 lumped_mass_spectral\n",
+ "3972 44 lumped_solver_scal\n",
+ "4325 45 lumped_solver_vect\n",
+ "3240 46 maxmach\n",
+ "2346 47 nearboundarynode\n",
+ "3922 48 rk_4_main\n",
+ "5468 49 save_hdf5_restartfile\n",
+ "2492 50 save_hdf5_resultsfile_basefunc\n",
+ "3980 51 smart_visc_spectral\n",
+ "1912 52 sutherland_viscosity\n",
+ "1780 53 tgvsolver_evalinitialconditions\n",
+ "3180 54 visc_dissipationrate\n",
+ "3156 55 volavg_ek\n",
+ "0 56 None"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "openacc_full_data_funcs"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "env",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/scripts/openacc-test.sql b/scripts/openacc-test.sql
new file mode 100644
index 0000000000000000000000000000000000000000..7a39c1e4a0521866a98623a5f0f14befa2ea172b
--- /dev/null
+++ b/scripts/openacc-test.sql
@@ -0,0 +1,64 @@
+-- Lists every OpenACC activity (data, launch and other) in one result set.
+-- Columns that a table is not read for are filled with NULL so the UNION ALL branches line up.
+WITH
+    openacc AS (
+        SELECT
+            start,
+            end,
+            nameId,
+            eventKind,
+            lineNo,
+            srcFile,
+            globalTid,
+            bytes,
+            funcName
+        FROM CUPTI_ACTIVITY_KIND_OPENACC_DATA
+        UNION ALL
+        SELECT
+            start,
+            end,
+            nameId,
+            eventKind,
+            lineNo,
+            srcFile,
+            globalTid,
+            null AS bytes,
+            funcName
+        FROM CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH
+        UNION ALL
+        SELECT
+            start,
+            end,
+            nameId,
+            eventKind,
+            lineNo,
+            srcFile,
+            globalTid,
+            null AS bytes,
+            -- This table carries funcName as well (parse-nsys-stats.ipynb joins on it),
+            -- so keep it instead of discarding the function of "other" events.
+            funcName
+        FROM CUPTI_ACTIVITY_KIND_OPENACC_OTHER
+    )
+SELECT
+    -- Displayed name is '<name>@<file>:<line>' when a source file is recorded.
+    CASE
+        WHEN srcFile NOT NULL
+            THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo
+        ELSE nameIds.value
+    END AS name,
+    start,
+    end,
+    eventIds.label,
+    funcIds.value as func,
+    bytes,
+    -- globalTid packs two 24-bit fields: Pid above Tid.
+    globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid
+FROM
+    openacc
+LEFT JOIN
+    StringIds AS srcFileIds
+    ON srcFileIds.id == srcFile
+LEFT JOIN
+    StringIds AS nameIds
+    ON nameIds.id == nameId
+LEFT JOIN
+    StringIds AS funcIds
+    ON funcIds.id == funcName
+LEFT JOIN
+    ENUM_OPENACC_EVENT_KIND as eventIds
+    ON eventIds.id == eventKind
\ No newline at end of file