diff --git a/parse-nsys-stats.ipynb b/parse-nsys-stats.ipynb index c4af5a0c8ac5705af2682debddcb0feeb3bca862..ae044a77ed39ec6d37a0d5440de9c7a8b0785eaa 100644 --- a/parse-nsys-stats.ipynb +++ b/parse-nsys-stats.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "7a6eb6bf-0de9-458d-bd77-d876b0219bd3", "metadata": {}, "outputs": [], @@ -13,7 +13,8 @@ "import subprocess\n", "import os\n", "import locale\n", - "import sqlite3\n" + "import sqlite3\n", + "from sqlalchemy import create_engine\n" ] }, { @@ -26,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "69698964-097d-4cac-890a-25ea7d3beb11", "metadata": {}, "outputs": [], @@ -36,7 +37,8 @@ "PARAVER_HOME = os.getenv('PARAVER_HOME')\n", "NVTX_RANGE=\"step53\"\n", "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/heka/profiles/mistral-mn5/heka-axolotl-Mistral7B0.1-4s_withmetrics-2432719.nsys-rep\")\n", - "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n", + "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n", + "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.nsys-rep\")\n", "REPORT_DIR = os.path.dirname(REPORT_FILE)\n", "#REPORT_NAME=\"heka-step53+accum1-profile-2023.4-5721957\"\n", "#REPORT_NAME=\"heka-axolotl-Mistral7B0.1-profile-2110598\"\n", @@ -60,11 +62,27 @@ "comm_tag_memory = 55002\n", "comm_tag_dependency = 55003\n", "\n", + "event_type_openacc = 66000000\n", + "event_type_openacc_data = 66000001\n", + "event_type_openacc_launch = 66000002\n", + "\n", + "event_type_name_openacc = 66100000\n", + "event_type_name_openacc_data = 66100001\n", + "event_type_name_openacc_launch = 66100002\n", + "\n", + "event_type_func_openacc = 66200000\n", + "event_type_func_openacc_data = 66200001\n", + "event_type_func_openacc_launch = 66200002\n", + "\n", + "event_type_openacc_data_size = 66300001\n", + "\n", "nvtx_select_frames = True\n", "nvtx_stack_top = 1\n", "nvtx_stack_bottom = 4\n", "\n", - "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\", \"mpi_event_trace\"]\n", + "t_openacc = True\n", + "\n", + "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\"]\n", "\n", "def build_nsys_stats_name(report_name):\n", " base_name = os.path.splitext(os.path.basename(REPORT_FILE))[0]\n", @@ -112,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 15, "id": "6189c237-74d3-4880-a81c-f4d1e1c76230", "metadata": {}, "outputs": [ @@ -163,9 +181,9 @@ " \n", " \n", " 0\n", - " 5052824755\n", - " 5792\n", - " 760\n", + " 1924937486\n", + " 1056\n", + " 213\n", " NaN\n", " NaN\n", " NaN\n", @@ -175,21 +193,21 @@ " NaN\n", " ...\n", " NaN\n", - " 0,183\n", - " 31553,006\n", + " 0,000\n", + " 30,303\n", " Pageable\n", " Device\n", - " NVIDIA H100 (2)\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Host-to-Device]\n", " \n", " \n", " 1\n", - " 5052881747\n", - " 5792\n", - " 761\n", + " 1926678138\n", + " 1024\n", + " 211\n", " NaN\n", " NaN\n", " NaN\n", @@ -199,21 +217,21 @@ " NaN\n", " ...\n", " NaN\n", - " 0,183\n", - " 31553,006\n", + " 0,000\n", + " 31,250\n", " Pageable\n", " Device\n", - " NVIDIA H100 (2)\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Host-to-Device]\n", " \n", " \n", " 2\n", - " 5054622386\n", - " 5856\n", - " 760\n", + " 1928094871\n", + " 992\n", + " 213\n", " NaN\n", " NaN\n", " NaN\n", @@ -223,21 +241,21 @@ " NaN\n", " ...\n", " NaN\n", - " 0,183\n", - " 31208,328\n", + " 0,000\n", + " 32,258\n", " Pageable\n", " Device\n", - " NVIDIA H100 (1)\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Host-to-Device]\n", " \n", " \n", " 3\n", - " 5054666386\n", - " 6528\n", - " 761\n", + " 1932409317\n", + " 992\n", + " 211\n", " NaN\n", " NaN\n", " NaN\n", @@ -247,21 +265,21 @@ " NaN\n", " ...\n", " NaN\n", - " 0,183\n", - " 27995,661\n", + " 0,000\n", + " 32,258\n", " Pageable\n", " Device\n", - " NVIDIA H100 (1)\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Host-to-Device]\n", " \n", " \n", " 4\n", - " 5063403468\n", - " 6592\n", - " 760\n", + " 1980006677\n", + " 896\n", + " 684\n", " NaN\n", " NaN\n", " NaN\n", @@ -271,14 +289,14 @@ " NaN\n", " ...\n", " NaN\n", - " 0,183\n", - " 27723,902\n", + " 0,000\n", + " 142,857\n", " Pageable\n", " Device\n", - " NVIDIA H100 (3)\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Host-to-Device]\n", " \n", " \n", @@ -306,10 +324,10 @@ " ...\n", " \n", " \n", - " 163375\n", - " 35129053256\n", - " 3674711\n", - " 290520\n", + " 56599\n", + " 38878898445\n", + " 2208\n", + " 198835\n", " NaN\n", " NaN\n", " NaN\n", @@ -319,45 +337,45 @@ " NaN\n", " ...\n", " NaN\n", - " 190,070\n", - " 51699,149\n", + " 0,000\n", + " 1,812\n", " Device\n", - " Pinned\n", - " NVIDIA H100 (3)\n", + " Pageable\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Device-to-Host]\n", " \n", " \n", - " 163376\n", - " 35131445786\n", - " 3464975\n", - " 291246\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " 56600\n", + " 38878940548\n", + " 109920\n", + " 154364\n", + " 3.0\n", + " 1.0\n", + " 1.0\n", + " 256.0\n", + " 1.0\n", + " 1.0\n", + " 18.0\n", + " ...\n", + " 0,001\n", " NaN\n", " NaN\n", " NaN\n", - " ...\n", " NaN\n", - " 190,070\n", - " 54740,275\n", - " Device\n", - " Pinned\n", " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", - " [CUDA memcpy Device-to-Host]\n", + " 13\n", + " mod_time_ops_adapt_dt_cfl_32_gpu__red\n", " \n", " \n", - " 163377\n", - " 35131883569\n", - " 3598606\n", - " 668376\n", + " 56601\n", + " 38879061123\n", + " 2304\n", + " 154366\n", " NaN\n", " NaN\n", " NaN\n", @@ -367,21 +385,21 @@ " NaN\n", " ...\n", " NaN\n", - " 190,070\n", - " 52649,501\n", + " 0,000\n", + " 1,736\n", " Device\n", - " Pinned\n", - " NVIDIA H100 (1)\n", + " Pageable\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Device-to-Host]\n", " \n", " \n", - " 163378\n", - " 35132333093\n", - " 3628153\n", - " 651725\n", + " 56602\n", + " 38879071844\n", + " 2304\n", + " 154367\n", " NaN\n", " NaN\n", " NaN\n", @@ -391,21 +409,21 @@ " NaN\n", " ...\n", " NaN\n", - " 190,070\n", - " 52269,360\n", + " 0,000\n", + " 1,736\n", " Device\n", - " Pinned\n", - " NVIDIA H100 (2)\n", + " Pageable\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Device-to-Host]\n", " \n", " \n", - " 163379\n", - " 35132730143\n", - " 3688279\n", - " 290521\n", + " 56603\n", + " 38879082499\n", + " 2304\n", + " 154368\n", " NaN\n", " NaN\n", " NaN\n", @@ -415,78 +433,78 @@ " NaN\n", " ...\n", " NaN\n", - " 190,070\n", - " 51509,078\n", + " 0,000\n", + " 1,736\n", " Device\n", - " Pinned\n", - " NVIDIA H100 (3)\n", + " Pageable\n", + " NVIDIA H100 (0)\n", " 1\n", " NaN\n", - " 7\n", + " 13\n", " [CUDA memcpy Device-to-Host]\n", " \n", " \n", "\n", - "

163380 rows × 21 columns

\n", + "

56604 rows × 21 columns

\n", "" ], "text/plain": [ - " Start (ns) Duration (ns) CorrID GrdX GrdY GrdZ BlkX BlkY \\\n", - "0 5052824755 5792 760 NaN NaN NaN NaN NaN \n", - "1 5052881747 5792 761 NaN NaN NaN NaN NaN \n", - "2 5054622386 5856 760 NaN NaN NaN NaN NaN \n", - "3 5054666386 6528 761 NaN NaN NaN NaN NaN \n", - "4 5063403468 6592 760 NaN NaN NaN NaN NaN \n", - "... ... ... ... ... ... ... ... ... \n", - "163375 35129053256 3674711 290520 NaN NaN NaN NaN NaN \n", - "163376 35131445786 3464975 291246 NaN NaN NaN NaN NaN \n", - "163377 35131883569 3598606 668376 NaN NaN NaN NaN NaN \n", - "163378 35132333093 3628153 651725 NaN NaN NaN NaN NaN \n", - "163379 35132730143 3688279 290521 NaN NaN NaN NaN NaN \n", + " Start (ns) Duration (ns) CorrID GrdX GrdY GrdZ BlkX BlkY \\\n", + "0 1924937486 1056 213 NaN NaN NaN NaN NaN \n", + "1 1926678138 1024 211 NaN NaN NaN NaN NaN \n", + "2 1928094871 992 213 NaN NaN NaN NaN NaN \n", + "3 1932409317 992 211 NaN NaN NaN NaN NaN \n", + "4 1980006677 896 684 NaN NaN NaN NaN NaN \n", + "... ... ... ... ... ... ... ... ... \n", + "56599 38878898445 2208 198835 NaN NaN NaN NaN NaN \n", + "56600 38878940548 109920 154364 3.0 1.0 1.0 256.0 1.0 \n", + "56601 38879061123 2304 154366 NaN NaN NaN NaN NaN \n", + "56602 38879071844 2304 154367 NaN NaN NaN NaN NaN \n", + "56603 38879082499 2304 154368 NaN NaN NaN NaN NaN \n", "\n", - " BlkZ Reg/Trd ... DymSMem (MB) Bytes (MB) Throughput (MB/s) \\\n", - "0 NaN NaN ... NaN 0,183 31553,006 \n", - "1 NaN NaN ... NaN 0,183 31553,006 \n", - "2 NaN NaN ... NaN 0,183 31208,328 \n", - "3 NaN NaN ... NaN 0,183 27995,661 \n", - "4 NaN NaN ... NaN 0,183 27723,902 \n", - "... ... ... ... ... ... ... \n", - "163375 NaN NaN ... NaN 190,070 51699,149 \n", - "163376 NaN NaN ... NaN 190,070 54740,275 \n", - "163377 NaN NaN ... NaN 190,070 52649,501 \n", - "163378 NaN NaN ... NaN 190,070 52269,360 \n", - "163379 NaN NaN ... NaN 190,070 51509,078 \n", + " BlkZ Reg/Trd ... DymSMem (MB) Bytes (MB) Throughput (MB/s) SrcMemKd \\\n", + "0 NaN NaN ... NaN 0,000 30,303 Pageable \n", + "1 NaN NaN ... NaN 0,000 31,250 Pageable \n", + "2 NaN NaN ... NaN 0,000 32,258 Pageable \n", + "3 NaN NaN ... NaN 0,000 32,258 Pageable \n", + "4 NaN NaN ... NaN 0,000 142,857 Pageable \n", + "... ... ... ... ... ... ... ... \n", + "56599 NaN NaN ... NaN 0,000 1,812 Device \n", + "56600 1.0 18.0 ... 0,001 NaN NaN NaN \n", + "56601 NaN NaN ... NaN 0,000 1,736 Device \n", + "56602 NaN NaN ... NaN 0,000 1,736 Device \n", + "56603 NaN NaN ... NaN 0,000 1,736 Device \n", "\n", - " SrcMemKd DstMemKd Device Ctx GreenCtx Strm \\\n", - "0 Pageable Device NVIDIA H100 (2) 1 NaN 7 \n", - "1 Pageable Device NVIDIA H100 (2) 1 NaN 7 \n", - "2 Pageable Device NVIDIA H100 (1) 1 NaN 7 \n", - "3 Pageable Device NVIDIA H100 (1) 1 NaN 7 \n", - "4 Pageable Device NVIDIA H100 (3) 1 NaN 7 \n", - "... ... ... ... .. ... ... \n", - "163375 Device Pinned NVIDIA H100 (3) 1 NaN 7 \n", - "163376 Device Pinned NVIDIA H100 (0) 1 NaN 7 \n", - "163377 Device Pinned NVIDIA H100 (1) 1 NaN 7 \n", - "163378 Device Pinned NVIDIA H100 (2) 1 NaN 7 \n", - "163379 Device Pinned NVIDIA H100 (3) 1 NaN 7 \n", + " DstMemKd Device Ctx GreenCtx Strm \\\n", + "0 Device NVIDIA H100 (0) 1 NaN 13 \n", + "1 Device NVIDIA H100 (0) 1 NaN 13 \n", + "2 Device NVIDIA H100 (0) 1 NaN 13 \n", + "3 Device NVIDIA H100 (0) 1 NaN 13 \n", + "4 Device NVIDIA H100 (0) 1 NaN 13 \n", + "... ... ... .. ... ... \n", + "56599 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "56600 NaN NVIDIA H100 (0) 1 NaN 13 \n", + "56601 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "56602 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "56603 Pageable NVIDIA H100 (0) 1 NaN 13 \n", "\n", - " Name \n", - "0 [CUDA memcpy Host-to-Device] \n", - "1 [CUDA memcpy Host-to-Device] \n", - "2 [CUDA memcpy Host-to-Device] \n", - "3 [CUDA memcpy Host-to-Device] \n", - "4 [CUDA memcpy Host-to-Device] \n", - "... ... \n", - "163375 [CUDA memcpy Device-to-Host] \n", - "163376 [CUDA memcpy Device-to-Host] \n", - "163377 [CUDA memcpy Device-to-Host] \n", - "163378 [CUDA memcpy Device-to-Host] \n", - "163379 [CUDA memcpy Device-to-Host] \n", + " Name \n", + "0 [CUDA memcpy Host-to-Device] \n", + "1 [CUDA memcpy Host-to-Device] \n", + "2 [CUDA memcpy Host-to-Device] \n", + "3 [CUDA memcpy Host-to-Device] \n", + "4 [CUDA memcpy Host-to-Device] \n", + "... ... \n", + "56599 [CUDA memcpy Device-to-Host] \n", + "56600 mod_time_ops_adapt_dt_cfl_32_gpu__red \n", + "56601 [CUDA memcpy Device-to-Host] \n", + "56602 [CUDA memcpy Device-to-Host] \n", + "56603 [CUDA memcpy Device-to-Host] \n", "\n", - "[163380 rows x 21 columns]" + "[56604 rows x 21 columns]" ] }, - "execution_count": 4, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -499,7 +517,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "5cc1c0a6-8631-44f4-8dcb-66f59ef78c17", "metadata": {}, "outputs": [ @@ -538,85 +556,85 @@ " \n", " \n", " 0\n", - " 3298388027\n", - " 2015\n", + " 1175970387\n", + " 4550\n", " cuModuleGetLoadingMode\n", " 0\n", " 1\n", - " 413821\n", - " 413821\n", + " 585611\n", + " 585611\n", " 0\n", - " MPI Rank 1\n", + " MPI Rank 0\n", " \n", " \n", " 1\n", - " 3466891507\n", - " 1785\n", - " cuModuleGetLoadingMode\n", + " 1176034134\n", + " 271\n", + " cuDeviceGetCount\n", " 0\n", - " 1\n", - " 413818\n", - " 413818\n", + " 2\n", + " 585611\n", + " 585611\n", " 0\n", - " MPI Rank 2\n", + " MPI Rank 0\n", " \n", " \n", " 2\n", - " 3467659248\n", - " 1680\n", - " cuModuleGetLoadingMode\n", + " 1176034842\n", + " 142\n", + " cuDeviceGet\n", " 0\n", - " 1\n", - " 413820\n", - " 413820\n", + " 3\n", + " 585611\n", + " 585611\n", " 0\n", " MPI Rank 0\n", " \n", " \n", " 3\n", - " 3467739356\n", - " 1547\n", - " cuModuleGetLoadingMode\n", + " 1176035727\n", + " 9146\n", + " cuDeviceGetName\n", " 0\n", - " 1\n", - " 413819\n", - " 413819\n", + " 4\n", + " 585611\n", + " 585611\n", " 0\n", - " MPI Rank 3\n", + " MPI Rank 0\n", " \n", " \n", " 4\n", - " 4811123712\n", - " 100325\n", - " cudaStreamCreateWithPriority\n", + " 1176045240\n", + " 6333\n", + " cuDeviceTotalMem_v2\n", " 0\n", - " 745\n", - " 413818\n", - " 413818\n", + " 5\n", + " 585611\n", + " 585611\n", " 0\n", - " MPI Rank 2\n", + " MPI Rank 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Start (ns) Duration (ns) Name Result CorrID \\\n", - "0 3298388027 2015 cuModuleGetLoadingMode 0 1 \n", - "1 3466891507 1785 cuModuleGetLoadingMode 0 1 \n", - "2 3467659248 1680 cuModuleGetLoadingMode 0 1 \n", - "3 3467739356 1547 cuModuleGetLoadingMode 0 1 \n", - "4 4811123712 100325 cudaStreamCreateWithPriority 0 745 \n", + " Start (ns) Duration (ns) Name Result CorrID Pid \\\n", + "0 1175970387 4550 cuModuleGetLoadingMode 0 1 585611 \n", + "1 1176034134 271 cuDeviceGetCount 0 2 585611 \n", + "2 1176034842 142 cuDeviceGet 0 3 585611 \n", + "3 1176035727 9146 cuDeviceGetName 0 4 585611 \n", + "4 1176045240 6333 cuDeviceTotalMem_v2 0 5 585611 \n", "\n", - " Pid Tid T-Pri Thread Name \n", - "0 413821 413821 0 MPI Rank 1 \n", - "1 413818 413818 0 MPI Rank 2 \n", - "2 413820 413820 0 MPI Rank 0 \n", - "3 413819 413819 0 MPI Rank 3 \n", - "4 413818 413818 0 MPI Rank 2 " + " Tid T-Pri Thread Name \n", + "0 585611 0 MPI Rank 0 \n", + "1 585611 0 MPI Rank 0 \n", + "2 585611 0 MPI Rank 0 \n", + "3 585611 0 MPI Rank 0 \n", + "4 585611 0 MPI Rank 0 " ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -628,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 7, "id": "05d0fa22-45f5-405b-8fe7-6a822b4ea64e", "metadata": {}, "outputs": [ @@ -672,117 +690,117 @@ " \n", " \n", " 0\n", - " 5614893\n", - " 23874249117\n", - " 23868634224\n", - " 23868595137\n", - " 39087\n", - " gradient_range\n", - " 1336373\n", - " 1336373\n", + " 1927980172\n", + " 8059828975\n", + " 6131848803\n", " 0\n", - " 1\n", - " 27\n", + " 6131848803\n", + " Open mesh\n", + " 585613\n", + " 585613\n", + " 0\n", + " 0\n", + " 9\n", " NaN\n", - " :27\n", - " gradient_range\n", + " :9\n", + " Open mesh\n", " \n", " \n", " 1\n", - " 5632294\n", - " 23874227431\n", - " 23868595137\n", - " 23868427904\n", - " 167233\n", - " step10\n", - " 1336373\n", - " 1336373\n", - " 1\n", - " 1\n", - " 28\n", - " 27.0\n", - " :27:28\n", - " --step10\n", + " 1929174557\n", + " 8059831699\n", + " 6130657142\n", + " 0\n", + " 6130657142\n", + " Open mesh\n", + " 585614\n", + " 585614\n", + " 0\n", + " 0\n", + " 10\n", + " NaN\n", + " :10\n", + " Open mesh\n", " \n", " \n", " 2\n", - " 5767713\n", - " 23874195617\n", - " 23868427904\n", - " 23649727718\n", - " 218700186\n", - " gradient_range\n", - " 1336373\n", - " 1336373\n", - " 2\n", - " 21\n", - " 29\n", - " 28.0\n", - " :27:28:29\n", - " ----gradient_range\n", + " 1929371724\n", + " 8059831396\n", + " 6130459672\n", + " 0\n", + " 6130459672\n", + " Open mesh\n", + " 585611\n", + " 585611\n", + " 0\n", + " 0\n", + " 11\n", + " NaN\n", + " :11\n", + " Open mesh\n", " \n", " \n", " 3\n", - " 5768678\n", - " 12280877661\n", - " 12275108983\n", - " 12275047299\n", - " 61684\n", - " step10\n", - " 1336373\n", - " 1336373\n", - " 3\n", - " 1\n", - " 30\n", - " 29.0\n", - " :27:28:29:30\n", - " ------step10\n", + " 1933533501\n", + " 8059836496\n", + " 6126302995\n", + " 0\n", + " 6126302995\n", + " Open mesh\n", + " 585612\n", + " 585612\n", + " 0\n", + " 0\n", + " 12\n", + " NaN\n", + " :12\n", + " Open mesh\n", " \n", " \n", " 4\n", - " 5807903\n", - " 12280855202\n", - " 12275047299\n", - " 12275011872\n", - " 35427\n", - " gradient_range\n", - " 1336373\n", - " 1336373\n", - " 4\n", - " 1\n", - " 31\n", - " 30.0\n", - " :27:28:29:30:31\n", - " --------gradient_range\n", + " 8062014698\n", + " 8064261693\n", + " 2246995\n", + " 0\n", + " 2246995\n", + " Gaussian Quadrature\n", + " 585614\n", + " 585614\n", + " 0\n", + " 0\n", + " 13\n", + " NaN\n", + " :13\n", + " Gaussian Quadrature\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Start (ns) End (ns) Duration (ns) DurChild (ns) DurNonChild (ns) \\\n", - "0 5614893 23874249117 23868634224 23868595137 39087 \n", - "1 5632294 23874227431 23868595137 23868427904 167233 \n", - "2 5767713 23874195617 23868427904 23649727718 218700186 \n", - "3 5768678 12280877661 12275108983 12275047299 61684 \n", - "4 5807903 12280855202 12275047299 12275011872 35427 \n", + " Start (ns) End (ns) Duration (ns) DurChild (ns) DurNonChild (ns) \\\n", + "0 1927980172 8059828975 6131848803 0 6131848803 \n", + "1 1929174557 8059831699 6130657142 0 6130657142 \n", + "2 1929371724 8059831396 6130459672 0 6130459672 \n", + "3 1933533501 8059836496 6126302995 0 6126302995 \n", + "4 8062014698 8064261693 2246995 0 2246995 \n", "\n", - " Name PID TID Lvl NumChild RangeId ParentId \\\n", - "0 gradient_range 1336373 1336373 0 1 27 NaN \n", - "1 step10 1336373 1336373 1 1 28 27.0 \n", - "2 gradient_range 1336373 1336373 2 21 29 28.0 \n", - "3 step10 1336373 1336373 3 1 30 29.0 \n", - "4 gradient_range 1336373 1336373 4 1 31 30.0 \n", + " Name PID TID Lvl NumChild RangeId ParentId \\\n", + "0 Open mesh 585613 585613 0 0 9 NaN \n", + "1 Open mesh 585614 585614 0 0 10 NaN \n", + "2 Open mesh 585611 585611 0 0 11 NaN \n", + "3 Open mesh 585612 585612 0 0 12 NaN \n", + "4 Gaussian Quadrature 585614 585614 0 0 13 NaN \n", "\n", - " RangeStack NameTree \n", - "0 :27 gradient_range \n", - "1 :27:28 --step10 \n", - "2 :27:28:29 ----gradient_range \n", - "3 :27:28:29:30 ------step10 \n", - "4 :27:28:29:30:31 --------gradient_range " + " RangeStack NameTree \n", + "0 :9 Open mesh \n", + "1 :10 Open mesh \n", + "2 :11 Open mesh \n", + "3 :12 Open mesh \n", + "4 :13 Gaussian Quadrature " ] }, - "execution_count": 31, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1035,6 +1053,105 @@ "mpi_df.iloc[30:40,]" ] }, + { + "cell_type": "code", + "execution_count": 8, + "id": "27573ad8", + "metadata": {}, + "outputs": [], + "source": [ + "sql_openacc_other = \"\"\"\n", + "SELECT\n", + " CASE\n", + " WHEN srcFile NOT NULL\n", + " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n", + " ELSE nameIds.value\n", + " END AS name,\n", + " start,\n", + " end,\n", + " eventKind,\n", + "\tfuncIds.value as func,\n", + " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n", + "FROM\n", + " CUPTI_ACTIVITY_KIND_OPENACC_OTHER\n", + "LEFT JOIN\n", + " StringIds AS srcFileIds\n", + " ON srcFileIds.id == srcFile\n", + "LEFT JOIN\n", + " StringIds AS nameIds\n", + " ON nameIds.id == nameId\n", + "LEFT JOIN\n", + "\tStringIds AS funcIds\n", + "\tON funcIds.id == funcName\n", + "\"\"\"\n", + "\n", + "sql_openacc_launch = \"\"\"\n", + "SELECT\n", + " CASE\n", + " WHEN srcFile NOT NULL\n", + " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n", + " ELSE nameIds.value\n", + " END AS name,\n", + " start,\n", + " end,\n", + " eventKind,\n", + " kernelIds.value as kernelName,\n", + "\tfuncIds.value as func,\n", + " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n", + "FROM\n", + " CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH\n", + "LEFT JOIN\n", + " StringIds AS srcFileIds\n", + " ON srcFileIds.id == srcFile\n", + "LEFT JOIN\n", + " StringIds AS nameIds\n", + " ON nameIds.id == nameId\n", + "LEFT JOIN\n", + "\tStringIds AS funcIds\n", + "\tON funcIds.id == funcName\n", + "LEFT JOIN\n", + "\tStringIds AS kernelIds\n", + "\tON kernelIds.id == kernelName\n", + "\"\"\"\n", + "\n", + "sql_openacc_data = \"\"\"\n", + "SELECT\n", + " CASE\n", + " WHEN srcFile NOT NULL\n", + " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n", + " ELSE nameIds.value\n", + " END AS name,\n", + " start,\n", + " end,\n", + " eventKind,\n", + " varIds.value as variableName,\n", + "\tfuncIds.value as func,\n", + "\tbytes,\n", + " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n", + "FROM\n", + " CUPTI_ACTIVITY_KIND_OPENACC_DATA\n", + "LEFT JOIN\n", + " StringIds AS srcFileIds\n", + " ON srcFileIds.id == srcFile\n", + "LEFT JOIN\n", + " StringIds AS nameIds\n", + " ON nameIds.id == nameId\n", + "LEFT JOIN\n", + "\tStringIds AS funcIds\n", + "\tON funcIds.id == funcName\n", + "LEFT JOIN\n", + "\tStringIds AS varIds\n", + "\tON varIds.id == varName\n", + "\"\"\"\n", + "\n", + "engine = create_engine(f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n", + "with engine.connect() as conn, conn.begin():\n", + " openacc_other_df = pd.read_sql_query(sql_openacc_other, conn)\n", + " openacc_launch_df = pd.read_sql_query(sql_openacc_launch, conn)\n", + " openacc_data_df = pd.read_sql_query(sql_openacc_data, conn)\n", + " openacc_event_kind = pd.read_sql_table(\"ENUM_OPENACC_EVENT_KIND\", conn)" + ] + }, { "cell_type": "code", "execution_count": 9, @@ -1074,65 +1191,65 @@ " \n", " \n", " \n", - " 3\n", + " 0\n", " 7\n", " 0\n", " 1\n", - " 413818\n", - " 2\n", + " 585611\n", + " 0\n", " 1\n", - " None\n", - " None\n", + " 0\n", + " 0\n", " \n", " \n", - " 1\n", + " 2\n", " 7\n", " 0\n", " 1\n", - " 413819\n", - " 3\n", + " 585612\n", + " 0\n", " 1\n", - " None\n", - " None\n", + " 0\n", + " 0\n", " \n", " \n", - " 0\n", + " 3\n", " 7\n", " 0\n", " 1\n", - " 413820\n", + " 585613\n", " 0\n", " 1\n", - " None\n", - " None\n", + " 0\n", + " 0\n", " \n", " \n", - " 2\n", + " 1\n", " 7\n", " 0\n", " 1\n", - " 413821\n", - " 1\n", + " 585614\n", + " 0\n", " 1\n", - " None\n", - " None\n", + " 0\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " nullStreamId hwId vmId processId deviceId contextId parentContextId \\\n", - "3 7 0 1 413818 2 1 None \n", - "1 7 0 1 413819 3 1 None \n", - "0 7 0 1 413820 0 1 None \n", - "2 7 0 1 413821 1 1 None \n", + " nullStreamId hwId vmId processId deviceId contextId parentContextId \\\n", + "0 7 0 1 585611 0 1 0 \n", + "2 7 0 1 585612 0 1 0 \n", + "3 7 0 1 585613 0 1 0 \n", + "1 7 0 1 585614 0 1 0 \n", "\n", - " isGreenContext \n", - "3 None \n", - "1 None \n", - "0 None \n", - "2 None " + " isGreenContext \n", + "0 0 \n", + "2 0 \n", + "3 0 \n", + "1 0 " ] }, "execution_count": 9, @@ -1151,7 +1268,20 @@ "execution_count": 10, "id": "90cbe0f8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ValueError", + "evalue": "Table GPU_METRICS not found", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m gpu_metrics \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_sql_table\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGPU_METRICS\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msqlite:///\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplitext\u001b[49m\u001b[43m(\u001b[49m\u001b[43mREPORT_FILE\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.sqlite\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m metrics_description \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_sql_table(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTARGET_INFO_GPU_METRICS\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msqlite:///\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mos\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39msplitext(REPORT_FILE)[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.sqlite\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/Documents/BePPP/heka/tooling/env/lib/python3.10/site-packages/pandas/io/sql.py:386\u001b[0m, in \u001b[0;36mread_sql_table\u001b[0;34m(table_name, con, schema, index_col, coerce_float, parse_dates, columns, chunksize, dtype_backend)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m pandasSQL_builder(con, schema\u001b[38;5;241m=\u001b[39mschema, need_transaction\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m pandas_sql:\n\u001b[1;32m 385\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m pandas_sql\u001b[38;5;241m.\u001b[39mhas_table(table_name):\n\u001b[0;32m--> 386\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTable \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtable_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 388\u001b[0m table \u001b[38;5;241m=\u001b[39m pandas_sql\u001b[38;5;241m.\u001b[39mread_table(\n\u001b[1;32m 389\u001b[0m table_name,\n\u001b[1;32m 390\u001b[0m index_col\u001b[38;5;241m=\u001b[39mindex_col,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 395\u001b[0m dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[1;32m 396\u001b[0m )\n\u001b[1;32m 398\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m table \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "\u001b[0;31mValueError\u001b[0m: Table GPU_METRICS not found" + ] + } + ], "source": [ "gpu_metrics = pd.read_sql_table(\"GPU_METRICS\", f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n", "metrics_description = pd.read_sql_table(\"TARGET_INFO_GPU_METRICS\", f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")" @@ -1709,7 +1839,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 11, "id": "69bf520f-cbe4-4c34-b100-7b1b6ede4f17", "metadata": {}, "outputs": [ @@ -1717,7 +1847,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Unique processes: [413821 413818 413820 413819], and unique threads: [413821 413818 413820 413819]\n" + "Unique processes: [585611 585612 585613 585614], and unique threads: [585611 585612 585613 585614]\n" ] } ], @@ -1837,7 +1967,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "id": "5664ba75-7e11-42c6-867b-7ea1a7acd7b7", "metadata": {}, "outputs": [ @@ -1871,50 +2001,50 @@ " \n", " \n", " \n", - " 1\n", - " 413818\n", - " 413818\n", + " 0\n", + " 585611\n", + " 585611\n", " 1\n", " 1\n", - " 2\n", + " 0\n", " \n", " \n", - " 3\n", - " 413819\n", - " 413819\n", + " 146\n", + " 585612\n", + " 585612\n", " 1\n", " 2\n", - " 3\n", + " 0\n", " \n", " \n", - " 2\n", - " 413820\n", - " 413820\n", + " 292\n", + " 585613\n", + " 585613\n", " 1\n", " 3\n", " 0\n", " \n", " \n", - " 0\n", - " 413821\n", - " 413821\n", + " 421\n", + " 585614\n", + " 585614\n", " 1\n", " 4\n", - " 1\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Pid Tid thread task device\n", - "1 413818 413818 1 1 2\n", - "3 413819 413819 1 2 3\n", - "2 413820 413820 1 3 0\n", - "0 413821 413821 1 4 1" + " Pid Tid thread task device\n", + "0 585611 585611 1 1 0\n", + "146 585612 585612 1 2 0\n", + "292 585613 585613 1 3 0\n", + "421 585614 585614 1 4 0" ] }, - "execution_count": 19, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1934,7 +2064,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 13, "id": "8f6ca36c", "metadata": {}, "outputs": [ @@ -1975,31 +2105,31 @@ " \n", " \n", " 1\n", - " 413818\n", - " {413818}\n", + " 585611\n", + " {585611}\n", " 1\n", - " 2\n", + " 0\n", " \n", " \n", " 2\n", - " 413819\n", - " {413819}\n", + " 585612\n", + " {585612}\n", " 1\n", - " 3\n", + " 0\n", " \n", " \n", " 3\n", - " 413820\n", - " {413820}\n", + " 585613\n", + " {585613}\n", " 1\n", " 0\n", " \n", " \n", " 4\n", - " 413821\n", - " {413821}\n", - " 1\n", + " 585614\n", + " {585614}\n", " 1\n", + " 0\n", " \n", " \n", "\n", @@ -2008,13 +2138,13 @@ "text/plain": [ " Pid Tid thread device\n", "task \n", - "1 413818 {413818} 1 2\n", - "2 413819 {413819} 1 3\n", - "3 413820 {413820} 1 0\n", - "4 413821 {413821} 1 1" + "1 585611 {585611} 1 0\n", + "2 585612 {585612} 1 0\n", + "3 585613 {585613} 1 0\n", + "4 585614 {585614} 1 0" ] }, - "execution_count": 20, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -2038,6 +2168,12 @@ "cuda_api_df[\"task\"] = 0\n", "#nvtx_df[\"thread\"] = 0\n", "#nvtx_df[\"task\"] = 0\n", + "openacc_other_df[\"thread\"] = 0\n", + "openacc_other_df[\"task\"] = 0\n", + "openacc_launch_df[\"thread\"] = 0\n", + "openacc_launch_df[\"task\"] = 0\n", + "openacc_data_df[\"thread\"] = 0\n", + "openacc_data_df[\"task\"] = 0\n", "\n", "threads['row_name'] = \"THREAD 1.\" + threads['task'].astype(str) + '.' + threads['thread'].astype(str)\n", "\n", @@ -2048,6 +2184,22 @@ "#nvtx_df[\"task\"] = nvtx_df[\"Tid\"].map(threads.set_index('Tid')[\"task\"])\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "dca9eb26", + "metadata": {}, + "outputs": [], + "source": [ + "if t_openacc:\n", + " openacc_other_df[\"thread\"] = openacc_other_df[\"Tid\"].map(threads.set_index('Tid')[\"thread\"])\n", + " openacc_other_df[\"task\"] = openacc_other_df[\"Tid\"].map(threads.set_index('Tid')[\"task\"])\n", + " openacc_launch_df[\"thread\"] = openacc_launch_df[\"Tid\"].map(threads.set_index('Tid')[\"thread\"])\n", + " openacc_launch_df[\"task\"] = openacc_launch_df[\"Tid\"].map(threads.set_index('Tid')[\"task\"])\n", + " openacc_data_df[\"thread\"] = openacc_data_df[\"Tid\"].map(threads.set_index('Tid')[\"thread\"])\n", + " openacc_data_df[\"task\"] = openacc_data_df[\"Tid\"].map(threads.set_index('Tid')[\"task\"])" + ] + }, { "cell_type": "markdown", "id": "eac096f4", @@ -3776,6 +3928,44 @@ "ranges_names.sort_values(\"event_value\", inplace=True)" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "43e05db4", + "metadata": {}, + "outputs": [], + "source": [ + "if t_openacc:\n", + " openacc_event_kind[\"id\"] += 1\n", + " openacc_launch_df[\"eventKind\"] += 1\n", + " openacc_data_df[\"eventKind\"] += 1\n", + " openacc_other_df[\"eventKind\"] += 1\n", + "\n", + " openacc_data_df[\"name_value\"] = openacc_data_df.groupby([\"name\"], dropna=False).ngroup() + 1\n", + " openacc_full_data_names = openacc_data_df[['name_value', 'name']].drop_duplicates()\n", + " openacc_full_data_names.sort_values([\"name_value\"], inplace=True)\n", + "\n", + " openacc_launch_df[\"name_value\"] = openacc_launch_df.groupby([\"name\"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0]\n", + " openacc_full_launch_names = openacc_launch_df[['name_value', 'name']].drop_duplicates()\n", + " openacc_full_launch_names.sort_values([\"name_value\"], inplace=True)\n", + "\n", + " openacc_other_df[\"name_value\"] = openacc_other_df.groupby([\"name\"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0] + openacc_full_launch_names.count().iloc[0]\n", + " openacc_full_other_names = openacc_other_df[['name_value', 'name']].drop_duplicates()\n", + " openacc_full_other_names.sort_values([\"name_value\"], inplace=True)\n", + "\n", + " openacc_data_df[\"func_value\"] = openacc_data_df.groupby([\"func\"], dropna=False).ngroup() + 1\n", + " openacc_full_data_funcs = openacc_data_df[['func_value', 'func']].drop_duplicates()\n", + " openacc_full_data_funcs.sort_values([\"func_value\"], inplace=True)\n", + "\n", + " openacc_launch_df[\"func_value\"] = openacc_launch_df.groupby([\"func\"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0]\n", + " openacc_full_launch_funcs = openacc_launch_df[['func_value', 'func']].drop_duplicates()\n", + " openacc_full_launch_funcs.sort_values([\"func_value\"], inplace=True)\n", + "\n", + " openacc_other_df[\"func_value\"] = openacc_other_df.groupby([\"func\"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0] + openacc_full_launch_funcs.count().iloc[0]\n", + " openacc_full_other_funcs = openacc_other_df[['func_value', 'func']].drop_duplicates()\n", + " openacc_full_other_funcs.sort_values([\"func_value\"], inplace=True)" + ] + }, { "cell_type": "code", "execution_count": 90, @@ -3875,6 +4065,79 @@ " pcf_file.write(\"{} {}\\n\".format(row[\"event_value\"], row[\"Name\"]))\n", "pcf_file.write(\"\\n\")\n", "\n", + "if t_openacc:\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC Data Events\\n\".format(event_type_openacc_data))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_event_kind.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"event_kind\"], row[\"label\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC Launch Events\\n\".format(event_type_openacc_launch))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_event_kind.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"event_kind\"], row[\"label\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC Other Events\\n\".format(event_type_openacc))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_event_kind.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"event_kind\"], row[\"label\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC data region source\\n\".format(event_type_name_openacc_data))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_full_data_names.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"name_value\"], row[\"name\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC launch region source\\n\".format(event_type_name_openacc_launch))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_full_launch_names.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"name_value\"], row[\"name\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC other region source\\n\".format(event_type_name_openacc))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_full_other_names.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"name_value\"], row[\"name\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC data function name\\n\".format(event_type_func_openacc_data))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_full_data_funcs.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"func_value\"], row[\"func\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC launch function name\\n\".format(event_type_func_openacc_launch))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_full_launch_funcs.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"func_value\"], row[\"func\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", + " pcf_file.write(\"EVENT_TYPE\\n\")\n", + " pcf_file.write(\"0 {} OpenACC other function name\\n\".format(event_type_func_openacc))\n", + " pcf_file.write(\"VALUES\\n\")\n", + " pcf_file.write(\"0 End\\n\")\n", + " for index, row in openacc_full_other_funcs.iterrows():\n", + " pcf_file.write(\"{} {}\\n\".format(row[\"func_value\"], row[\"func\"]))\n", + " pcf_file.write(\"\\n\")\n", + "\n", "pcf_file.close()" ] }, @@ -4396,6 +4659,26 @@ "prv_file.write(chunk)\n", "\n", "chunk = \"\"\n", + "t_acc_d = [event_type_openacc_data, event_type_name_openacc_data, event_type_func_openacc_data, event_type_openacc_data_size]\n", + "for index, r in openacc_data_df.iterrows():\n", + " values = [r[\"eventKind\"], r[\"name_value\"], r[\"func_value\"], r[\"bytes\"]]\n", + " chunk += create_combined_events_record(r[\"start\"], r[\"end\"] - r[\"start\"], r[\"thread\"], r[\"task\"], t_acc_d, values)\n", + "prv_file.write(chunk)\n", + "chunk = \"\"\n", + "t_acc_l = [event_type_openacc_launch, event_type_name_openacc_launch, event_type_func_openacc_launch]\n", + "for index, r in openacc_launch_df.iterrows():\n", + " values = [r[\"eventKind\"], r[\"name_value\"], r[\"func_value\"]]\n", + " chunk += create_combined_events_record(r[\"start\"], r[\"end\"] - r[\"start\"], r[\"thread\"], r[\"task\"], t_acc_l, values)\n", + "prv_file.write(chunk)\n", + "chunk = \"\"\n", + "t_acc_o = [event_type_openacc, event_type_name_openacc, event_type_func_openacc]\n", + "for index, r in openacc_other_df.iterrows():\n", + " values = [r[\"eventKind\"], r[\"name_value\"], r[\"func_value\"]]\n", + " chunk += create_combined_events_record(r[\"start\"], r[\"end\"] - r[\"start\"], r[\"thread\"], r[\"task\"], t_acc_o, values)\n", + "prv_file.write(chunk)\n", + "\n", + "\n", + "chunk = \"\"\n", "for index, row in comm_kernel_df.iterrows():\n", " chunk += create_communication_record(row[\"task\"], row[\"thread_call\"], row[\"task\"], row[\"thread_k\"], (row[\"Start (ns)_call\"] + row[\"Duration (ns)_call\"]), row[\"Start (ns)_k\"], 0, comm_tag_launch)\n", "prv_file.write(chunk)\n", diff --git a/parse-nsys-stats.py b/parse-nsys-stats.py index 651a4476a669022f38be734330b6e3428c108e61..29ee79a594fc0ba3e70a9ae99429d5983d489211 100755 --- a/parse-nsys-stats.py +++ b/parse-nsys-stats.py @@ -11,6 +11,7 @@ import subprocess import os import locale import sqlite3 +from sqlalchemy import create_engine locale.setlocale(locale.LC_ALL, '') @@ -19,7 +20,7 @@ parser = argparse.ArgumentParser(description="Convert a NVIDIA Nsight System tra epilog="The environment variables NSIGHT_HOME and PARAVER_HOME are needed") parser.add_argument("-f", "--filter-nvtx", help="Filter by this NVTX range") -parser.add_argument("-t", "--trace", required=True, help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, cuda_api_trace, cuda_gpu_trace, gpu_metrics]") +parser.add_argument("-t", "--trace", required=True, help="Comma separated names of events to translate: [mpi_event_trace, nvtx_pushpop_trace, cuda_api_trace, cuda_gpu_trace, gpu_metrics, openacc]") parser.add_argument("--force-sqlite", action="store_true", help="Force Nsight System to export SQLite database") @@ -36,6 +37,89 @@ args = parser.parse_args() # # Trace configuration and setup +sql_openacc_other = """ +SELECT + CASE + WHEN srcFile NOT NULL + THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo + ELSE nameIds.value + END AS name, + start, + end, + eventKind, + funcIds.value as func, + globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid +FROM + CUPTI_ACTIVITY_KIND_OPENACC_OTHER +LEFT JOIN + StringIds AS srcFileIds + ON srcFileIds.id == srcFile +LEFT JOIN + StringIds AS nameIds + ON nameIds.id == nameId +LEFT JOIN + StringIds AS funcIds + ON funcIds.id == funcName +""" + +sql_openacc_launch = """ +SELECT + CASE + WHEN srcFile NOT NULL + THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo + ELSE nameIds.value + END AS name, + start, + end, + eventKind, + kernelIds.value as kernelName, + funcIds.value as func, + globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid +FROM + CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH +LEFT JOIN + StringIds AS srcFileIds + ON srcFileIds.id == srcFile +LEFT JOIN + StringIds AS nameIds + ON nameIds.id == nameId +LEFT JOIN + StringIds AS funcIds + ON funcIds.id == funcName +LEFT JOIN + StringIds AS kernelIds + ON kernelIds.id == kernelName +""" + +sql_openacc_data = """ +SELECT + CASE + WHEN srcFile NOT NULL + THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo + ELSE nameIds.value + END AS name, + start, + end, + eventKind, + varIds.value as variableName, + funcIds.value as func, + bytes, + globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid +FROM + CUPTI_ACTIVITY_KIND_OPENACC_DATA +LEFT JOIN + StringIds AS srcFileIds + ON srcFileIds.id == srcFile +LEFT JOIN + StringIds AS nameIds + ON nameIds.id == nameId +LEFT JOIN + StringIds AS funcIds + ON funcIds.id == funcName +LEFT JOIN + StringIds AS varIds + ON varIds.id == varName +""" #NSIGHT_HOME="/home/mclasca/Apps/nsight-system/2024.1" NSIGHT_HOME = os.getenv('NSIGHT_HOME') @@ -54,12 +138,14 @@ NVTX_FILTER = args.filter_nvtx != None NVTX_RANGE = args.filter_nvtx reports = args.trace.split(",") +reports_og = reports.copy() t_nvtx = False t_kernels = False t_apicalls = False t_mpi = False t_metrics = False +t_openacc = False if "nvtx_pushpop_trace" in reports: t_nvtx = True if "cuda_gpu_trace" in reports: t_kernels = True @@ -68,6 +154,9 @@ if "mpi_event_trace" in reports: t_mpi = True if "gpu_metrics" in reports: t_metrics = True reports.remove("gpu_metrics") +if "openacc" in reports: + t_openacc = True + reports.remove("openacc") #trace_name = "llava_cesga" event_type_kernels = 63000006 @@ -82,6 +171,20 @@ event_type_correlation = 9200 event_type_mpi = 9300 event_type_metrics_base = 9400 +event_type_openacc = 66000000 +event_type_openacc_data = 66000001 +event_type_openacc_launch = 66000002 + +event_type_name_openacc = 66100000 +event_type_name_openacc_data = 66100001 +event_type_name_openacc_launch = 66100002 + +event_type_func_openacc = 66200000 +event_type_func_openacc_data = 66200001 +event_type_func_openacc_launch = 66200002 + +event_type_openacc_data_size = 66300001 + comm_tag_launch = 55001 comm_tag_memory = 55002 comm_tag_dependency = 55003 @@ -102,7 +205,7 @@ def build_nsys_stats_name(report_name): return os.path.join(REPORT_DIR, base_name+"_{}.csv".format(report_name)) -print("Extracting reports for: {}".format(reports)) +print("Extracting reports for: {}".format(reports_og)) nsys_call = (NSIGHT_HOME+"/bin/nsys", "stats", "-r", ",".join(reports), "--timeunit", "nsec", "-f", "csv", "--force-overwrite", "true", "-o", ".") @@ -163,6 +266,14 @@ if t_metrics: gpu_metrics_agg.reset_index(inplace=True) +if t_openacc: + engine = create_engine(f"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite") + with engine.connect() as conn, conn.begin(): + openacc_other_df = pd.read_sql_query(sql_openacc_other, conn) + openacc_launch_df = pd.read_sql_query(sql_openacc_launch, conn) + openacc_data_df = pd.read_sql_query(sql_openacc_data, conn) + openacc_event_kind = pd.read_sql_table("ENUM_OPENACC_EVENT_KIND", conn) + # # Building object model @@ -171,7 +282,8 @@ if t_metrics: if t_apicalls: print("CUDA calls unique processes: {}, and unique threads: {}".format(cuda_api_df["Pid"].unique(), cuda_api_df["Tid"].unique())) if t_nvtx: print("NVTX ranges unique processes: {}, and unique threads: {}".format(nvtx_df["PID"].unique(), nvtx_df["TID"].unique())) -if t_mpi: print("MPI ranges unique processes: {}, and unique threads: {}".format(mpi_df["Pid"].unique(), mpi_df["Tid"].unique())) +if t_mpi: print("MPI calls unique processes: {}, and unique threads: {}".format(mpi_df["Pid"].unique(), mpi_df["Tid"].unique())) +if t_openacc: print("OpenACC calls unique processes: {}, and unique threads: {}".format(openacc_other_df["Pid"].unique(), openacc_other_df["Tid"].unique())) if t_nvtx: nvtx_df.rename(columns={"PID":"Pid", "TID":"Tid"}, inplace=True) @@ -179,6 +291,7 @@ compute_threads_with = [] if t_apicalls: compute_threads_with.append(cuda_api_df[['Pid', 'Tid']]) if t_nvtx: compute_threads_with.append(nvtx_df[["Pid", "Tid"]]) if t_mpi: compute_threads_with.append(mpi_df[["Pid", "Tid"]]) +if t_openacc: compute_threads_with.append(openacc_other_df[["Pid", "Tid"]]) threads = pd.concat(compute_threads_with).drop_duplicates() threads.sort_values(["Pid"], inplace=True) @@ -199,6 +312,12 @@ nvtx_df["thread"] = 0 nvtx_df["task"] = 0 mpi_df["thread"] = 0 mpi_df["task"] = 0 +openacc_other_df["thread"] = 0 +openacc_other_df["task"] = 0 +openacc_launch_df["thread"] = 0 +openacc_launch_df["task"] = 0 +openacc_data_df["thread"] = 0 +openacc_data_df["task"] = 0 threads['row_name'] = "THREAD 1." + threads['task'].astype(str) + '.' + threads['thread'].astype(str) @@ -222,6 +341,15 @@ if t_mpi: mpi_df["thread"] = mpi_df["Tid"].map(threads.set_index('Tid')["thread"]) mpi_df["task"] = mpi_df["Tid"].map(threads.set_index('Tid')["task"]) +if t_openacc: + openacc_other_df["thread"] = openacc_other_df["Tid"].map(threads.set_index('Tid')["thread"]) + openacc_other_df["task"] = openacc_other_df["Tid"].map(threads.set_index('Tid')["task"]) + openacc_launch_df["thread"] = openacc_launch_df["Tid"].map(threads.set_index('Tid')["thread"]) + openacc_launch_df["task"] = openacc_launch_df["Tid"].map(threads.set_index('Tid')["task"]) + openacc_data_df["thread"] = openacc_data_df["Tid"].map(threads.set_index('Tid')["thread"]) + openacc_data_df["task"] = openacc_data_df["Tid"].map(threads.set_index('Tid')["task"]) + + # # ## GPU devices # First, detect number of devices and streams. To respect Paraver's resource model, we will create a THREAD for each stream. To do that, select each unique pair of Device and Stream and assign an incremental ID. @@ -335,6 +463,37 @@ if t_nvtx: ranges_names = nvtx_df_subset[['event_value', 'Name']].drop_duplicates() ranges_names.sort_values("event_value", inplace=True) +if t_openacc: + openacc_event_kind["id"] += 1 + openacc_launch_df["eventKind"] += 1 + openacc_data_df["eventKind"] += 1 + openacc_other_df["eventKind"] += 1 + + openacc_data_df["name_value"] = openacc_data_df.groupby(["name"], dropna=False).ngroup() + 1 + openacc_full_data_names = openacc_data_df[['name_value', 'name']].drop_duplicates() + openacc_full_data_names.sort_values(["name_value"], inplace=True) + + openacc_launch_df["name_value"] = openacc_launch_df.groupby(["name"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0] + openacc_full_launch_names = openacc_launch_df[['name_value', 'name']].drop_duplicates() + openacc_full_launch_names.sort_values(["name_value"], inplace=True) + + openacc_other_df["name_value"] = openacc_other_df.groupby(["name"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0] + openacc_full_launch_names.count().iloc[0] + openacc_full_other_names = openacc_other_df[['name_value', 'name']].drop_duplicates() + openacc_full_other_names.sort_values(["name_value"], inplace=True) + + openacc_data_df["func_value"] = openacc_data_df.groupby(["func"], dropna=False).ngroup() + 1 + openacc_full_data_funcs = openacc_data_df[['func_value', 'func']].drop_duplicates() + openacc_full_data_funcs.sort_values(["func_value"], inplace=True) + + openacc_launch_df["func_value"] = openacc_launch_df.groupby(["func"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0] + openacc_full_launch_funcs = openacc_launch_df[['func_value', 'func']].drop_duplicates() + openacc_full_launch_funcs.sort_values(["func_value"], inplace=True) + + openacc_other_df["func_value"] = openacc_other_df.groupby(["func"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0] + openacc_full_launch_funcs.count().iloc[0] + openacc_full_other_funcs = openacc_other_df[['func_value', 'func']].drop_duplicates() + openacc_full_other_funcs.sort_values(["func_value"], inplace=True) + + print("-\tWriting pcf file...") @@ -444,6 +603,79 @@ if t_nvtx: pcf_file.write("{} {}\n".format(row["event_value"], row["Name"])) pcf_file.write("\n") +if t_openacc: + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC Data Events\n".format(event_type_openacc_data)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_event_kind.iterrows(): + pcf_file.write("{} {}\n".format(row["id"], row["label"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC Launch Events\n".format(event_type_openacc_launch)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_event_kind.iterrows(): + pcf_file.write("{} {}\n".format(row["id"], row["label"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC Other Events\n".format(event_type_openacc)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_event_kind.iterrows(): + pcf_file.write("{} {}\n".format(row["id"], row["label"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC data region source\n".format(event_type_name_openacc_data)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_full_data_names.iterrows(): + pcf_file.write("{} {}\n".format(row["name_value"], row["name"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC launch region source\n".format(event_type_name_openacc_launch)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_full_launch_names.iterrows(): + pcf_file.write("{} {}\n".format(row["name_value"], row["name"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC other region source\n".format(event_type_name_openacc)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_full_other_names.iterrows(): + pcf_file.write("{} {}\n".format(row["name_value"], row["name"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC data function name\n".format(event_type_func_openacc_data)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_full_data_funcs.iterrows(): + pcf_file.write("{} {}\n".format(row["func_value"], row["func"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC launch function name\n".format(event_type_func_openacc_launch)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_full_launch_funcs.iterrows(): + pcf_file.write("{} {}\n".format(row["func_value"], row["func"])) + pcf_file.write("\n") + + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} OpenACC other function name\n".format(event_type_func_openacc)) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in openacc_full_other_funcs.iterrows(): + pcf_file.write("{} {}\n".format(row["func_value"], row["func"])) + pcf_file.write("\n") + pcf_file.close() # # Split of kernel execution between compute and memory @@ -560,6 +792,27 @@ if t_mpi: prv_file.write(chunk) chunk = "" +if t_openacc: + print("-\tWriting OpenACC events...") + t_acc_d = [event_type_openacc_data, event_type_name_openacc_data, event_type_func_openacc_data, event_type_openacc_data_size] + for index, r in openacc_data_df.iterrows(): + values = [r["eventKind"], r["name_value"], r["func_value"], r["bytes"]] + chunk += create_combined_events_record(r["start"], r["end"] - r["start"], r["thread"], r["task"], t_acc_d, values) + prv_file.write(chunk) + chunk = "" + t_acc_l = [event_type_openacc_launch, event_type_name_openacc_launch, event_type_func_openacc_launch] + for index, r in openacc_launch_df.iterrows(): + values = [r["eventKind"], r["name_value"], r["func_value"]] + chunk += create_combined_events_record(r["start"], r["end"] - r["start"], r["thread"], r["task"], t_acc_l, values) + prv_file.write(chunk) + chunk = "" + t_acc_o = [event_type_openacc, event_type_name_openacc, event_type_func_openacc] + for index, r in openacc_other_df.iterrows(): + values = [r["eventKind"], r["name_value"], r["func_value"]] + chunk += create_combined_events_record(r["start"], r["end"] - r["start"], r["thread"], r["task"], t_acc_o, values) + prv_file.write(chunk) + chunk = "" + if t_metrics: print("-\tWriting GPU metrics...") for index, row in gpu_metrics_agg.iterrows(): diff --git a/parser-playground.ipynb b/parser-playground.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3a2169c3b1ec318333e91b25ec94d3f9044f4ff1 --- /dev/null +++ b/parser-playground.ipynb @@ -0,0 +1,1466 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import time\n", + "import subprocess\n", + "import os\n", + "import locale\n", + "import sqlite3\n", + "from sqlalchemy import create_engine\n", + "\n", + "NSIGHT_HOME=\"/home/mclasca/Apps/nsight-system/2024.1\"\n", + "#NSIGHT_HOME = os.getenv('NSIGHT_HOME')\n", + "PARAVER_HOME = os.getenv('PARAVER_HOME')\n", + "NVTX_RANGE=\"step53\"\n", + "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/heka/profiles/mistral-mn5/heka-axolotl-Mistral7B0.1-4s_withmetrics-2432719.nsys-rep\")\n", + "#REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/xshells/nsys/xshells.par.medium-1N_withmetrics.nsys-rep\")\n", + "REPORT_FILE = os.path.abspath(\"/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.nsys-rep\")\n", + "REPORT_DIR = os.path.dirname(REPORT_FILE)\n", + "#REPORT_NAME=\"heka-step53+accum1-profile-2023.4-5721957\"\n", + "#REPORT_NAME=\"heka-axolotl-Mistral7B0.1-profile-2110598\"\n", + "\n", + "locale.setlocale(locale.LC_ALL, '')\n", + "\n", + "trace_name = \"test-sod2d-openacc\"\n", + "event_type_kernels = 63000006\n", + "event_type_memcopy_size = 63000002\n", + "event_type_api = 63000000\n", + "event_type_nvtx = 9003\n", + "event_type_blkgrd_name = 9100\n", + "event_types_block_grid_values = [9101, 9102, 9103, 9104, 9105, 9106]\n", + "event_types_block_grid_values_names = ['GrdX', 'GrdY', 'GrdZ', 'BlkX', 'BlkY', 'BlkZ']\n", + "event_type_registers_thread = 9107\n", + "event_type_correlation = 9200\n", + "event_type_mpi = 9300\n", + "event_type_metrics_base = 9400\n", + "\n", + "event_type_openacc = 66000000\n", + "event_type_openacc_data = 66000001\n", + "event_type_openacc_launch = 66000002\n", + "\n", + "event_type_name_openacc = 66100000\n", + "event_type_name_openacc_data = 66100001\n", + "event_type_name_openacc_launch = 66100002\n", + "\n", + "event_type_func_openacc = 66200000\n", + "event_type_func_openacc_data = 66200001\n", + "event_type_func_openacc_launch = 66200002\n", + "\n", + "event_type_openacc_data_size = 66300001\n", + "\n", + "comm_tag_launch = 55001\n", + "comm_tag_memory = 55002\n", + "comm_tag_dependency = 55003\n", + "\n", + "nvtx_select_frames = True\n", + "nvtx_stack_top = 1\n", + "nvtx_stack_bottom = 4\n", + "\n", + "reports = [\"nvtx_pushpop_trace\", \"cuda_api_trace\", \"cuda_gpu_trace\"]\n", + "\n", + "def build_nsys_stats_name(report_name):\n", + " base_name = os.path.splitext(os.path.basename(REPORT_FILE))[0]\n", + " return os.path.join(REPORT_DIR, base_name+\"_{}.csv\".format(report_name))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting reports for: ['nvtx_pushpop_trace', 'cuda_api_trace', 'cuda_gpu_trace']\n", + "\n", + "NOTICE: Existing SQLite export found: /home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite\n", + " It is assumed file was previously exported from: /home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.nsys-rep\n", + " Consider using --force-export=true if needed.\n", + "\n", + "Processing [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite] with [/home/mclasca/Apps/nsight-system/2024.1/host-linux-x64/reports/nvtx_pushpop_trace.py] to [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d_nvtx_pushpop_trace.csv]... PROCESSED\n", + "\n", + "Processing [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite] with [/home/mclasca/Apps/nsight-system/2024.1/host-linux-x64/reports/cuda_api_trace.py] to [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d_cuda_api_trace.csv]... PROCESSED\n", + "\n", + "Processing [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d.sqlite] with [/home/mclasca/Apps/nsight-system/2024.1/host-linux-x64/reports/cuda_gpu_trace.py] to [/home/mclasca/Documents/BePPP/traces/sod2d/nsight4_sod2d_cuda_gpu_trace.csv]... PROCESSED\n", + "\n" + ] + } + ], + "source": [ + "print(\"Extracting reports for: {}\".format(reports))\n", + "args = (NSIGHT_HOME+\"/bin/nsys\", \"stats\", \"-r\", \",\".join(reports), \n", + " \"--timeunit\", \"nsec\", \"-f\", \"csv\", \n", + " \"--force-overwrite\", \"true\", \"-o\", \".\", REPORT_FILE)\n", + "\n", + "with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as p:\n", + " for line in p.stdout:\n", + " print(line.decode(), end='')\n", + "\n", + "if p.returncode != 0:\n", + " raise CalledProcessError(p.returncode, p.args)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Start (ns)Duration (ns)CorrIDGrdXGrdYGrdZBlkXBlkYBlkZReg/Trd...DymSMem (MB)Bytes (MB)Throughput (MB/s)SrcMemKdDstMemKdDeviceCtxGreenCtxStrmName
019249374861056213NaNNaNNaNNaNNaNNaNNaN...NaN0,00030,303PageableDeviceNVIDIA H100 (0)1NaN13[CUDA memcpy Host-to-Device]
119266781381024211NaNNaNNaNNaNNaNNaNNaN...NaN0,00031,250PageableDeviceNVIDIA H100 (0)1NaN13[CUDA memcpy Host-to-Device]
21928094871992213NaNNaNNaNNaNNaNNaNNaN...NaN0,00032,258PageableDeviceNVIDIA H100 (0)1NaN13[CUDA memcpy Host-to-Device]
31932409317992211NaNNaNNaNNaNNaNNaNNaN...NaN0,00032,258PageableDeviceNVIDIA H100 (0)1NaN13[CUDA memcpy Host-to-Device]
41980006677896684NaNNaNNaNNaNNaNNaNNaN...NaN0,000142,857PageableDeviceNVIDIA H100 (0)1NaN13[CUDA memcpy Host-to-Device]
..................................................................
56599388788984452208198835NaNNaNNaNNaNNaNNaNNaN...NaN0,0001,812DevicePageableNVIDIA H100 (0)1NaN13[CUDA memcpy Device-to-Host]
56600388789405481099201543643.01.01.0256.01.01.018.0...0,001NaNNaNNaNNaNNVIDIA H100 (0)1NaN13mod_time_ops_adapt_dt_cfl_32_gpu__red
56601388790611232304154366NaNNaNNaNNaNNaNNaNNaN...NaN0,0001,736DevicePageableNVIDIA H100 (0)1NaN13[CUDA memcpy Device-to-Host]
56602388790718442304154367NaNNaNNaNNaNNaNNaNNaN...NaN0,0001,736DevicePageableNVIDIA H100 (0)1NaN13[CUDA memcpy Device-to-Host]
56603388790824992304154368NaNNaNNaNNaNNaNNaNNaN...NaN0,0001,736DevicePageableNVIDIA H100 (0)1NaN13[CUDA memcpy Device-to-Host]
\n", + "

56604 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " Start (ns) Duration (ns) CorrID GrdX GrdY GrdZ BlkX BlkY \\\n", + "0 1924937486 1056 213 NaN NaN NaN NaN NaN \n", + "1 1926678138 1024 211 NaN NaN NaN NaN NaN \n", + "2 1928094871 992 213 NaN NaN NaN NaN NaN \n", + "3 1932409317 992 211 NaN NaN NaN NaN NaN \n", + "4 1980006677 896 684 NaN NaN NaN NaN NaN \n", + "... ... ... ... ... ... ... ... ... \n", + "56599 38878898445 2208 198835 NaN NaN NaN NaN NaN \n", + "56600 38878940548 109920 154364 3.0 1.0 1.0 256.0 1.0 \n", + "56601 38879061123 2304 154366 NaN NaN NaN NaN NaN \n", + "56602 38879071844 2304 154367 NaN NaN NaN NaN NaN \n", + "56603 38879082499 2304 154368 NaN NaN NaN NaN NaN \n", + "\n", + " BlkZ Reg/Trd ... DymSMem (MB) Bytes (MB) Throughput (MB/s) SrcMemKd \\\n", + "0 NaN NaN ... NaN 0,000 30,303 Pageable \n", + "1 NaN NaN ... NaN 0,000 31,250 Pageable \n", + "2 NaN NaN ... NaN 0,000 32,258 Pageable \n", + "3 NaN NaN ... NaN 0,000 32,258 Pageable \n", + "4 NaN NaN ... NaN 0,000 142,857 Pageable \n", + "... ... ... ... ... ... ... ... \n", + "56599 NaN NaN ... NaN 0,000 1,812 Device \n", + "56600 1.0 18.0 ... 0,001 NaN NaN NaN \n", + "56601 NaN NaN ... NaN 0,000 1,736 Device \n", + "56602 NaN NaN ... NaN 0,000 1,736 Device \n", + "56603 NaN NaN ... NaN 0,000 1,736 Device \n", + "\n", + " DstMemKd Device Ctx GreenCtx Strm \\\n", + "0 Device NVIDIA H100 (0) 1 NaN 13 \n", + "1 Device NVIDIA H100 (0) 1 NaN 13 \n", + "2 Device NVIDIA H100 (0) 1 NaN 13 \n", + "3 Device NVIDIA H100 (0) 1 NaN 13 \n", + "4 Device NVIDIA H100 (0) 1 NaN 13 \n", + "... ... ... .. ... ... \n", + "56599 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "56600 NaN NVIDIA H100 (0) 1 NaN 13 \n", + "56601 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "56602 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "56603 Pageable NVIDIA H100 (0) 1 NaN 13 \n", + "\n", + " Name \n", + "0 [CUDA memcpy Host-to-Device] \n", + "1 [CUDA memcpy Host-to-Device] \n", + "2 [CUDA memcpy Host-to-Device] \n", + "3 [CUDA memcpy Host-to-Device] \n", + "4 [CUDA memcpy Host-to-Device] \n", + "... ... \n", + "56599 [CUDA memcpy Device-to-Host] \n", + "56600 mod_time_ops_adapt_dt_cfl_32_gpu__red \n", + "56601 [CUDA memcpy Device-to-Host] \n", + "56602 [CUDA memcpy Device-to-Host] \n", + "56603 [CUDA memcpy Device-to-Host] \n", + "\n", + "[56604 rows x 21 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kernels_df = pd.read_csv(build_nsys_stats_name(\"cuda_gpu_trace\"))\n", + "kernels_df.rename(columns={\"CorrId\": \"CorrID\"}, inplace=True)\n", + "kernels_df" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "sql_openacc_other = \"\"\"\n", + "SELECT\n", + " CASE\n", + " WHEN srcFile NOT NULL\n", + " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n", + " ELSE nameIds.value\n", + " END AS name,\n", + " start,\n", + " end,\n", + " eventKind,\n", + "\tfuncIds.value as func,\n", + " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n", + "FROM\n", + " CUPTI_ACTIVITY_KIND_OPENACC_OTHER\n", + "LEFT JOIN\n", + " StringIds AS srcFileIds\n", + " ON srcFileIds.id == srcFile\n", + "LEFT JOIN\n", + " StringIds AS nameIds\n", + " ON nameIds.id == nameId\n", + "LEFT JOIN\n", + "\tStringIds AS funcIds\n", + "\tON funcIds.id == funcName\n", + "\"\"\"\n", + "\n", + "sql_openacc_launch = \"\"\"\n", + "SELECT\n", + " CASE\n", + " WHEN srcFile NOT NULL\n", + " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n", + " ELSE nameIds.value\n", + " END AS name,\n", + " start,\n", + " end,\n", + " eventKind,\n", + " kernelIds.value as kernelName,\n", + "\tfuncIds.value as func,\n", + " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n", + "FROM\n", + " CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH\n", + "LEFT JOIN\n", + " StringIds AS srcFileIds\n", + " ON srcFileIds.id == srcFile\n", + "LEFT JOIN\n", + " StringIds AS nameIds\n", + " ON nameIds.id == nameId\n", + "LEFT JOIN\n", + "\tStringIds AS funcIds\n", + "\tON funcIds.id == funcName\n", + "LEFT JOIN\n", + "\tStringIds AS kernelIds\n", + "\tON kernelIds.id == kernelName\n", + "\"\"\"\n", + "\n", + "sql_openacc_data = \"\"\"\n", + "SELECT\n", + " CASE\n", + " WHEN srcFile NOT NULL\n", + " THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo\n", + " ELSE nameIds.value\n", + " END AS name,\n", + " start,\n", + " end,\n", + " eventKind,\n", + " varIds.value as variableName,\n", + "\tfuncIds.value as func,\n", + "\tbytes,\n", + " globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid\n", + "FROM\n", + " CUPTI_ACTIVITY_KIND_OPENACC_DATA\n", + "LEFT JOIN\n", + " StringIds AS srcFileIds\n", + " ON srcFileIds.id == srcFile\n", + "LEFT JOIN\n", + " StringIds AS nameIds\n", + " ON nameIds.id == nameId\n", + "LEFT JOIN\n", + "\tStringIds AS funcIds\n", + "\tON funcIds.id == funcName\n", + "LEFT JOIN\n", + "\tStringIds AS varIds\n", + "\tON varIds.id == varName\n", + "\"\"\"\n", + "\n", + "engine = create_engine(f\"sqlite:///{os.path.splitext(REPORT_FILE)[0]}.sqlite\")\n", + "with engine.connect() as conn, conn.begin():\n", + " openacc_other_df = pd.read_sql_query(sql_openacc_other, conn)\n", + " openacc_launch_df = pd.read_sql_query(sql_openacc_launch, conn)\n", + " openacc_data_df = pd.read_sql_query(sql_openacc_data, conn)\n", + " openacc_event_kind = pd.read_sql_table(\"ENUM_OPENACC_EVENT_KIND\", conn)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamelabel
00CUPTI_OPENACC_EVENT_KIND_INVALIDInvalid
11CUPTI_OPENACC_EVENT_KIND_DEVICE_INITDevice init
22CUPTI_OPENACC_EVENT_KIND_DEVICE_SHUTDOWNDevice shutdown
33CUPTI_OPENACC_EVENT_KIND_RUNTIME_SHUTDOWNRuntime shutdown
44CUPTI_OPENACC_EVENT_KIND_ENQUEUE_LAUNCHEnqueue launch
55CUPTI_OPENACC_EVENT_KIND_ENQUEUE_UPLOADEnqueue upload
66CUPTI_OPENACC_EVENT_KIND_ENQUEUE_DOWNLOADEnqueue download
77CUPTI_OPENACC_EVENT_KIND_WAITWait
88CUPTI_OPENACC_EVENT_KIND_IMPLICIT_WAITImplicit wait
99CUPTI_OPENACC_EVENT_KIND_COMPUTE_CONSTRUCTCompute construct
1010CUPTI_OPENACC_EVENT_KIND_UPDATEUPDATE
1111CUPTI_OPENACC_EVENT_KIND_ENTER_DATAEnter data
1212CUPTI_OPENACC_EVENT_KIND_EXIT_DATAExit data
1313CUPTI_OPENACC_EVENT_KIND_CREATECreate
1414CUPTI_OPENACC_EVENT_KIND_DELETEDelete
1515CUPTI_OPENACC_EVENT_KIND_ALLOCAlloc
1616CUPTI_OPENACC_EVENT_KIND_FREEFree
\n", + "
" + ], + "text/plain": [ + " id name label\n", + "0 0 CUPTI_OPENACC_EVENT_KIND_INVALID Invalid\n", + "1 1 CUPTI_OPENACC_EVENT_KIND_DEVICE_INIT Device init\n", + "2 2 CUPTI_OPENACC_EVENT_KIND_DEVICE_SHUTDOWN Device shutdown\n", + "3 3 CUPTI_OPENACC_EVENT_KIND_RUNTIME_SHUTDOWN Runtime shutdown\n", + "4 4 CUPTI_OPENACC_EVENT_KIND_ENQUEUE_LAUNCH Enqueue launch\n", + "5 5 CUPTI_OPENACC_EVENT_KIND_ENQUEUE_UPLOAD Enqueue upload\n", + "6 6 CUPTI_OPENACC_EVENT_KIND_ENQUEUE_DOWNLOAD Enqueue download\n", + "7 7 CUPTI_OPENACC_EVENT_KIND_WAIT Wait\n", + "8 8 CUPTI_OPENACC_EVENT_KIND_IMPLICIT_WAIT Implicit wait\n", + "9 9 CUPTI_OPENACC_EVENT_KIND_COMPUTE_CONSTRUCT Compute construct\n", + "10 10 CUPTI_OPENACC_EVENT_KIND_UPDATE UPDATE\n", + "11 11 CUPTI_OPENACC_EVENT_KIND_ENTER_DATA Enter data\n", + "12 12 CUPTI_OPENACC_EVENT_KIND_EXIT_DATA Exit data\n", + "13 13 CUPTI_OPENACC_EVENT_KIND_CREATE Create\n", + "14 14 CUPTI_OPENACC_EVENT_KIND_DELETE Delete\n", + "15 15 CUPTI_OPENACC_EVENT_KIND_ALLOC Alloc\n", + "16 16 CUPTI_OPENACC_EVENT_KIND_FREE Free" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openacc_event_kind" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Event types and values preparation\n", + "### Add 1 to all event values for KIND" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "openacc_event_kind[\"id\"] += 1\n", + "openacc_launch_df[\"eventKind\"] += 1\n", + "openacc_data_df[\"eventKind\"] += 1\n", + "openacc_other_df[\"eventKind\"] += 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create event values for name of construct and function of construct, and make them consecutive" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "openacc_data_df[\"name_value\"] = openacc_data_df.groupby([\"name\"], dropna=False).ngroup() + 1\n", + "openacc_full_data_names = openacc_data_df[['name_value', 'name']].drop_duplicates()\n", + "openacc_full_data_names.sort_values([\"name_value\"], inplace=True)\n", + "\n", + "openacc_launch_df[\"name_value\"] = openacc_launch_df.groupby([\"name\"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0]\n", + "openacc_full_launch_names = openacc_launch_df[['name_value', 'name']].drop_duplicates()\n", + "openacc_full_launch_names.sort_values([\"name_value\"], inplace=True)\n", + "\n", + "openacc_other_df[\"name_value\"] = openacc_other_df.groupby([\"name\"], dropna=False).ngroup() + 1 + openacc_full_data_names.count().iloc[0] + openacc_full_launch_names.count().iloc[0]\n", + "openacc_full_other_names = openacc_other_df[['name_value', 'name']].drop_duplicates()\n", + "openacc_full_other_names.sort_values([\"name_value\"], inplace=True)\n", + "\n", + "openacc_data_df[\"func_value\"] = openacc_data_df.groupby([\"func\"], dropna=False).ngroup() + 1\n", + "openacc_full_data_funcs = openacc_data_df[['func_value', 'func']].drop_duplicates()\n", + "openacc_full_data_funcs.sort_values([\"func_value\"], inplace=True)\n", + "\n", + "openacc_launch_df[\"func_value\"] = openacc_launch_df.groupby([\"func\"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0]\n", + "openacc_full_launch_funcs = openacc_launch_df[['func_value', 'func']].drop_duplicates()\n", + "openacc_full_launch_funcs.sort_values([\"func_value\"], inplace=True)\n", + "\n", + "openacc_other_df[\"func_value\"] = openacc_other_df.groupby([\"func\"], dropna=False).ngroup() + 1 + openacc_full_data_funcs.count().iloc[0] + openacc_full_launch_funcs.count().iloc[0]\n", + "openacc_full_other_funcs = openacc_other_df[['func_value', 'func']].drop_duplicates()\n", + "openacc_full_other_funcs.sort_values([\"func_value\"], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namestartendeventKindkernelNamefuncPidTidname_valuefunc_value
0Enqueue Launch@CFDSolverBase.f90:1104808137041180814618125cfdsolverbase_mod_cfdsolverbase_allocatevariab...cfdsolverbase_allocatevariables58561158561177058
1Enqueue Launch@CFDSolverBase.f90:1104808137295380814199245cfdsolverbase_mod_cfdsolverbase_allocatevariab...cfdsolverbase_allocatevariables58561258561277058
2Enqueue Launch@CFDSolverBase.f90:1104808137514580814733365cfdsolverbase_mod_cfdsolverbase_allocatevariab...cfdsolverbase_allocatevariables58561458561477058
3Enqueue Launch@CFDSolverBase.f90:1104808140375980814361375cfdsolverbase_mod_cfdsolverbase_allocatevariab...cfdsolverbase_allocatevariables58561358561377058
4Enqueue Launch@CFDSolverBase.f90:1105808142084380814268725cfdsolverbase_mod_cfdsolverbase_allocatevariab...cfdsolverbase_allocatevariables58561258561277158
.................................
38287Enqueue Launch@mod_time_ops.f90:3238877372018388773749715mod_time_ops_adapt_dt_cfl_32_gpu__redadapt_dt_cfl58561458561488357
38288Enqueue Launch@mod_time_ops.f90:3238877372667388773762335mod_time_ops_adapt_dt_cfl_32_gpu__redadapt_dt_cfl58561358561388357
38289Enqueue Launch@mod_time_ops.f90:3238877372863388773757005mod_time_ops_adapt_dt_cfl_32_gpu__redadapt_dt_cfl58561258561288357
38290Enqueue Launch@mod_time_ops.f90:3238877384375388773879285mod_time_ops_adapt_dt_cfl_32_gpuadapt_dt_cfl58561158561188357
38291Enqueue Launch@mod_time_ops.f90:3238877388123388773909985mod_time_ops_adapt_dt_cfl_32_gpu__redadapt_dt_cfl58561158561188357
\n", + "

38292 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " name start end \\\n", + "0 Enqueue Launch@CFDSolverBase.f90:1104 8081370411 8081461812 \n", + "1 Enqueue Launch@CFDSolverBase.f90:1104 8081372953 8081419924 \n", + "2 Enqueue Launch@CFDSolverBase.f90:1104 8081375145 8081473336 \n", + "3 Enqueue Launch@CFDSolverBase.f90:1104 8081403759 8081436137 \n", + "4 Enqueue Launch@CFDSolverBase.f90:1105 8081420843 8081426872 \n", + "... ... ... ... \n", + "38287 Enqueue Launch@mod_time_ops.f90:32 38877372018 38877374971 \n", + "38288 Enqueue Launch@mod_time_ops.f90:32 38877372667 38877376233 \n", + "38289 Enqueue Launch@mod_time_ops.f90:32 38877372863 38877375700 \n", + "38290 Enqueue Launch@mod_time_ops.f90:32 38877384375 38877387928 \n", + "38291 Enqueue Launch@mod_time_ops.f90:32 38877388123 38877390998 \n", + "\n", + " eventKind kernelName \\\n", + "0 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n", + "1 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n", + "2 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n", + "3 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n", + "4 5 cfdsolverbase_mod_cfdsolverbase_allocatevariab... \n", + "... ... ... \n", + "38287 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n", + "38288 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n", + "38289 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n", + "38290 5 mod_time_ops_adapt_dt_cfl_32_gpu \n", + "38291 5 mod_time_ops_adapt_dt_cfl_32_gpu__red \n", + "\n", + " func Pid Tid name_value func_value \n", + "0 cfdsolverbase_allocatevariables 585611 585611 770 58 \n", + "1 cfdsolverbase_allocatevariables 585612 585612 770 58 \n", + "2 cfdsolverbase_allocatevariables 585614 585614 770 58 \n", + "3 cfdsolverbase_allocatevariables 585613 585613 770 58 \n", + "4 cfdsolverbase_allocatevariables 585612 585612 771 58 \n", + "... ... ... ... ... ... \n", + "38287 adapt_dt_cfl 585614 585614 883 57 \n", + "38288 adapt_dt_cfl 585613 585613 883 57 \n", + "38289 adapt_dt_cfl 585612 585612 883 57 \n", + "38290 adapt_dt_cfl 585611 585611 883 57 \n", + "38291 adapt_dt_cfl 585611 585611 883 57 \n", + "\n", + "[38292 rows x 10 columns]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openacc_launch_df" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
func_valuefunc
32641adapt_dt_cfl
7882cfdsolverbase_allocatevariables
21483cfdsolverbase_boundaryfacestonodes
22484cfdsolverbase_eval_elempernode_and_nearboundar...
20405cfdsolverbase_evalatoijkinverse
19206cfdsolverbase_evalcharlength
19487cfdsolverbase_evaljacobians
23808cfdsolverbase_evalmass
4849cfdsolverbase_evalshapefunctions
382410cfdsolverbase_evaltimeiteration
190111cfdsolverbase_evalviscosityfactor
333612cfdsolverbase_initialbuffer
187613cfdsolverbase_initializesourceterms
222114checkifwallmodelon
292415compute_fieldderivs
286016copy_elemgpscalarfield_in_nodes_for_inst
232417copy_from_rcvbuffer_int
243218copy_from_rcvbuffer_real
247219copy_nodescalarfield2save_in_aux_for_inst
276420copy_nodevectorfield2save_in_aux_for_inst
247821copyperiodicnodes_scalarfield
277022copyperiodicnodes_vectorfield
7224723deallocate_filters
199624elem_jacobian
229225elempernode
7248426end_comms
7253227end_comms_bnd
7233428end_hdf5_auxiliar_saving_arrays
7210029end_rk4_solver
230530fill_sendbuffer_int
241331fill_sendbuffer_real
414332full_convec_ijk
412833full_diffusion_ijk
392734generic_scalar_convec_ijk
22835init_comms
32436init_comms_bnd
166837init_filters
38838init_hdf5_auxiliar_saving_arrays
334439init_rk4_solver
14440load_connectivity_hdf5
11641load_coordinates_hdf5
442load_parallel_data_hdf5
240043lumped_mass_spectral
397244lumped_solver_scal
432545lumped_solver_vect
324046maxmach
234647nearboundarynode
392248rk_4_main
546849save_hdf5_restartfile
249250save_hdf5_resultsfile_basefunc
398051smart_visc_spectral
191252sutherland_viscosity
178053tgvsolver_evalinitialconditions
318054visc_dissipationrate
315655volavg_ek
056None
\n", + "
" + ], + "text/plain": [ + " func_value func\n", + "3264 1 adapt_dt_cfl\n", + "788 2 cfdsolverbase_allocatevariables\n", + "2148 3 cfdsolverbase_boundaryfacestonodes\n", + "2248 4 cfdsolverbase_eval_elempernode_and_nearboundar...\n", + "2040 5 cfdsolverbase_evalatoijkinverse\n", + "1920 6 cfdsolverbase_evalcharlength\n", + "1948 7 cfdsolverbase_evaljacobians\n", + "2380 8 cfdsolverbase_evalmass\n", + "484 9 cfdsolverbase_evalshapefunctions\n", + "3824 10 cfdsolverbase_evaltimeiteration\n", + "1901 11 cfdsolverbase_evalviscosityfactor\n", + "3336 12 cfdsolverbase_initialbuffer\n", + "1876 13 cfdsolverbase_initializesourceterms\n", + "2221 14 checkifwallmodelon\n", + "2924 15 compute_fieldderivs\n", + "2860 16 copy_elemgpscalarfield_in_nodes_for_inst\n", + "2324 17 copy_from_rcvbuffer_int\n", + "2432 18 copy_from_rcvbuffer_real\n", + "2472 19 copy_nodescalarfield2save_in_aux_for_inst\n", + "2764 20 copy_nodevectorfield2save_in_aux_for_inst\n", + "2478 21 copyperiodicnodes_scalarfield\n", + "2770 22 copyperiodicnodes_vectorfield\n", + "72247 23 deallocate_filters\n", + "1996 24 elem_jacobian\n", + "2292 25 elempernode\n", + "72484 26 end_comms\n", + "72532 27 end_comms_bnd\n", + "72334 28 end_hdf5_auxiliar_saving_arrays\n", + "72100 29 end_rk4_solver\n", + "2305 30 fill_sendbuffer_int\n", + "2413 31 fill_sendbuffer_real\n", + "4143 32 full_convec_ijk\n", + "4128 33 full_diffusion_ijk\n", + "3927 34 generic_scalar_convec_ijk\n", + "228 35 init_comms\n", + "324 36 init_comms_bnd\n", + "1668 37 init_filters\n", + "388 38 init_hdf5_auxiliar_saving_arrays\n", + "3344 39 init_rk4_solver\n", + "144 40 load_connectivity_hdf5\n", + "116 41 load_coordinates_hdf5\n", + "4 42 load_parallel_data_hdf5\n", + "2400 43 lumped_mass_spectral\n", + "3972 44 lumped_solver_scal\n", + "4325 45 lumped_solver_vect\n", + "3240 46 maxmach\n", + "2346 47 nearboundarynode\n", + "3922 48 rk_4_main\n", + "5468 49 save_hdf5_restartfile\n", + "2492 50 save_hdf5_resultsfile_basefunc\n", + "3980 51 smart_visc_spectral\n", + "1912 52 sutherland_viscosity\n", + "1780 53 tgvsolver_evalinitialconditions\n", + "3180 54 visc_dissipationrate\n", + "3156 55 volavg_ek\n", + "0 56 None" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openacc_full_data_funcs" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/scripts/openacc-test.sql b/scripts/openacc-test.sql new file mode 100644 index 0000000000000000000000000000000000000000..7a39c1e4a0521866a98623a5f0f14befa2ea172b --- /dev/null +++ b/scripts/openacc-test.sql @@ -0,0 +1,64 @@ +WITH + openacc AS ( + SELECT + start, + end, + nameId, + eventKind, + lineNo, + srcFile, + globalTid, + bytes, + funcName + FROM CUPTI_ACTIVITY_KIND_OPENACC_DATA + UNION ALL + SELECT + start, + end, + nameId, + eventKind, + lineNo, + srcFile, + globalTid, + null AS bytes, + funcName + FROM CUPTI_ACTIVITY_KIND_OPENACC_LAUNCH + UNION ALL + SELECT + start, + end, + nameId, + eventKind, + lineNo, + srcFile, + globalTid, + null AS bytes, + null AS funcName + FROM CUPTI_ACTIVITY_KIND_OPENACC_OTHER + ) +SELECT + CASE + WHEN srcFile NOT NULL + THEN nameIds.value || '@' || srcFileIds.value || ':' || lineNo + ELSE nameIds.value + END AS name, + start, + end, + eventIds.label, + funcIds.value as func, + bytes, + globalTid / 0x1000000 % 0x1000000 AS Pid, globalTid % 0x1000000 AS Tid +FROM + openacc +LEFT JOIN + StringIds AS srcFileIds + ON srcFileIds.id == srcFile +LEFT JOIN + StringIds AS nameIds + ON nameIds.id == nameId +LEFT JOIN + StringIds AS funcIds + ON funcIds.id == funcName +LEFT JOIN + ENUM_OPENACC_EVENT_KIND as eventIds + ON eventIds.id == eventKind \ No newline at end of file