diff --git a/cfgs/nccl_regions.cfg b/cfgs/nccl_regions.cfg new file mode 100644 index 0000000000000000000000000000000000000000..6c60e81b3f5cad36116232afc3d9bc672f2519d1 --- /dev/null +++ b/cfgs/nccl_regions.cfg @@ -0,0 +1,46 @@ +#ParaverCFG +ConfigFile.Version: 3.4 +ConfigFile.NumWindows: 1 +ConfigFile.BeginDescription +NVTX regions corresponding to NCCL domain +ConfigFile.EndDescription + +################################################################################ +< NEW DISPLAYING WINDOW NCCL regions > +################################################################################ +window_name NCCL regions +window_type single +window_id 1 +window_position_x 816 +window_position_y 494 +window_width 922 +window_height 165 +window_comm_lines_enabled false +window_flags_enabled false +window_noncolor_mode true +window_custom_color_enabled false +window_semantic_scale_min_at_zero false +window_logical_filtered true +window_physical_filtered false +window_comm_fromto true +window_comm_tagsize true +window_comm_typeval true +window_units Nanoseconds +window_maximum_y 5.000000000000 +window_minimum_y 1.000000000000 +window_compute_y_max true +window_level thread +window_scale_relative 1.000000000000 +window_end_time_relative 1.000000000000 +window_object appl { 1, { All } } +window_begin_time_relative 0.000000000000 +window_open true +window_drawmode draw_maximum +window_drawmode_rows draw_last +window_pixel_size 1 +window_labels_to_draw 1 +window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } +window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } +window_filter_module evt_type 1 9500 +window_filter_module evt_type_label 1 "NCCL regions" + diff --git a/cfgs/nvtx_ranges.cfg b/cfgs/nvtx_ranges.cfg index a0db6bfc637f7d44d55eb87dea52e5525b57db4f..67b81b978489cd4495bfeb1f3b4e1e0088137da9 100644 --- a/cfgs/nvtx_ranges.cfg +++ b/cfgs/nvtx_ranges.cfg @@ -2,17 +2,17 @@ ConfigFile.Version: 3.4 ConfigFile.NumWindows: 1 ConfigFile.BeginDescription - +NVTX pushpop ranges of default domain ConfigFile.EndDescription ################################################################################ -< NEW DISPLAYING WINDOW NVTX Ranges > +< NEW DISPLAYING WINDOW NVTX default domain > ################################################################################ -window_name NVTX Ranges +window_name NVTX default domain window_type single window_id 1 -window_position_x 612 -window_position_y 518 +window_position_x 816 +window_position_y 286 window_width 922 window_height 165 window_comm_lines_enabled false @@ -28,7 +28,7 @@ window_comm_typeval true window_units Nanoseconds window_maximum_y 52.000000000000 window_minimum_y 1.000000000000 -window_compute_y_max false +window_compute_y_max true window_level thread window_scale_relative 1.000000000000 window_end_time_relative 1.000000000000 @@ -41,6 +41,6 @@ window_pixel_size 1 window_labels_to_draw 1 window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } -window_filter_module evt_type 1 9003 -window_filter_module evt_type_label 1 "NVTX ranges" +window_filter_module evt_type 1 9600 +window_filter_module evt_type_label 1 "NVTX pushpop ranges default domain" diff --git a/nsys2prv/parse_nsys_stats.py b/nsys2prv/parse_nsys_stats.py index 8804d3ed95a42a75eb1a9d73eb9aa6f54d874ec6..a2dc4d3c0d67099a18004879ad96c3cd38806c21 100755 --- a/nsys2prv/parse_nsys_stats.py +++ b/nsys2prv/parse_nsys_stats.py @@ -99,6 +99,10 @@ def main(): event_type_mpi = 9300 event_type_metrics_base = 9400 + event_type_nvtx_base = 9600 + event_type_nvtx_nesmik = 81000 + event_type_nvtx_nccl = 9500 + event_type_openacc = 66000000 event_type_openacc_data = 66000001 event_type_openacc_launch = 66000002 @@ -175,6 +179,7 @@ def main(): if t_nvtx: nvtx_df = pd.read_csv(build_nsys_stats_name("nvtx_pushpop_trace")) + nvtx_df["domain"] = nvtx_df["Name"].str.split(":").str[0] else: nvtx_df = pd.DataFrame() @@ -468,11 +473,32 @@ def main(): #subset of df nvtx_df_subset = nvtx_df[(nvtx_df["Lvl"] >= nvtx_stack_top) & (nvtx_df["Lvl"] <= nvtx_stack_bottom)] + # split NCCL events + nvtx_nccl_df = nvtx_df_subset[nvtx_df_subset["domain"] == "NCCL"].copy() + nvtx_df_subset = nvtx_df_subset.drop(nvtx_df_subset[nvtx_df_subset["domain"] == "NCCL" ].index) + nvtx_nccl_df["event_type"] = event_type_nvtx_nccl + + # Now recurring domains, starting with nesmik + nvtx_df_subset.loc[nvtx_df_subset["domain"] == "neSmiK", "event_type"] = event_type_nvtx_nesmik + nvtx_df_subset["event_type"] = (nvtx_df_subset[nvtx_df_subset["domain"] != "neSmiK"].sort_values("domain").groupby(["domain"]).ngroup() * 100) + event_type_nvtx_base + nvtx_df_subset.loc[nvtx_df_subset["domain"] == "", "domain"] = "default" + nvtx_df_subset["event_value"] = nvtx_df_subset.groupby(["Name"]).ngroup() + 1 #nvtx_df_subset["event_value"] = nvtx_df_subset["RangeId"] + domain_names = nvtx_df_subset[["event_type", "domain"]].drop_duplicates() + + domains_dict = [] + for i, r in domain_names.iterrows(): + domains_dict.append({"name": r["domain"], "type": r["event_type"], "names": nvtx_df_subset.loc[nvtx_df_subset["domain"] == r["domain"], ['event_value', 'Name']].drop_duplicates().sort_values("event_value")}) + ranges_names = nvtx_df_subset[['event_value', 'Name']].drop_duplicates() ranges_names.sort_values("event_value", inplace=True) + # Now nccl treating + if not nvtx_nccl_df.empty: + nvtx_nccl_df["event_value"] = nvtx_nccl_df.groupby(["Name"]).ngroup() + 1 + nccl_names = nvtx_nccl_df[['event_value', 'Name']].drop_duplicates().sort_values("event_value") + if t_nvtx_startend: nvtx_startend_df["event_value"] = nvtx_startend_df.groupby(["tag"]).ngroup() + 1 nvtx_startend_names = nvtx_startend_df[['tag', 'event_value']].drop_duplicates() @@ -516,55 +542,55 @@ def main(): with open(trace_name+".pcf", "w") as pcf_file: CONFIG = """ - DEFAULT_OPTIONS - - LEVEL THREAD - UNITS NANOSEC - LOOK_BACK 100 - SPEED 1 - FLAG_ICONS ENABLED - NUM_OF_STATE_COLORS 1000 - YMAX_SCALE 37 - - - DEFAULT_SEMANTIC - - THREAD_FUNC State As Is - - GRADIENT_COLOR - 0 {0,255,2} - 1 {0,244,13} - 2 {0,232,25} - 3 {0,220,37} - 4 {0,209,48} - 5 {0,197,60} - 6 {0,185,72} - 7 {0,173,84} - 8 {0,162,95} - 9 {0,150,107} - 10 {0,138,119} - 11 {0,127,130} - 12 {0,115,142} - 13 {0,103,154} - 14 {0,91,166} - - - GRADIENT_NAMES - 0 Gradient 0 - 1 Grad. 1/MPI Events - 2 Grad. 2/OMP Events - 3 Grad. 3/OMP locks - 4 Grad. 4/User func - 5 Grad. 5/User Events - 6 Grad. 6/General Events - 7 Grad. 7/Hardware Counters - 8 Gradient 8 - 9 Gradient 9 - 10 Gradient 10 - 11 Gradient 11 - 12 Gradient 12 - 13 Gradient 13 - 14 Gradient 14 +DEFAULT_OPTIONS + +LEVEL THREAD +UNITS NANOSEC +LOOK_BACK 100 +SPEED 1 +FLAG_ICONS ENABLED +NUM_OF_STATE_COLORS 1000 +YMAX_SCALE 37 + + +DEFAULT_SEMANTIC + +THREAD_FUNC State As Is + +GRADIENT_COLOR +0 {0,255,2} +1 {0,244,13} +2 {0,232,25} +3 {0,220,37} +4 {0,209,48} +5 {0,197,60} +6 {0,185,72} +7 {0,173,84} +8 {0,162,95} +9 {0,150,107} +10 {0,138,119} +11 {0,127,130} +12 {0,115,142} +13 {0,103,154} +14 {0,91,166} + + +GRADIENT_NAMES +0 Gradient 0 +1 Grad. 1/MPI Events +2 Grad. 2/OMP Events +3 Grad. 3/OMP locks +4 Grad. 4/User func +5 Grad. 5/User Events +6 Grad. 6/General Events +7 Grad. 7/Hardware Counters +8 Gradient 8 +9 Gradient 9 +10 Gradient 10 +11 Gradient 11 +12 Gradient 12 +13 Gradient 13 +14 Gradient 14 """ @@ -643,13 +669,23 @@ def main(): pcf_file.write("\n") if t_nvtx: + for i, v in enumerate(domains_dict): + pcf_file.write("EVENT_TYPE\n") + pcf_file.write("0 {} NVTX pushpop ranges {} domain\n".format(v["type"], v["name"])) + pcf_file.write("VALUES\n") + pcf_file.write("0 End\n") + for index, row in v["names"].iterrows(): + pcf_file.write("{} {}\n".format(row["event_value"], row["Name"])) + pcf_file.write("\n") + + if not nvtx_nccl_df.empty: pcf_file.write("EVENT_TYPE\n") - pcf_file.write("0 {} NVTX pushpop ranges\n".format(event_type_nvtx)) + pcf_file.write("0 {} NCCL regions\n".format(event_type_nvtx_nccl)) pcf_file.write("VALUES\n") pcf_file.write("0 End\n") - for index, row in ranges_names.iterrows(): + for index, row in nccl_names.iterrows(): pcf_file.write("{} {}\n".format(row["event_value"], row["Name"])) - pcf_file.write("\n") + if t_nvtx_startend: pcf_file.write("EVENT_TYPE\n") @@ -823,7 +859,12 @@ def main(): if t_nvtx: ewr(prv_file, nvtx_df_subset, "NVTX pushpop ranges", lambda r: - (create_event_record(r.iloc[0], r.iloc[2], int(r["thread"]), int(r["task"]), event_type_nvtx, r["event_value"]))) + (create_event_record(r.iloc[0], r.iloc[2], int(r["thread"]), int(r["task"]), r["event_type"], r["event_value"]))) + + # NVTX NCCL regions, still missing nccl info + if not nvtx_nccl_df.empty: + ewr(prv_file, nvtx_nccl_df, "NVTX NCCL regions", lambda r: + (create_event_record(r.iloc[0], r.iloc[2], int(r["thread"]), int(r["task"]), r["event_type"], r["event_value"]))) if t_nvtx_startend: ewr(prv_file, nvtx_startend_df, "NVTX startend ranges", lambda r: diff --git a/parser-playground.ipynb b/parser-playground.ipynb index cb4dfd8771d4441555245aa3b06d0ab8a973d1da..1d5fabc569393cae9317e2c121704eb2d04dc8cd 100644 --- a/parser-playground.ipynb +++ b/parser-playground.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -28,7 +28,7 @@ "\n", "locale.setlocale(locale.LC_ALL, '')\n", "\n", - "trace_name = \"test-sod2d-openacc\"\n", + "trace_name = \"test-heka\"\n", "event_type_kernels = 63000006\n", "event_type_memcopy_size = 63000002\n", "event_type_api = 63000000\n", @@ -49,6 +49,10 @@ "event_type_name_openacc_data = 66100001\n", "event_type_name_openacc_launch = 66100002\n", "\n", + "event_type_nvtx_base = 9600\n", + "event_type_nvtx_nesmik = 81000\n", + "event_type_nvtx_nccl = 9500\n", + "\n", "event_type_func_openacc = 66200000\n", "event_type_func_openacc_data = 66200001\n", "event_type_func_openacc_launch = 66200002\n", @@ -70,6 +74,447 @@ " return os.path.join(REPORT_DIR, base_name+\"_{}.csv\".format(report_name))" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NVTX domain separation" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | Start (ns) | \n", + "End (ns) | \n", + "Duration (ns) | \n", + "DurChild (ns) | \n", + "DurNonChild (ns) | \n", + "Name | \n", + "Pid | \n", + "Tid | \n", + "Lvl | \n", + "NumChild | \n", + "RangeId | \n", + "ParentId | \n", + "RangeStack | \n", + "NameTree | \n", + "domain | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "7434607 | \n", + "23447008926 | \n", + "23439574319 | \n", + "23438856816 | \n", + "717503 | \n", + ":gradient_range | \n", + "1222693 | \n", + "1222693 | \n", + "0 | \n", + "2 | \n", + "17 | \n", + "NaN | \n", + ":17 | \n", + ":gradient_range | \n", + "\n", + " |
1 | \n", + "7438747 | \n", + "23445717343 | \n", + "23438278596 | \n", + "23438235127 | \n", + "43469 | \n", + ":gradient_range | \n", + "1222695 | \n", + "1222695 | \n", + "0 | \n", + "2 | \n", + "18 | \n", + "NaN | \n", + ":18 | \n", + ":gradient_range | \n", + "\n", + " |
2 | \n", + "7447423 | \n", + "23446499050 | \n", + "23439051627 | \n", + "23438994212 | \n", + "57415 | \n", + ":gradient_range | \n", + "1222696 | \n", + "1222696 | \n", + "0 | \n", + "2 | \n", + "19 | \n", + "NaN | \n", + ":19 | \n", + ":gradient_range | \n", + "\n", + " |
3 | \n", + "7465398 | \n", + "11743359159 | \n", + "11735893761 | \n", + "11520980930 | \n", + "214912831 | \n", + ":step10 | \n", + "1222695 | \n", + "1222695 | \n", + "1 | \n", + "20 | \n", + "20 | \n", + "18.0 | \n", + ":18:20 | \n", + "--:step10 | \n", + "\n", + " |
4 | \n", + "7481635 | \n", + "11747043312 | \n", + "11739561677 | \n", + "11515908837 | \n", + "223652840 | \n", + ":step10 | \n", + "1222696 | \n", + "1222696 | \n", + "1 | \n", + "20 | \n", + "21 | \n", + "19.0 | \n", + ":19:21 | \n", + "--:step10 | \n", + "\n", + " |
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
28415 | \n", + "26360988179 | \n", + "26361018592 | \n", + "30413 | \n", + "0 | \n", + "30413 | \n", + "NCCL:ncclGroupEnd | \n", + "1222693 | \n", + "1222693 | \n", + "0 | \n", + "0 | \n", + "28432 | \n", + "NaN | \n", + ":28432 | \n", + "NCCL:ncclGroupEnd | \n", + "NCCL | \n", + "
28416 | \n", + "26955896770 | \n", + "31966183120 | \n", + "5010286350 | \n", + "0 | \n", + "5010286350 | \n", + "NCCL:ncclCommAbort | \n", + "1222695 | \n", + "1222695 | \n", + "0 | \n", + "0 | \n", + "28433 | \n", + "NaN | \n", + ":28433 | \n", + "NCCL:ncclCommAbort | \n", + "NCCL | \n", + "
28417 | \n", + "27052606047 | \n", + "31968162807 | \n", + "4915556760 | \n", + "0 | \n", + "4915556760 | \n", + "NCCL:ncclCommAbort | \n", + "1222694 | \n", + "1222694 | \n", + "0 | \n", + "0 | \n", + "28434 | \n", + "NaN | \n", + ":28434 | \n", + "NCCL:ncclCommAbort | \n", + "NCCL | \n", + "
28418 | \n", + "27165307330 | \n", + "31967011519 | \n", + "4801704189 | \n", + "0 | \n", + "4801704189 | \n", + "NCCL:ncclCommAbort | \n", + "1222696 | \n", + "1222696 | \n", + "0 | \n", + "0 | \n", + "28435 | \n", + "NaN | \n", + ":28435 | \n", + "NCCL:ncclCommAbort | \n", + "NCCL | \n", + "
28419 | \n", + "37089101875 | \n", + "37682105581 | \n", + "593003706 | \n", + "0 | \n", + "593003706 | \n", + "NCCL:ncclCommAbort | \n", + "1222693 | \n", + "1222693 | \n", + "0 | \n", + "0 | \n", + "28436 | \n", + "NaN | \n", + ":28436 | \n", + "NCCL:ncclCommAbort | \n", + "NCCL | \n", + "
28420 rows × 15 columns
\n", + "\n", + " | event_value | \n", + "Name | \n", + "
---|---|---|
3101 | \n", + "4 | \n", + "NCCL:ncclGroupStart | \n", + "
3102 | \n", + "2 | \n", + "NCCL:ncclAllReduce | \n", + "
3103 | \n", + "3 | \n", + "NCCL:ncclGroupEnd | \n", + "
12597 | \n", + "1 | \n", + "NCCL:ncclAllGather | \n", + "