diff --git a/bsc/cpuid/default.nix b/bsc/cpuid/default.nix new file mode 100644 index 0000000000000000000000000000000000000000..30b9edc61b587e877523edd4a97109c3da06e28e --- /dev/null +++ b/bsc/cpuid/default.nix @@ -0,0 +1,21 @@ +{ + stdenv +, perl # For the pod2man command +}: + +stdenv.mkDerivation rec { + version = "20201006"; + pname = "cpuid"; + + nativeBuildInputs = [ perl ]; + + # Replace /usr install directory for $out + postPatch = '' + sed -i "s@/usr@$out@g" Makefile + ''; + + src = builtins.fetchTarball { + url = "http://www.etallen.com/cpuid/${pname}-${version}.src.tar.gz"; + sha256 = "04qhs938gs1kjxpsrnfy6lbsircsprfyh4db62s5cf83a1nrwn9w"; + }; +} diff --git a/garlic/exp/heat/granul.nix b/garlic/exp/heat/granul.nix deleted file mode 100644 index 6fab7b626aed09674f02542720e91aa7a6705586..0000000000000000000000000000000000000000 --- a/garlic/exp/heat/granul.nix +++ /dev/null @@ -1,146 +0,0 @@ -{ - stdenv -, stdexp -, bsc -, targetMachine -, stages -, garlicTools -, enablePerf ? false -, enableCTF ? 
false -}: - -with stdenv.lib; -with garlicTools; - -let - # Initial variable configuration - varConf = with bsc; { - #cbs = range2 32 4096; - #rbs = range2 32 4096; - cbs = [ 64 256 1024 4096 ]; - rbs = [ 32 128 512 1024 ]; - #cbs = [ 4096 ]; - #rbs = [ 32 ]; - }; - - machineConfig = targetMachine.config; - - # Generate the complete configuration for each unit - genConf = with bsc; c: targetMachine.config // rec { - expName = "heat"; - unitName = expName + - ".cbs-${toString cbs}" + - ".rbs-${toString rbs}"; - - inherit (machineConfig) hw; - - # heat options - timesteps = 10; - cols = 1024 * 16; # Columns - rows = 1024 * 16; # Rows - cbs = c.cbs; - rbs = c.rbs; - gitBranch = "garlic/tampi+isend+oss+task"; - - # Repeat the execution of each unit 30 times - loops = 1; - - # Resources - qos = "debug"; - ntasksPerNode = 1; - nodes = 1; - time = "02:00:00"; - # Assign one socket to each task (only one process) - cpusPerTask = hw.cpusPerSocket; - jobName = unitName; - }; - - # Compute the array of configurations - configs = stdexp.buildConfigs { - inherit varConf genConf; - }; - - perf = {nextStage, conf, ...}: stages.perf { - inherit nextStage; - perfOptions = "stat -o .garlic/perf.csv -x , " + - "-e cycles,instructions,cache-references,cache-misses"; - }; - - ctf = {nextStage, conf, ...}: with conf; stages.exec { - inherit nextStage; - env = '' - export NANOS6_CONFIG_OVERRIDE="version.instrument=ctf,\ - instrument.ctf.converter.enabled=false" - ''; - # Only one process converts the trace, otherwise use: - # if [ $SLURM_PROCID == 0 ]; then - # ... 
- # fi - post = '' - if [ $SLURM_PROCID == 0 ]; then - sleep 2 - for tracedir in trace_*; do - offset=$(grep 'offset =' $tracedir/ctf/ust/uid/1000/64-bit/metadata | \ - grep -o '[0-9]*') - echo "offset = $offset" - - start_time=$(awk '/^start_time / {print $2}' stdout.log) - end_time=$(awk '/^end_time / {print $2}' stdout.log) - - begin=$(awk "BEGIN{print $start_time*1e9 - $offset}") - end=$(awk "BEGIN{print $end_time*1e9 - $offset}") - - echo "only events between $begin and $end" - - ${bsc.cn6}/bin/cn6 -s $tracedir - - awk -F: "NR==1 {print} \$6 >= $begin && \$6 <= $end" $tracedir/prv/trace.prv |\ - ${bsc.cn6}/bin/dur 6400025 0 |\ - awk '{s+=$1} END {print s}' >> .garlic/time_mode_dead.csv & - - awk -F: "NR==1 {print} \$6 >= $begin && \$6 <= $end" $tracedir/prv/trace.prv |\ - ${bsc.cn6}/bin/dur 6400025 1 |\ - awk '{s+=$1} END {print s}' >> .garlic/time_mode_runtime.csv & - - awk -F: "NR==1 {print} \$6 >= $begin && \$6 <= $end" $tracedir/prv/trace.prv |\ - ${bsc.cn6}/bin/dur 6400025 3 |\ - awk '{s+=$1} END {print s}' >> .garlic/time_mode_task.csv & - - wait - - # Remove the traces at the end, as they are huge - rm -rf $tracedir - #cp -a $tracedir .garlic/ - done - fi - ''; - }; - - exec = {nextStage, conf, ...}: stages.exec { - inherit nextStage; - argv = [ - "--rows" conf.rows - "--cols" conf.cols - "--rbs" conf.rbs - "--cbs" conf.cbs - "--timesteps" conf.timesteps - ]; - - # The next stage is the program - env = '' - ln -sf ${nextStage}/etc/heat.conf heat.conf || true - ''; - }; - - program = {nextStage, conf, ...}: bsc.garlic.apps.heat.override { - inherit (conf) gitBranch; - }; - - pipeline = stdexp.stdPipeline ++ - (optional enablePerf perf) ++ - (optional enableCTF ctf) ++ - [ exec program ]; - -in - - stdexp.genExperiment { inherit configs pipeline; } diff --git a/garlic/exp/heat/granularity.nix b/garlic/exp/heat/granularity.nix new file mode 100644 index 0000000000000000000000000000000000000000..5d4d01ee813541bacd6be0e0f559356cba676fb8 --- /dev/null +++ 
b/garlic/exp/heat/granularity.nix @@ -0,0 +1,174 @@ +{ + stdenv +, stdexp +, bsc +, targetMachine +, stages +, garlicTools +, writeText +, enablePerf ? false +, enableCTF ? false +, enableHWC ? false +, enableExtended ? false +}: + +# TODO: Finish HWC first +assert (enableHWC == false); + +with stdenv.lib; +with garlicTools; + +let + # Initial variable configuration + varConf = with bsc; { + cbs = range2 32 4096; + rbs = range2 32 4096; + }; + + machineConfig = targetMachine.config; + + # Generate the complete configuration for each unit + genConf = with bsc; c: targetMachine.config // rec { + expName = "heat"; + unitName = expName + + ".cbs-${toString cbs}" + + ".rbs-${toString rbs}"; + + inherit (machineConfig) hw; + + # heat options + timesteps = 10; + cols = 1024 * 16; # Columns + rows = 1024 * 16; # Rows + inherit (c) cbs rbs; + gitBranch = "garlic/tampi+isend+oss+task"; + + # Repeat the execution of each unit 10 times + loops = 10; + + # Resources + qos = "debug"; + ntasksPerNode = 1; + nodes = 1; + time = "02:00:00"; + # Assign one socket to each task (only one process) + cpusPerTask = hw.cpusPerSocket; + jobName = unitName; + }; + + filterConfigs = c: let + # Too small sizes lead to huge overheads + goodSize = (c.cbs * c.rbs >= 1024); + # When the extended units are not enabled, we only select those in + # the diagonal. 
+ extended = if (enableExtended) then true + else c.cbs == c.rbs; + in + goodSize && extended; + + # Compute the array of configurations + configs = filter (filterConfigs) (stdexp.buildConfigs { + inherit varConf genConf; + }); + + perf = {nextStage, conf, ...}: stages.perf { + inherit nextStage; + perfOptions = "stat -o .garlic/perf.csv -x , " + + "-e cycles,instructions,cache-references,cache-misses"; + }; + + ctf = {nextStage, conf, ...}: let + # Create the nanos6 configuration file (parenthesized: application binds tighter than +) + nanos6ConfigFile = writeText "nanos6.toml" ('' + version.instrument = "ctf" + turbo.enabled = false + instrument.ctf.converter.enabled = false + '' + optionalString (enableHWC) '' + hardware_counters.papi.enabled = true + hardware_counters.papi.counters = [ + "PAPI_TOT_INS", "PAPI_TOT_CYC", + "PAPI_L1_TCM", "PAPI_L2_TCM", "PAPI_L3_TCM" + ] + ''); + + in stages.exec { + inherit nextStage; + + # And use it + env = '' + export NANOS6_CONFIG=${nanos6ConfigFile} + ''; + + # FIXME: We should run a hook *after* srun has ended, so we can + # execute it in one process only (not in N ranks). This hack works + # with one process only. 
Or be able to compute the name of the trace + # directory so we can begin the conversion in parallel + post = assert (conf.nodes * conf.ntasksPerNode == 1); '' + tracedir=$(ls -d trace_* | head -1) + echo "using tracedir=$tracedir" + + offset=$(grep 'offset =' $tracedir/ctf/ust/uid/1000/64-bit/metadata | \ + grep -o '[0-9]*') + echo "offset = $offset" + + start_time=$(awk '/^start_time / {print $2}' stdout.log) + end_time=$(awk '/^end_time / {print $2}' stdout.log) + + begin=$(awk "BEGIN{print $start_time*1e9 - $offset}") + end=$(awk "BEGIN{print $end_time*1e9 - $offset}") + + echo "only events between $begin and $end" + + ${bsc.cn6}/bin/cn6 -s $tracedir + + ${bsc.cn6}/bin/cut $begin $end < $tracedir/prv/trace.prv |\ + ${bsc.cn6}/bin/hcut 1 ${toString conf.cpusPerTask} \ + > $tracedir/prv/trace-cut.prv + + ${bsc.cn6}/bin/dur 6400025 0 < $tracedir/prv/trace-cut.prv |\ + awk '{s+=$1} END {print s}' >> .garlic/time_mode_dead.csv & + + ${bsc.cn6}/bin/dur 6400025 1 < $tracedir/prv/trace-cut.prv |\ + awk '{s+=$1} END {print s}' >> .garlic/time_mode_runtime.csv & + + ${bsc.cn6}/bin/dur 6400025 3 < $tracedir/prv/trace-cut.prv |\ + awk '{s+=$1} END {print s}' >> .garlic/time_mode_task.csv & + + wait + + # Remove the traces at the end, as they are huge + rm -rf $tracedir + ''; + # TODO: To enable HWC we need to first add a taskwait before the + # first get_time() measurement, otherwise we get the HWC of the + # main task, which will be huge. 
+ }; + + exec = {nextStage, conf, ...}: stages.exec { + inherit nextStage; + argv = [ + "--rows" conf.rows + "--cols" conf.cols + "--rbs" conf.rbs + "--cbs" conf.cbs + "--timesteps" conf.timesteps + ]; + + # The next stage is the program + env = '' + ln -sf ${nextStage}/etc/heat.conf heat.conf || true + ''; + }; + + program = {nextStage, conf, ...}: bsc.garlic.apps.heat.override { + inherit (conf) gitBranch; + }; + + pipeline = stdexp.stdPipeline ++ + (optional enablePerf perf) ++ + (optional enableCTF ctf) ++ + [ exec program ]; + +in + + stdexp.genExperiment { inherit configs pipeline; } diff --git a/garlic/exp/index.nix b/garlic/exp/index.nix index 5645c625ee4d38f92fcb25f39833e3a089fe3bab..3a007fa4169840acdfd3d9368ab7c8c5d61cdb19 100644 --- a/garlic/exp/index.nix +++ b/garlic/exp/index.nix @@ -64,9 +64,9 @@ }; heat = rec { - granul = callPackage ./heat/granul.nix { }; - cache = granul.override { enablePerf = true; }; - ctf = cache.override { enableCTF = true; }; + granularity = callPackage ./heat/granularity.nix { }; + cache = granularity.override { enablePerf = true; }; + ctf = granularity.override { enableCTF = true; }; }; bigsort = rec { diff --git a/garlic/fig/heat/granul.R b/garlic/fig/heat/granul.R deleted file mode 100644 index d3dc5d71d7f46ec332168be8d4aad61ebee62312..0000000000000000000000000000000000000000 --- a/garlic/fig/heat/granul.R +++ /dev/null @@ -1,120 +0,0 @@ -library(ggplot2) -library(dplyr) -library(scales) -library(jsonlite) - -args=commandArgs(trailingOnly=TRUE) - -# Read the timetable from args[1] -input_file = "input.json" -if (length(args)>0) { input_file = args[1] } - -# Load the dataset in NDJSON format -dataset = jsonlite::stream_in(file(input_file)) %>% - jsonlite::flatten() - - -# We only need the nblocks and time -df = select(dataset, config.cbs, config.rbs, time) %>% - rename(cbs=config.cbs, rbs=config.rbs) - -df$cbs = as.factor(df$cbs) -df$rbs = as.factor(df$rbs) - -# Normalize the time by the median -df=group_by(df, cbs, rbs) 
%>% - mutate(mtime = median(time)) %>% - mutate(tnorm = time / mtime - 1) %>% - mutate(logmtime = log(mtime)) %>% - ungroup() %>% - filter(between(mtime, mean(time) - (1 * sd(time)), - mean(time) + (1 * sd(time)))) - -ppi=300 -h=5 -w=5 - -png("box.png", width=w*ppi, height=h*ppi, res=ppi) -# -# -# -# Create the plot with the normalized time vs nblocks -p = ggplot(data=df, aes(x=cbs, y=tnorm)) + - - # Labels - labs(x="cbs", y="Normalized time", - title=sprintf("Heat normalized time"), - subtitle=input_file) + - - # Center the title - #theme(plot.title = element_text(hjust = 0.5)) + - - # Black and white mode (useful for printing) - #theme_bw() + - - # Add the maximum allowed error lines - geom_hline(yintercept=c(-0.01, 0.01), - linetype="dashed", color="red") + - - # Draw boxplots - geom_boxplot() + - - #scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + - - theme_bw() + - - theme(plot.subtitle=element_text(size=8)) + - - theme(legend.position = c(0.85, 0.85)) #+ - - - - -# Render the plot -print(p) - -## Save the png image -dev.off() -# -png("scatter.png", width=w*ppi, height=h*ppi, res=ppi) -# -## Create the plot with the normalized time vs nblocks -p = ggplot(df, aes(x=cbs, y=time, linetype=rbs, group=rbs)) + - - labs(x="cbs", y="Time (s)", - title=sprintf("Heat granularity"), - subtitle=input_file) + - theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - theme(legend.position = c(0.5, 0.88)) + - - geom_point(shape=21, size=3) + - geom_line(aes(y=mtime)) + - #scale_x_continuous(trans=log2_trans()) + - scale_y_continuous(trans=log2_trans()) - -# Render the plot -print(p) - -# Save the png image -dev.off() - - -png("heatmap.png", width=w*ppi, height=h*ppi, res=ppi) -# -## Create the plot with the normalized time vs nblocks -p = ggplot(df, aes(x=cbs, y=rbs, fill=logmtime)) + - geom_raster() + - scale_fill_gradient(high="black", low="white") + - coord_fixed() + - theme_bw() + - theme(plot.subtitle=element_text(size=8)) + - labs(x="cbs", y="rbs", - 
title=sprintf("Heat granularity"), - subtitle=input_file) - -# Render the plot -print(p) - -# Save the png image -dev.off() diff --git a/garlic/fig/heat/granularity.R b/garlic/fig/heat/granularity.R new file mode 100644 index 0000000000000000000000000000000000000000..fb72e0097165f8ea1dc0bd96f832e1844dd33414 --- /dev/null +++ b/garlic/fig/heat/granularity.R @@ -0,0 +1,67 @@ +library(ggplot2) +library(dplyr, warn.conflicts = FALSE) +library(scales) +library(jsonlite) +library(viridis, warn.conflicts = FALSE) +library(stringr) + +args = commandArgs(trailingOnly=TRUE) + +# Set the input dataset if given in argv[1], or use "input" as default +if (length(args)>0) { input_file = args[1] } else { input_file = "input" } + +df = jsonlite::stream_in(file(input_file), verbose=FALSE) %>% + + jsonlite::flatten() %>% + + select(unit, + config.cbs, + config.rbs, + time, + total_time) %>% + + rename(cbs=config.cbs, + rbs=config.rbs) %>% + + # Convert to factors + mutate(cbs = as.factor(cbs)) %>% + mutate(rbs = as.factor(rbs)) %>% + mutate(unit = as.factor(unit)) %>% + + # Compute median times + group_by(unit) %>% + mutate(median.time = median(time)) %>% + mutate(normalized.time = time / median.time - 1) %>% + mutate(log.median.time = log(median.time)) %>% + ungroup() + +dpi = 300 +h = 6 +w = 6 + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=cbs, y=normalized.time)) + + geom_boxplot() + + geom_hline(yintercept=c(-0.01, 0.01), linetype="dashed", color="red") + + theme_bw() + + labs(y="Normalized time", + title="Heat granularity: normalized time", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("normalized.time.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("normalized.time.pdf", plot=p, width=w, height=h, dpi=dpi) + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(x=cbs, y=time)) + + geom_point(shape=21, size=3) + + geom_line(aes(y=median.time, group=0)) 
+ + theme_bw() + + labs(y="Time (s)", title="Heat granularity: time", + subtitle=input_file) + + theme(plot.subtitle=element_text(size=8)) + +ggsave("time.png", plot=p, width=w, height=h, dpi=dpi) +ggsave("time.pdf", plot=p, width=w, height=h, dpi=dpi) diff --git a/garlic/fig/heat/mode.R b/garlic/fig/heat/mode.R index 66bde8228a18468ed5de5f9043c01ac0de2aff20..8e7e3ac1a1e22e255665f91d6be183ba56967c61 100644 --- a/garlic/fig/heat/mode.R +++ b/garlic/fig/heat/mode.R @@ -3,6 +3,7 @@ library(dplyr) library(scales) library(jsonlite) library(viridis) +library(tidyr) args=commandArgs(trailingOnly=TRUE) @@ -19,6 +20,7 @@ df = select(dataset, config.cbs, config.rbs, ctf_mode.runtime, ctf_mode.task, ctf_mode.dead, + config.cpusPerTask, time) %>% rename( cbs=config.cbs, @@ -26,6 +28,7 @@ df = select(dataset, config.cbs, config.rbs, runtime=ctf_mode.runtime, task=ctf_mode.task, dead=ctf_mode.dead, + cpusPerTask=config.cpusPerTask, ) df$cbs = as.factor(df$cbs) @@ -33,16 +36,16 @@ df$rbs = as.factor(df$rbs) # Normalize the time by the median df = df %>% - mutate(runtime = runtime * 1e-9) %>% - mutate(dead = dead * 1e-9) %>% - mutate(task = task * 1e-9) %>% + mutate(runtime = runtime * 1e-9 / cpusPerTask) %>% + mutate(dead = dead * 1e-9 / cpusPerTask) %>% + mutate(task = task * 1e-9 / cpusPerTask) %>% group_by(cbs, rbs) %>% mutate(median.time = median(time)) %>% mutate(log.median.time = log(median.time)) %>% mutate(median.dead = median(dead)) %>% mutate(median.runtime = median(runtime)) %>% mutate(median.task = median(task)) %>% - ungroup()# %>% + ungroup() #%>% print(df) @@ -79,3 +82,40 @@ df_filtered = filter(df, between(median.time, heatmap_plot(df, "median.time", "execution time (seconds)") heatmap_plot(df, "log.median.time", "execution time") + +df_square = filter(df, cbs == rbs) %>% + gather(key = time.from, value = acc.time, + c("median.dead", "median.runtime", "median.task")) + +# Colors similar to Paraver +colors <- c("median.dead" = "gray", + "median.runtime" = "blue", + 
"median.task" = "red") + +p = ggplot(df_square, aes(x=cbs, y=acc.time)) + + geom_area(aes(fill=time.from, group=time.from)) + + scale_fill_manual(values = colors) + + geom_point(aes(y=median.time, color="black")) + + geom_line(aes(y=median.time, group=0, color="black")) + + theme_bw() + + theme(legend.position=c(0.5, 0.7)) + + scale_color_identity(breaks = c("black"), + labels = c("Total time"), guide = "legend") + + labs(x="Blocksize (side)", y="Time (s)", + fill="Estimated", color="Direct measurement", + title="Heat granularity: time distribution", subtitle=input_file) + +ggsave("area.time.png", plot=p, width=6, height=6, dpi=300) +ggsave("area.time.pdf", plot=p, width=6, height=6, dpi=300) + +p = ggplot(df_square, aes(x=cbs, y=acc.time)) + + geom_col(aes(fill=time.from, group=time.from)) + + scale_fill_manual(values = colors) + + theme_bw() + + theme(legend.position=c(0.5, 0.7)) + + labs(x="Blocksize (side)", y="Time (s)", + fill="Estimated", color="Direct measurement", + title="Heat granularity: time distribution", subtitle=input_file) + +ggsave("col.time.png", plot=p, width=6, height=6, dpi=300) +ggsave("col.time.pdf", plot=p, width=6, height=6, dpi=300) diff --git a/garlic/fig/index.nix b/garlic/fig/index.nix index 81244d02c7f64c6ddba1d9bcca89f63e2f04f5d3..c36f0b44a65d309273873c696667c09d9e3f2a48 100644 --- a/garlic/fig/index.nix +++ b/garlic/fig/index.nix @@ -47,7 +47,7 @@ in }; heat = with exp.heat; { - granul = stdPlot ./heat/granul.R [ granul ]; + granularity = stdPlot ./heat/granularity.R [ granularity ]; cache = customPlot ./heat/cache.R (ds.perf.stat cache.result); ctf = customPlot ./heat/mode.R (ds.ctf.mode ctf.result); }; diff --git a/garlic/index.nix b/garlic/index.nix index e55fb386a7021535cfffaaeafb634381dc0f6ed4..b73c86d45c8e6670f50324985240ec7df8ca7e70 100644 --- a/garlic/index.nix +++ b/garlic/index.nix @@ -14,7 +14,7 @@ # Add more nixpkgs packages here... 
]; bscPackages = with bsc; [ - slurm clangOmpss2 icc mcxx perf tampi impi vtk + slurm clangOmpss2 icc mcxx perf tampi impi vtk paraver # Add more bsc packages here... ]; packages = commonPackages ++ bscPackages; diff --git a/overlay.nix b/overlay.nix index 8e4b01ab07cc214372e85eb5244bc6ffae8e5c9c..75c1881831def49ec631c3d0077bbee1c59a433b 100644 --- a/overlay.nix +++ b/overlay.nix @@ -197,6 +197,7 @@ let dummy = callPackage ./bsc/dummy/default.nix { }; mpptest = callPackage ./bsc/mpptest/default.nix { }; + cpuid = callPackage ./bsc/cpuid/default.nix { }; # ================================================================= # Garlic benchmark