From 1c1d3f32310871b95170bbb4b400309e5ba8cb89 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Jun 2023 13:16:23 +0200 Subject: [PATCH 1/6] Allow srun to specify the cpu binding The task/affinity plugin needs to be selected. --- m/common/slurm.nix | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/m/common/slurm.nix b/m/common/slurm.nix index ce909df..08de3fd 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -9,6 +9,8 @@ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; + + # See slurm.conf(5) for more details about these options. extraConfig = '' # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but # not with Intel MPI. For that use the compatibility shim libpmi.so @@ -20,6 +22,13 @@ # When a node reboots return that node to the slurm queue as soon as it # becomes operative again. ReturnToService=2 + + # Track all processes by using a cgroup + ProctrackType=proctrack/cgroup + + # Enable task/affinity to allow the jobs to run in a specified subset of + # the resources. Use the task/cgroup plugin to enable process containment. + TaskPlugin=task/affinity,task/cgroup ''; }; } -- GitLab From a1f258c5ce650f75834b366c1834102caeb701f3 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Jun 2023 15:41:06 +0200 Subject: [PATCH 2/6] Add perf to packages --- m/common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/main.nix b/m/common/main.nix index f8622b1..9a6e690 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -34,7 +34,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu + ncdu config.boot.kernelPackages.perf # From bsckgs overlay bsc.osumb ]; -- GitLab From 826cfdf43ffb123c9d02ed0d3a97aa896ab2e523 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Wed, 21 Jun 2023 16:23:16 +0200 Subject: [PATCH 3/6] Set perf paranoid to 0 by default --- m/common/boot.nix | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m/common/boot.nix b/m/common/boot.nix index ca18e0e..c93d5cc 100644 --- a/m/common/boot.nix +++ b/m/common/boot.nix @@ -18,6 +18,10 @@ "console=ttyS0,115200" ]; + boot.kernel.sysctl = { + "kernel.perf_event_paranoid" = lib.mkDefault 0; + }; + boot.kernelPackages = pkgs.linuxPackages_latest; #boot.kernelPatches = lib.singleton { -- GitLab From 31be81d2b10b706cb74ea45d9b0be1efbc84c510 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 Jun 2023 16:01:27 +0200 Subject: [PATCH 4/6] Lower perf_event_paranoid to -1 --- m/common/boot.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/boot.nix b/m/common/boot.nix index c93d5cc..ba917da 100644 --- a/m/common/boot.nix +++ b/m/common/boot.nix @@ -19,7 +19,7 @@ ]; boot.kernel.sysctl = { - "kernel.perf_event_paranoid" = lib.mkDefault 0; + "kernel.perf_event_paranoid" = lib.mkDefault "-1"; }; boot.kernelPackages = pkgs.linuxPackages_latest; -- GitLab From 6dcd9d81447527566956214b5c01cb589650e28d Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 Jun 2023 16:12:25 +0200 Subject: [PATCH 5/6] Add DNS tools to resolve hosts --- m/common/main.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/m/common/main.nix b/m/common/main.nix index 9a6e690..e43ce4f 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -34,7 +34,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu config.boot.kernelPackages.perf + ncdu config.boot.kernelPackages.perf ldns # From bsckgs overlay bsc.osumb ]; -- GitLab From 4dd25f2f89d2bd1677bcae4f22a05799f90efc41 Mon Sep 17 00:00:00 2001 From: Rodrigo Arias Mallo Date: Fri, 23 Jun 2023 16:22:18 +0200 Subject: [PATCH 6/6] Use our host names first by default --- m/common/net.nix | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/m/common/net.nix b/m/common/net.nix index cfcd686..1173cc2 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -43,12 +43,12 @@ 10.0.40.143 oss02-ipmi0 # Node Entry for node: xeon01 (ID=15) - 10.0.40.1 xeon01 xeon01-eth0 owl1 + 10.0.40.1 owl1 xeon01 xeon01-eth0 10.0.42.1 xeon01-ib0 10.0.40.101 xeon01-ipmi0 # Node Entry for node: xeon02 (ID=16) - 10.0.40.2 xeon02 xeon02-eth0 owl2 + 10.0.40.2 owl2 xeon02 xeon02-eth0 10.0.42.2 xeon02-ib0 10.0.40.102 xeon02-ipmi0 @@ -73,12 +73,12 @@ 10.0.40.106 xeon06-ipmi0 # Node Entry for node: xeon07 (ID=21) - 10.0.40.7 xeon07 xeon07-eth0 hut + 10.0.40.7 hut xeon07 xeon07-eth0 10.0.42.7 xeon07-ib0 10.0.40.107 xeon07-ipmi0 # Node Entry for node: xeon08 (ID=22) - 10.0.40.8 xeon08 xeon08-eth0 eudy + 10.0.40.8 eudy xeon08 xeon08-eth0 10.0.42.8 xeon08-ib0 10.0.40.108 xeon08-ipmi0 ''; -- GitLab