diff --git a/m/common/boot.nix b/m/common/boot.nix index ca18e0e3e47db36dcb8bb9d04f6fdea928a7df10..ba917da2d8a5bf6bf36b749571f71ef627be0f4a 100644 --- a/m/common/boot.nix +++ b/m/common/boot.nix @@ -18,6 +18,10 @@ "console=ttyS0,115200" ]; + boot.kernel.sysctl = { + "kernel.perf_event_paranoid" = lib.mkDefault "-1"; + }; + boot.kernelPackages = pkgs.linuxPackages_latest; #boot.kernelPatches = lib.singleton { diff --git a/m/common/main.nix b/m/common/main.nix index f8622b146202ed429f17434233d6fcd3787e0473..e43ce4fb0ad5dec7ee6600e5de1852884bb5eb32 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -34,7 +34,7 @@ environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree - ncdu + ncdu config.boot.kernelPackages.perf ldns # From bsckgs overlay bsc.osumb ]; diff --git a/m/common/net.nix b/m/common/net.nix index cfcd686edda544c4b895e03d1312aa113cc9aa06..1173cc2d3de475e51b71d9f27388de2c7787bbeb 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -43,12 +43,12 @@ 10.0.40.143 oss02-ipmi0 # Node Entry for node: xeon01 (ID=15) - 10.0.40.1 xeon01 xeon01-eth0 owl1 + 10.0.40.1 owl1 xeon01 xeon01-eth0 10.0.42.1 xeon01-ib0 10.0.40.101 xeon01-ipmi0 # Node Entry for node: xeon02 (ID=16) - 10.0.40.2 xeon02 xeon02-eth0 owl2 + 10.0.40.2 owl2 xeon02 xeon02-eth0 10.0.42.2 xeon02-ib0 10.0.40.102 xeon02-ipmi0 @@ -73,12 +73,12 @@ 10.0.40.106 xeon06-ipmi0 # Node Entry for node: xeon07 (ID=21) - 10.0.40.7 xeon07 xeon07-eth0 hut + 10.0.40.7 hut xeon07 xeon07-eth0 10.0.42.7 xeon07-ib0 10.0.40.107 xeon07-ipmi0 # Node Entry for node: xeon08 (ID=22) - 10.0.40.8 xeon08 xeon08-eth0 eudy + 10.0.40.8 eudy xeon08 xeon08-eth0 10.0.42.8 xeon08-ib0 10.0.40.108 xeon08-ipmi0 ''; diff --git a/m/common/slurm.nix b/m/common/slurm.nix index ce909dfd32c74668f6e84d028144a39229c597fe..08de3fdad61a9e49e1753bb18c136c559c3d9934 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -9,6 +9,8 @@ "owl[1,2] Sockets=2 CoresPerSocket=14 ThreadsPerCore=2 Feature=owl" "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; + + # See slurm.conf(5) for more details about these options. extraConfig = '' # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but # not with Intel MPI. For that use the compatibility shim libpmi.so @@ -20,6 +22,13 @@ # When a node reboots return that node to the slurm queue as soon as it # becomes operative again. ReturnToService=2 + + # Track all processes by using a cgroup + ProctrackType=proctrack/cgroup + + # Enable task/affinity to allow the jobs to run in a specified subset of + # the resources. Use the task/cgroup plugin to enable process containment. + TaskPlugin=task/affinity,task/cgroup ''; }; }