diff --git a/doc/install.md b/doc/install.md index 8eae8c750cb7028ea5bbabbd48c26c6e0b376545..d5e279dda38ab8bc6b1292817c1fe24314672db5 100644 --- a/doc/install.md +++ b/doc/install.md @@ -3,6 +3,34 @@ This article shows the steps to install NixOS in a node following the configuration of the repo. +## Enable the serial console + +By default, the nodes have the serial console disabled in GRUB and also boot +Linux without the serial console enabled. + +To enable the serial console in GRUB, set the following lines in +`/etc/default/grub`: + +``` +GRUB_TERMINAL="console serial" +GRUB_SERIAL_COMMAND="serial --speed=115200 --unit=0 --word=8 --parity=no --stop=1" +``` + +To boot Linux with the serial console enabled, so you can see the boot log and +log in via serial, set: + +``` +GRUB_CMDLINE_LINUX="console=ttyS0,115200n8 console=tty0" +``` + +Then regenerate the GRUB configuration: + +``` +# grub2-mkconfig -o /boot/grub2/grub.cfg +``` + +Finally, reboot the node. + ## Prepare the disk Create a main partition and label it `nixos` following [the manual][1]. 
diff --git a/flake.lock b/flake.lock index 1a3da19e86b9ceb2610c6495c0b50665bede63f1..294fa56a03eb76336b7ed3610a9245b8fccfd3dd 100644 --- a/flake.lock +++ b/flake.lock @@ -23,12 +23,17 @@ } }, "bscpkgs": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, "locked": { - "lastModified": 1690560045, - "narHash": "sha256-39ZP+FIzlWoN3c43hReBYpStg4RLYw/z7TdxCQmOvTM=", + "lastModified": 1694077645, + "narHash": "sha256-72bvRBhq8Q8V6ibsR9lyBE92V2EC6C6Ek3J5cOM79So=", "ref": "refs/heads/master", - "rev": "b4a20d7c3af854b39682484adfd1c7979319f439", - "revCount": 841, + "rev": "6122fef92701701e1a0622550ac0fc5c2beb5906", + "revCount": 860, "type": "git", "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git" }, @@ -82,11 +87,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1692447944, - "narHash": "sha256-fkJGNjEmTPvqBs215EQU4r9ivecV5Qge5cF/QDLVn3U=", + "lastModified": 1693663421, + "narHash": "sha256-ImMIlWE/idjcZAfxKK8sQA7A1Gi/O58u5/CJA+mxvl8=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "d680ded26da5cf104dd2735a51e88d2d8f487b4d", + "rev": "e56990880811a451abd32515698c712788be5720", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 6ce0689bf78390d1e9c8ddc2fdc57369078dc3ed..10be75ab11b0957bc940ce92c0459e4b9433abc9 100644 --- a/flake.nix +++ b/flake.nix @@ -4,6 +4,7 @@ agenix.url = "github:ryantm/agenix"; agenix.inputs.nixpkgs.follows = "nixpkgs"; bscpkgs.url = "git+https://pm.bsc.es/gitlab/rarias/bscpkgs.git"; + bscpkgs.inputs.nixpkgs.follows = "nixpkgs"; }; outputs = { self, nixpkgs, agenix, bscpkgs, ... 
}: @@ -22,6 +23,7 @@ in eudy = mkConf "eudy"; koro = mkConf "koro"; bay = mkConf "bay"; + lake2 = mkConf "lake2"; }; packages.x86_64-linux.hut = self.nixosConfigurations.hut.pkgs; diff --git a/keys.nix b/keys.nix new file mode 100644 index 0000000000000000000000000000000000000000..681fcbc9d42243de2bc29a6fb15065d190f1d42a --- /dev/null +++ b/keys.nix @@ -0,0 +1,29 @@ +# As agenix needs to parse the secrets from a standalone .nix file, we describe +# here all the public keys +rec { + hosts = { + hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1 hut"; + owl1 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv owl1"; + owl2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK owl2"; + eudy = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG eudy"; + koro = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67 koro"; + bay = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICvGBzpRQKuQYHdlUQeAk6jmdbkrhmdLwTBqf3el7IgU bay"; + lake2 = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINo66//S1yatpQHE/BuYD/Gfq64TY7ZN5XOGXmNchiO0 lake2"; + }; + + hostGroup = with hosts; rec { + compute = [ owl1 owl2 ]; + playground = [ eudy koro ]; + storage = [ bay lake2 ]; + monitor = [ hut ]; + + system = storage ++ monitor; + safe = system ++ compute; + all = safe ++ playground; + }; + + admins = { + rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; + root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb root@hut"; + }; +} diff --git a/m/bay/configuration.nix b/m/bay/configuration.nix index 847e1621f17872a0dd0d375dccf3b943ccfa17ba..5e2b342d9ef3cc770c4bd2dfe1dc8041fe612757 100644 --- a/m/bay/configuration.nix +++ b/m/bay/configuration.nix @@ -3,6 +3,7 @@ { imports = [ ../common/main.nix + ../common/monitoring.nix ]; # Select the this using the ID to avoid mismatches 
@@ -36,6 +37,14 @@ monInitialMembers = "bay"; clusterNetwork = "10.0.40.40/24"; # Use Ethernet only }; + extraConfig = { + # Only log to stderr so it appears in the journal + "log_file" = "/dev/null"; + "mon_cluster_log_file" = "/dev/null"; + "log_to_stderr" = "true"; + "err_to_stderr" = "true"; + "log_to_file" = "false"; + }; mds = { enable = true; daemons = [ "mds0" "mds1" ]; diff --git a/m/common/agenix.nix b/m/common/agenix.nix new file mode 100644 index 0000000000000000000000000000000000000000..3d5bb656ba38994569a36148067079e0e7fcdfc5 --- /dev/null +++ b/m/common/agenix.nix @@ -0,0 +1,9 @@ +{ agenix, ... }: + +{ + imports = [ agenix.nixosModules.default ]; + + environment.systemPackages = [ + agenix.packages.x86_64-linux.default + ]; +} diff --git a/m/common/fs.nix b/m/common/fs.nix index 60fcceffd17a88bf5618a516900b3ec66374e34a..10339d05c65e4536540bdb0f3f6f87f8d6635a1f 100644 --- a/m/common/fs.nix +++ b/m/common/fs.nix @@ -6,6 +6,9 @@ fsType = "ext4"; }; + # Trim unused blocks weekly + services.fstrim.enable = true; + swapDevices = [ { device = "/dev/disk/by-label/swap"; } ]; diff --git a/m/common/main.nix b/m/common/main.nix index 705449558cb38a1cd1c48573764ca5515ad8d456..a5f8e454b2b2c95a71963a8eb607b51605bd5ea5 100644 --- a/m/common/main.nix +++ b/m/common/main.nix @@ -2,6 +2,7 @@ { imports = [ + ./agenix.nix ./boot.nix ./fs.nix ./hw.nix @@ -10,13 +11,21 @@ ./slurm.nix ./ssh.nix ./users.nix + ./watchdog.nix + ./rev.nix + ./zsh.nix ]; nixpkgs.overlays = [ bscpkgs.bscOverlay - (import ../../pkgs/mpi.nix) + (import ../../pkgs/overlay.nix) ]; + system.configurationRevision = + if theFlake ? rev + then theFlake.rev + else throw ("Refusing to build from a dirty Git tree!"); + nix.nixPath = [ "nixpkgs=${nixpkgs}" "bscpkgs=${bscpkgs}" @@ -27,11 +36,6 @@ nix.registry.bscpkgs.flake = bscpkgs; nix.registry.jungle.flake = theFlake; - system.configurationRevision = - if theFlake ? 
rev - then theFlake.rev - else throw ("Refusing to build from a dirty Git tree!"); - environment.systemPackages = with pkgs; [ vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree @@ -71,9 +75,6 @@ nix.gc.dates = "weekly"; nix.gc.options = "--delete-older-than 30d"; - programs.zsh.enable = true; - programs.zsh.histSize = 100000; - programs.bash.promptInit = '' PS1="\h\\$ " ''; diff --git a/m/common/monitoring.nix b/m/common/monitoring.nix new file mode 100644 index 0000000000000000000000000000000000000000..0ef9209edbe15ac53b4cfdb0a4193a3fb0baf799 --- /dev/null +++ b/m/common/monitoring.nix @@ -0,0 +1,25 @@ +{ config, lib, ... }: + +{ + # We need access to the devices to monitor the disk space + systemd.services.prometheus-node-exporter.serviceConfig.PrivateDevices = lib.mkForce false; + systemd.services.prometheus-node-exporter.serviceConfig.ProtectHome = lib.mkForce "read-only"; + + # Required to allow the smartctl exporter to read the nvme0 character device, + # see the commit message on: + # https://github.com/NixOS/nixpkgs/commit/12c26aca1fd55ab99f831bedc865a626eee39f80 + services.udev.extraRules = '' + SUBSYSTEM=="nvme", KERNEL=="nvme[0-9]*", GROUP="disk" + ''; + + services.prometheus = { + exporters = { + node = { + enable = true; + enabledCollectors = [ "systemd" ]; + port = 9002; + }; + smartctl.enable = true; + }; + }; +} diff --git a/m/common/net.nix b/m/common/net.nix index de2b28dce2ea35f47fe130f47b09cab2755eb90a..d90f8ba4ee09569a13c9d9ea21c9aa0904ddeb3d 100644 --- a/m/common/net.nix +++ b/m/common/net.nix @@ -21,9 +21,14 @@ firewall = { enable = true; allowedTCPPorts = [ 22 ]; - - # FIXME: For slurmd as it requests the compute nodes to connect to us - allowedTCPPortRanges = [ { from=1024; to=65535; } ]; + extraCommands = '' + # Prevent ssfhead from contacting our slurmd daemon + iptables -A nixos-fw -p tcp -s ssfhead --dport 6817:6819 -j nixos-fw-log-refuse + # 
But accept traffic to slurm ports from any other node in the subnet + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 6817:6819 -j nixos-fw-accept + # We also need to open the srun port range + iptables -A nixos-fw -p tcp -s 10.0.40.0/24 --dport 60000:61000 -j nixos-fw-accept + ''; }; extraHosts = '' @@ -32,8 +37,8 @@ # Node Entry for node: mds01 (ID=72) 10.0.40.40 bay mds01 mds01-eth0 - 10.0.42.40 mds01-ib0 - 10.0.40.141 mds01-ipmi0 + 10.0.42.40 bay-ib mds01-ib0 + 10.0.40.141 bay-ipmi mds01-ipmi0 # Node Entry for node: oss01 (ID=73) 10.0.40.41 oss01 oss01-eth0 @@ -41,19 +46,19 @@ 10.0.40.142 oss01-ipmi0 # Node Entry for node: oss02 (ID=74) - 10.0.40.42 oss02 oss02-eth0 - 10.0.42.42 oss02-ib0 - 10.0.40.143 oss02-ipmi0 + 10.0.40.42 lake2 oss02 oss02-eth0 + 10.0.42.42 lake2-ib oss02-ib0 + 10.0.40.143 lake2-ipmi oss02-ipmi0 # Node Entry for node: xeon01 (ID=15) 10.0.40.1 owl1 xeon01 xeon01-eth0 - 10.0.42.1 xeon01-ib0 - 10.0.40.101 xeon01-ipmi0 + 10.0.42.1 owl1-ib xeon01-ib0 + 10.0.40.101 owl1-ipmi xeon01-ipmi0 # Node Entry for node: xeon02 (ID=16) 10.0.40.2 owl2 xeon02 xeon02-eth0 - 10.0.42.2 xeon02-ib0 - 10.0.40.102 xeon02-ipmi0 + 10.0.42.2 owl2-ib xeon02-ib0 + 10.0.40.102 owl2-ipmi xeon02-ipmi0 # Node Entry for node: xeon03 (ID=17) 10.0.40.3 xeon03 xeon03-eth0 @@ -67,8 +72,8 @@ # Node Entry for node: xeon05 (ID=19) 10.0.40.5 koro xeon05 xeon05-eth0 - 10.0.42.5 xeon05-ib0 - 10.0.40.105 xeon05-ipmi0 + 10.0.42.5 koro-ib xeon05-ib0 + 10.0.40.105 koro-ipmi xeon05-ipmi0 # Node Entry for node: xeon06 (ID=20) 10.0.40.6 xeon06 xeon06-eth0 @@ -77,13 +82,13 @@ # Node Entry for node: xeon07 (ID=21) 10.0.40.7 hut xeon07 xeon07-eth0 - 10.0.42.7 xeon07-ib0 - 10.0.40.107 xeon07-ipmi0 + 10.0.42.7 hut-ib xeon07-ib0 + 10.0.40.107 hut-ipmi xeon07-ipmi0 # Node Entry for node: xeon08 (ID=22) 10.0.40.8 eudy xeon08 xeon08-eth0 - 10.0.42.8 xeon08-ib0 - 10.0.40.108 xeon08-ipmi0 + 10.0.42.8 eudy-ib xeon08-ib0 + 10.0.40.108 eudy-ipmi xeon08-ipmi0 ''; }; } diff --git a/m/common/rev.nix 
b/m/common/rev.nix new file mode 100644 index 0000000000000000000000000000000000000000..80d019b5c131aaa633fb81377758fd7455c63c67 --- /dev/null +++ b/m/common/rev.nix @@ -0,0 +1,18 @@ +{ theFlake, ... }: + +let + rev = if theFlake ? rev then theFlake.rev + else throw ("Refusing to build from a dirty Git tree!"); +in { + # Save the commit of the config in /etc/configrev + environment.etc.configrev.text = rev + "\n"; + + # Keep a log with the config over time + system.activationScripts.configRevLog.text = '' + BOOTED=$(cat /run/booted-system/etc/configrev 2>/dev/null || echo unknown) + CURRENT=$(cat /run/current-system/etc/configrev 2>/dev/null || echo unknown) + NEXT=${rev} + DATENOW=$(date --iso-8601=seconds) + echo "$DATENOW booted=$BOOTED current=$CURRENT next=$NEXT" >> /var/configrev.log + ''; +} diff --git a/m/common/slurm.nix b/m/common/slurm.nix index b02a914fbcd8035e71b3ad5d23ca6d631b1e217e..22ffae66ab5c69a9b71d6077eb47c6caa1683f5e 100644 --- a/m/common/slurm.nix +++ b/m/common/slurm.nix @@ -1,6 +1,33 @@ -{ lib, ... }: +{ config, pkgs, lib, ... 
}: -{ +let + suspendProgram = pkgs.writeScript "suspend.sh" '' + #!/usr/bin/env bash + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Suspend invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Shutting down host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power off + done + ''; + + resumeProgram = pkgs.writeScript "resume.sh" '' + #!/usr/bin/env bash + exec 1>>/var/log/power_save.log 2>>/var/log/power_save.log + set -x + export "PATH=/run/current-system/sw/bin:$PATH" + echo "$(date) Resume invoked $0 $*" >> /var/log/power_save.log + hosts=$(scontrol show hostnames $1) + for host in $hosts; do + echo Starting host: $host + ipmitool -I lanplus -H ''${host}-ipmi -P "" -U "" chassis power on + done + ''; + +in { systemd.services.slurmd.serviceConfig = { # Kill all processes in the control group on stop/restart. This will kill # all the jobs running, so ensure that we only upgrade when the nodes are @@ -9,6 +36,7 @@ # https://bugs.schedmd.com/show_bug.cgi?id=2095#c24 KillMode = lib.mkForce "control-group"; }; + services.slurm = { client.enable = true; controlMachine = "hut"; @@ -18,6 +46,11 @@ "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2" ]; + partitionName = [ + "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP" + "all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP" + ]; + # See slurm.conf(5) for more details about these options. extraConfig = '' # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but @@ -37,6 +70,30 @@ # Enable task/affinity to allow the jobs to run in a specified subset of # the resources. Use the task/cgroup plugin to enable process containment. 
TaskPlugin=task/affinity,task/cgroup + + # Power off unused nodes until they are requested + SuspendProgram=${suspendProgram} + SuspendTimeout=60 + ResumeProgram=${resumeProgram} + ResumeTimeout=300 + SuspendExcNodes=hut + + # Turn the nodes off after 1 hour of inactivity + SuspendTime=3600 + + # Reduce port range so we can allow only this range in the firewall + SrunPortRange=60000-61000 ''; }; + + age.secrets.mungeKey = { + file = ../../secrets/munge-key.age; + owner = "munge"; + group = "munge"; + }; + + services.munge = { + enable = true; + password = config.age.secrets.mungeKey.path; + }; } diff --git a/m/common/ssh.nix b/m/common/ssh.nix index 2d805bfb8a1604fbd6b1ddde47637bb1b46cde6b..b8cb5c12749a97448a52abfc830a9d2af9ec9273 100644 --- a/m/common/ssh.nix +++ b/m/common/ssh.nix @@ -1,5 +1,9 @@ -{ ... }: +{ lib, ... }: +let + keys = import ../../keys.nix; + hostsKeys = lib.mapAttrs (name: value: { publicKey = value; }) keys.hosts; +in { # Enable the OpenSSH daemon. services.openssh.enable = true; @@ -11,13 +15,7 @@ ProxyCommand nc -X connect -x localhost:23080 %h %p ''; - programs.ssh.knownHosts = { - "hut".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; - "owl1".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMqMEXO0ApVsBA6yjmb0xP2kWyoPDIWxBB0Q3+QbHVhv"; - "owl2".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHurEYpQzNHqWYF6B9Pd7W8UPgF3BxEg0BvSbsA7BAdK"; - "eudy".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIL+WYPRRvZupqLAG0USKmd/juEPmisyyJaP8hAgYwXsG"; - "koro".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIImiTFDbxyUYPumvm8C4mEnHfuvtBY1H8undtd6oDd67"; - + programs.ssh.knownHosts = hostsKeys // { "gitlab-internal.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIF9arsAOSRB06hdy71oTvJHG2Mg8zfebADxpvc37lZo3"; "bscpm03.bsc.es".publicKey = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIM2NuSUPsEhqz1j5b4Gqd+MWFnRqyqY57+xMvBUqHYUS"; }; diff --git a/m/common/users.nix b/m/common/users.nix index 
3399ed3e7e10458d85478723d2053a4bab619851..8451196def607bad51f45f8f1a4c35e67698c6fc 100644 --- a/m/common/users.nix +++ b/m/common/users.nix @@ -1,4 +1,4 @@ -{ ... }: +{ pkgs, ... }: { users = { @@ -26,6 +26,7 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAINa0tvnNgwkc5xOwd6xTtaIdFi5jv0j2FrE7jl5MTLoE ram@mio" "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGYcXIxe0poOEGLpk8NjiRozls7fMRX0N3j3Ar94U+Gl rarias@hal" ]; + shell = pkgs.zsh; }; arocanon = { @@ -53,6 +54,18 @@ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFYfXg37mauGeurqsLpedgA2XQ9d4Nm0ZGo/hI1f7wwH rpenacob@bsc" ]; }; + + anavarro = { + uid = 1037; + isNormalUser = true; + home = "/home/Computational/anavarro"; + description = "Antoni Navarro"; + group = "Computational"; + hashedPassword = "$6$QdNDsuLehoZTYZlb$CDhCouYDPrhoiB7/seu7RF.Gqg4zMQz0n5sA4U1KDgHaZOxy2as9pbIGeF8tOHJKRoZajk5GiaZv0rZMn7Oq31"; + openssh.authorizedKeys.keys = [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILWjRSlKgzBPZQhIeEtk6Lvws2XNcYwHcwPv4osSgst5 anavarro@ssfhead" + ]; + }; }; groups = { diff --git a/m/common/watchdog.nix b/m/common/watchdog.nix new file mode 100644 index 0000000000000000000000000000000000000000..d4d297d08bb8025eebb1e881bc70a077c50618a6 --- /dev/null +++ b/m/common/watchdog.nix @@ -0,0 +1,9 @@ +{ ... }: + +{ + # The boards have a BMC watchdog controlled by IPMI + boot.kernelModules = [ "ipmi_watchdog" ]; + + # Enable systemd watchdog with 30 s interval + systemd.watchdog.runtimeTime = "30s"; +} diff --git a/m/common/zsh.nix b/m/common/zsh.nix new file mode 100644 index 0000000000000000000000000000000000000000..666e34b4305a660e4ce534e9a338bfcf21f8d62b --- /dev/null +++ b/m/common/zsh.nix @@ -0,0 +1,92 @@ +{ pkgs, ... }: + +{ + environment.systemPackages = with pkgs; [ + direnv + zsh-completions + nix-zsh-completions + ]; + + programs.zsh = { + enable = true; + histSize = 1000000; + + shellInit = '' + # Disable new user prompt + if [ ! 
-e ~/.zshrc ]; then + touch ~/.zshrc + fi + ''; + + promptInit = '' + # Note that to manually override this in ~/.zshrc you should run `prompt off` + # before setting your PS1 and etc. Otherwise this will likely to interact with + # your ~/.zshrc configuration in unexpected ways as the default prompt sets + # a lot of different prompt variables. + autoload -U promptinit && promptinit && prompt default && setopt prompt_sp + ''; + + # Taken from Ulli Kehrle config: + # https://git.hrnz.li/Ulli/nixos/src/commit/2e203b8d8d671f4e3ced0f1744a51d5c6ee19846/profiles/shell.nix#L199-L205 + interactiveShellInit = '' + source "${pkgs.zsh-history-substring-search}/share/zsh-history-substring-search/zsh-history-substring-search.zsh" + + # Save history immediately, but only load it when the shell starts + setopt inc_append_history + + # dircolors doesn't support alacritty: + # https://lists.gnu.org/archive/html/bug-coreutils/2019-05/msg00029.html + export LS_COLORS='rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=00:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.avif=01;35:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.webp=01;
35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:*~=00;90:*#=00;90:*.bak=00;90:*.old=00;90:*.orig=00;90:*.part=00;90:*.rej=00;90:*.swp=00;90:*.tmp=00;90:*.dpkg-dist=00;90:*.dpkg-old=00;90:*.ucf-dist=00;90:*.ucf-new=00;90:*.ucf-old=00;90:*.rpmnew=00;90:*.rpmorig=00;90:*.rpmsave=00;90:'; + + # From Arch Linux and GRML + bindkey "^R" history-incremental-pattern-search-backward + bindkey "^S" history-incremental-pattern-search-forward + + # Auto rehash for new binaries + zstyle ':completion:*' rehash true + # show a nice menu with the matches + zstyle ':completion:*' menu yes select + + bindkey '^[OA' history-substring-search-up # Up + bindkey '^[[A' history-substring-search-up # Up + + bindkey '^[OB' history-substring-search-down # Down + bindkey '^[[B' history-substring-search-down # Down + + bindkey '\e[1~' beginning-of-line # Home + bindkey '\e[7~' beginning-of-line # Home + bindkey '\e[H' beginning-of-line # Home + bindkey '\eOH' beginning-of-line # Home + + bindkey '\e[4~' end-of-line # End + bindkey '\e[8~' end-of-line # End + bindkey '\e[F' end-of-line # End + bindkey '\eOF' end-of-line # End + + bindkey '^?' 
backward-delete-char # Backspace + bindkey '\e[3~' delete-char # Del + # bindkey '\e[3;5~' delete-char # sometimes Del, sometimes C-Del + bindkey '\e[2~' overwrite-mode # Ins + + bindkey '^H' backward-kill-word # C-Backspace + + bindkey '5~' kill-word # C-Del + bindkey '^[[3;5~' kill-word # C-Del + bindkey '^[[3^' kill-word # C-Del + + bindkey "^[[1;5H" backward-kill-line # C-Home + bindkey "^[[7^" backward-kill-line # C-Home + + bindkey "^[[1;5F" kill-line # C-End + bindkey "^[[8^" kill-line # C-End + + bindkey '^[[1;5C' forward-word # C-Right + bindkey '^[0c' forward-word # C-Right + bindkey '^[[5C' forward-word # C-Right + + bindkey '^[[1;5D' backward-word # C-Left + bindkey '^[0d' backward-word # C-Left + bindkey '^[[5D' backward-word # C-Left + ''; + }; +} diff --git a/m/hut/configuration.nix b/m/hut/configuration.nix index 14c7881050ba0ae48e873bed044f9f5c680cfa90..9187812be6fa73318a0dbe6b8b16dc8e243b130e 100644 --- a/m/hut/configuration.nix +++ b/m/hut/configuration.nix @@ -1,20 +1,19 @@ -{ config, pkgs, agenix, ... }: +{ config, pkgs, ... }: { imports = [ ../common/main.nix + ../module/ceph.nix ./gitlab-runner.nix ./monitoring.nix ./nfs.nix ./slurm-daemon.nix - ./ceph.nix - agenix.nixosModules.default + ./nix-serve.nix + #./pxe.nix ]; - environment.systemPackages = [ - agenix.packages.x86_64-linux.default - ]; + boot.binfmt.emulatedSystems = [ "aarch64-linux" "powerpc64le-linux" "riscv64-linux" ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/ata-INTEL_SSDSC2BB240G7_PHDV6462004Y240AGN"; diff --git a/m/hut/gitlab-runner.nix b/m/hut/gitlab-runner.nix index 625500522592c5ff8468ff83e1cb94f9aa773c09..d9fedb55f5e9f13c218d3853c5b542fd0ff9dd86 100644 --- a/m/hut/gitlab-runner.nix +++ b/m/hut/gitlab-runner.nix @@ -1,33 +1,40 @@ { pkgs, lib, config, ... 
}: { - age.secrets."secrets/ovni-token".file = ./secrets/ovni-token.age; - age.secrets."secrets/nosv-token".file = ./secrets/nosv-token.age; + age.secrets.ovniToken.file = ../../secrets/ovni-token.age; + age.secrets.nosvToken.file = ../../secrets/nosv-token.age; services.gitlab-runner = { enable = true; settings.concurrent = 5; services = { ovni-shell = { - registrationConfigFile = config.age.secrets."secrets/ovni-token".path; + registrationConfigFile = config.age.secrets.ovniToken.path; executor = "shell"; tagList = [ "nix" "xeon" ]; + registrationFlags = [ + # Using space doesn't work, and causes it to misread the next flag + "--locked='false'" + ]; environmentVariables = { SHELL = "${pkgs.bash}/bin/bash"; }; }; ovni-docker = { - registrationConfigFile = config.age.secrets."secrets/ovni-token".path; + registrationConfigFile = config.age.secrets.ovniToken.path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; - registrationFlags = [ "--docker-network-mode host" ]; + registrationFlags = [ + "--locked='false'" + "--docker-network-mode host" + ]; environmentVariables = { https_proxy = "http://localhost:23080"; http_proxy = "http://localhost:23080"; }; }; nosv-docker = { - registrationConfigFile = config.age.secrets."secrets/nosv-token".path; + registrationConfigFile = config.age.secrets.nosvToken.path; dockerImage = "debian:stable"; tagList = [ "docker" "xeon" ]; registrationFlags = [ diff --git a/m/hut/monitoring.nix b/m/hut/monitoring.nix index 4bc3d246ab70bacb4407c2fdd0c46fdbe04ff5d7..b455c1d6cb6680ac7fa56106aedad60d115c4458 100644 --- a/m/hut/monitoring.nix +++ b/m/hut/monitoring.nix @@ -19,6 +19,7 @@ enable = true; port = 9001; retentionTime = "1y"; + listenAddress = "127.0.0.1"; }; systemd.services.prometheus-ipmi-exporter.serviceConfig.DynamicUser = lib.mkForce false; @@ -48,13 +49,18 @@ user = "root"; configFile = ./ipmi.yml; #extraFlags = [ "--log.level=debug" ]; + listenAddress = "127.0.0.1"; }; node = { enable = true; enabledCollectors = [ 
"systemd" ]; port = 9002; + listenAddress = "127.0.0.1"; + }; + smartctl = { + enable = true; + listenAddress = "127.0.0.1"; }; - smartctl.enable = true; }; scrapeConfigs = [ @@ -67,7 +73,16 @@ "127.0.0.1:9323" "127.0.0.1:9252" "127.0.0.1:${toString config.services.prometheus.exporters.smartctl.port}" - "10.0.40.40:9283" # Ceph + ]; + }]; + } + { + job_name = "ceph"; + static_configs = [{ + targets = [ + "10.0.40.40:9283" # Ceph statistics + "10.0.40.40:9002" # Node exporter + "10.0.40.42:9002" # Node exporter ]; }]; } diff --git a/m/hut/nix-serve.nix b/m/hut/nix-serve.nix new file mode 100644 index 0000000000000000000000000000000000000000..35ccd72846d2302814bd8d65ebac441cfd389b3b --- /dev/null +++ b/m/hut/nix-serve.nix @@ -0,0 +1,16 @@ +{ config, ... }: + +{ + age.secrets.nixServe.file = ../../secrets/nix-serve.age; + + services.nix-serve = { + enable = true; + # Only listen locally, as we serve it via ssh + bindAddress = "127.0.0.1"; + port = 5000; + + secretKeyFile = config.age.secrets.nixServe.path; + # Public key: + # jungle.bsc.es:pEc7MlAT0HEwLQYPtpkPLwRsGf80ZI26aj29zMw/HH0= + }; +} diff --git a/m/hut/pxe.nix b/m/hut/pxe.nix new file mode 100644 index 0000000000000000000000000000000000000000..e3a74e21bea809046eac4d1d376bdf22efa9f8fb --- /dev/null +++ b/m/hut/pxe.nix @@ -0,0 +1,35 @@ +{ theFlake, pkgs, ... }: + +# This module describes a script that can launch the pixiecore daemon to serve a +# NixOS image via PXE to a node to directly boot from there, without requiring a +# working disk. 
+ +let + # The host config must have the netboot-minimal.nix module too + host = theFlake.nixosConfigurations.lake2; + sys = host.config.system; + build = sys.build; + kernel = "${build.kernel}/bzImage"; + initrd = "${build.netbootRamdisk}/initrd"; + init = "${build.toplevel}/init"; + + script = pkgs.writeShellScriptBin "pixiecore-helper" '' + set -x + + ${pkgs.pixiecore}/bin/pixiecore \ + boot ${kernel} ${initrd} --cmdline "init=${init} loglevel=4" \ + --debug --dhcp-no-bind --port 64172 --status-port 64172 "$@" + ''; +in +{ + ## We need a DHCP server to provide the IP + #services.dnsmasq = { + # enable = true; + # settings = { + # domain-needed = true; + # dhcp-range = [ "192.168.0.2,192.168.0.254" ]; + # }; + #}; + + environment.systemPackages = [ script ]; +} diff --git a/m/hut/secrets.nix b/m/hut/secrets.nix deleted file mode 100644 index d22add65e72ca20f981fdd48d78705dbbfce5122..0000000000000000000000000000000000000000 --- a/m/hut/secrets.nix +++ /dev/null @@ -1,11 +0,0 @@ -let - rarias = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIE1oZTPtlEXdGt0Ak+upeCIiBdaDQtcmuWoTUCVuSVIR rarias@hut"; - root = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIII/1TNArcwA6D47mgW4TArwlxQRpwmIGiZDysah40Gb"; - hut = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICO7jIp6JRnRWTMDsTB/aiaICJCl4x8qmKMPSs4lCqP1"; - default = [ rarias root hut ]; -in -{ - "secrets/ovni-token.age".publicKeys = default; - "secrets/nosv-token.age".publicKeys = default; - "secrets/ceph-user.age".publicKeys = default; -} diff --git a/m/hut/secrets/ceph-user.age b/m/hut/secrets/ceph-user.age deleted file mode 100644 index 735afca6652d99ab5888fa8c23b06127003d3fd9..0000000000000000000000000000000000000000 --- a/m/hut/secrets/ceph-user.age +++ /dev/null @@ -1,11 +0,0 @@ -age-encryption.org/v1 --> ssh-ed25519 CAWG4Q 35Ak+Mep9k5KnDLF1ywDbMD4l4mRFg6D0et19tqXxAw -Wgr+CX4rzrPmUszSidtLAVSvgD80F2dqtd92hGZIFwo --> ssh-ed25519 MSF3dg OVFvpkAyWTowtxsafstX31H/hJpNZmnOCbvqMIN0+AQ -VxjRcQmp+BadEh2y0PB96EeizIl3tTQpVu0CWHmsc1s --> 
ssh-ed25519 HY2yRg MJSQIpre9m0XnojgXuKQ/+hVBZNrZNGZqplwhqicpjI -CLkE52iqpoqSnbzisNjQgxTfNqKeaRl5ntcw1d+ZDyQ --> m$8`De%~-grease '85p}`by -52zMpprONcawWDDtzHdWNwFoYXErPUnVjhSONbUBpDlqAmJmD1LcAnsU ---- 0vZOPyXQIMMGTwgFfvm8Sn8O7vjrsjGUEy5m/BASCyc -|)*_DUS`r sN[֌^e+A 1G.#mW 5 ( \ No newline at end of file diff --git a/m/hut/secrets/nosv-token.age b/m/hut/secrets/nosv-token.age deleted file mode 100644 index 4b495a944de69fccbfdde7394b7f63f80e53e06e..0000000000000000000000000000000000000000 Binary files a/m/hut/secrets/nosv-token.age and /dev/null differ diff --git a/m/hut/secrets/ovni-token.age b/m/hut/secrets/ovni-token.age deleted file mode 100644 index bb850ef38e40d426b50aba25ae6993fe521b4d74..0000000000000000000000000000000000000000 Binary files a/m/hut/secrets/ovni-token.age and /dev/null differ diff --git a/m/hut/slurm-daemon.nix b/m/hut/slurm-daemon.nix index e6ab227898f52bf5b976688f1479d586da26f0e0..e7fab8bcfeb59c5e02d0c4762fc6583e8d079048 100644 --- a/m/hut/slurm-daemon.nix +++ b/m/hut/slurm-daemon.nix @@ -3,9 +3,5 @@ { services.slurm = { server.enable = true; - partitionName = [ - "owl Nodes=owl[1-2] Default=YES MaxTime=INFINITE State=UP" - "all Nodes=owl[1-2],hut Default=NO MaxTime=INFINITE State=UP" - ]; }; } diff --git a/m/hut/targets.yml b/m/hut/targets.yml index 2cecd669956b89fb79df46874558707aeba9966d..8924735276db90223e75968c958c9bf12c6e2d89 100644 --- a/m/hut/targets.yml +++ b/m/hut/targets.yml @@ -7,5 +7,9 @@ - 10.0.40.106 - 10.0.40.107 - 10.0.40.108 + # Storage + - 10.0.40.141 + - 10.0.40.142 + - 10.0.40.143 labels: job: ipmi-lan diff --git a/m/lake2/configuration.nix b/m/lake2/configuration.nix new file mode 100644 index 0000000000000000000000000000000000000000..58bfef81bd42fc06418dee37f641a9ddb44b9dbd --- /dev/null +++ b/m/lake2/configuration.nix @@ -0,0 +1,73 @@ +{ config, pkgs, lib, modulesPath, ... 
}: + +{ + imports = [ + ../common/main.nix + ../common/monitoring.nix + ]; + + boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53563a"; + + environment.systemPackages = with pkgs; [ + ceph + ]; + + services.slurm = { + client.enable = lib.mkForce false; + }; + + services.ceph = { + enable = true; + global = { + fsid = "9c8d06e0-485f-4aaf-b16b-06d6daf1232b"; + monHost = "10.0.40.40"; + monInitialMembers = "bay"; + clusterNetwork = "10.0.40.40/24"; # Use Ethernet only + }; + osd = { + enable = true; + # One daemon per NVME disk + daemons = [ "4" "5" "6" "7" ]; + extraConfig = { + "osd crush chooseleaf type" = "0"; + "osd journal size" = "10000"; + "osd pool default min size" = "2"; + "osd pool default pg num" = "200"; + "osd pool default pgp num" = "200"; + "osd pool default size" = "3"; + }; + }; + }; + + networking = { + hostName = "lake2"; + interfaces.eno1.ipv4.addresses = [ { + address = "10.0.40.42"; + prefixLength = 24; + } ]; + interfaces.ibp5s0.ipv4.addresses = [ { + address = "10.0.42.42"; + prefixLength = 24; + } ]; + }; + + # Missing service for volumes, see: + # https://www.reddit.com/r/ceph/comments/14otjyo/comment/jrd69vt/ + systemd.services.ceph-volume = { + enable = true; + description = "Ceph Volume activation"; + unitConfig = { + After = "local-fs.target"; + Wants = "local-fs.target"; + }; + path = [ pkgs.ceph pkgs.util-linux pkgs.lvm2 pkgs.cryptsetup ]; + serviceConfig = { + Type = "oneshot"; + KillMode = "none"; + Environment = "CEPH_VOLUME_TIMEOUT=10000"; + ExecStart = "/bin/sh -c 'timeout $CEPH_VOLUME_TIMEOUT ${pkgs.ceph}/bin/ceph-volume lvm activate --all --no-systemd'"; + TimeoutSec = "0"; + }; + wantedBy = [ "multi-user.target" ]; + }; +} diff --git a/m/hut/ceph.nix b/m/module/ceph.nix similarity index 57% rename from m/hut/ceph.nix rename to m/module/ceph.nix index 722466f4e0ce2bbe98009febbad5d2f66aac7d99..ff3fd2234476d2f48b3ea8a7d916e8f79eb946b1 100644 --- a/m/hut/ceph.nix +++ b/m/module/ceph.nix @@ -1,20 +1,25 @@ { config, 
pkgs, ... }: +# Mounts the /ceph filesystem at boot { - environment.systemPackages = [ pkgs.ceph-client ]; + environment.systemPackages = with pkgs; [ + ceph + ceph-client + fio # For benchmarks + ]; # We need the ceph module loaded as the mount.ceph binary fails to run the # modprobe command. boot.kernelModules = [ "ceph" ]; - age.secrets."secrets/ceph-user".file = ./secrets/ceph-user.age; + age.secrets.cephUser.file = ../../secrets/ceph-user.age; fileSystems."/ceph" = { fsType = "ceph"; device = "user@9c8d06e0-485f-4aaf-b16b-06d6daf1232b.cephfs=/"; options = [ "mon_addr=10.0.40.40" - "secretfile=${config.age.secrets."secrets/ceph-user".path}" + "secretfile=${config.age.secrets.cephUser.path}" ]; }; } diff --git a/m/module/slurm-firewall.nix b/m/module/slurm-firewall.nix new file mode 100644 index 0000000000000000000000000000000000000000..8f52022cbd5d352f1f5847bf06cc65eac6704553 --- /dev/null +++ b/m/module/slurm-firewall.nix @@ -0,0 +1,8 @@ +{ ... }: + +{ + networking.firewall = { + # Required for PMIx in SLURM, we should find a better way + allowedTCPPortRanges = [ { from=1024; to=65535; } ]; + }; +} diff --git a/m/owl1/configuration.nix b/m/owl1/configuration.nix index a14ab21802beb73c5376c5ad7c89917b05ea9dff..5afda726795ba68e88c25b9281a3f36961a5298f 100644 --- a/m/owl1/configuration.nix +++ b/m/owl1/configuration.nix @@ -1,7 +1,11 @@ { config, pkgs, ... }: { - imports = [ ../common/main.nix ]; + imports = [ + ../common/main.nix + ../module/ceph.nix + ../module/slurm-firewall.nix + ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d53566c"; diff --git a/m/owl2/configuration.nix b/m/owl2/configuration.nix index 8022b36c8eb86f01d427a4cea0986dbc4a9684ad..9636760cfd0207e61758aa62de3539ba9973a944 100644 --- a/m/owl2/configuration.nix +++ b/m/owl2/configuration.nix @@ -1,15 +1,14 @@ -{ config, pkgs, modulesPath, lib, ... }: +{ config, pkgs, ... 
}: { imports = [ - #(modulesPath + "/installer/netboot/netboot-minimal.nix") ../common/main.nix + ../module/ceph.nix + ../module/slurm-firewall.nix ]; # Select the this using the ID to avoid mismatches boot.loader.grub.device = "/dev/disk/by-id/wwn-0x55cd2e414d535629"; - #programs.ssh.forwardX11 = false; - #programs.ssh.setXAuthLocation = lib.mkForce true; networking = { hostName = "owl2"; @@ -17,6 +16,7 @@ address = "10.0.40.2"; prefixLength = 24; } ]; + # Watch out! The OmniPath device is not in the same place here: interfaces.ibp129s0.ipv4.addresses = [ { address = "10.0.42.2"; prefixLength = 24; diff --git a/pkgs/ceph.nix b/pkgs/ceph.nix new file mode 100644 index 0000000000000000000000000000000000000000..5247862fc2064b7a0e93506d84eb8faea9563c92 --- /dev/null +++ b/pkgs/ceph.nix @@ -0,0 +1,405 @@ +{ lib +, stdenv +, runCommand +, fetchurl +, fetchFromGitHub +, fetchPypi + +# Build time +, cmake +, ensureNewerSourcesHook +, fmt +, git +, makeWrapper +, nasm +, pkg-config +, which + +# Tests +, nixosTests + +# Runtime dependencies +, arrow-cpp +, babeltrace +, boost179 +, bzip2 +, cryptsetup +, cunit +, doxygen +, gperf +, graphviz +, gtest +, icu +, libcap +, libcap_ng +, libnl +, libxml2 +, lttng-ust +, lua +, lz4 +, oath-toolkit +, openldap +, python310 +, rdkafka +, rocksdb +, snappy +, sqlite +, utf8proc +, zlib +, zstd + +# Optional Dependencies +, curl ? null +, expat ? null +, fuse ? null +, libatomic_ops ? null +, libedit ? null +, libs3 ? null +, yasm ? null + +# Mallocs +, gperftools ? null +, jemalloc ? null + +# Crypto Dependencies +, cryptopp ? null +, nspr ? null +, nss ? null + +# Linux Only Dependencies +, linuxHeaders +, util-linux +, libuuid +, udev +, keyutils +, rdma-core +, rabbitmq-c +, libaio ? null +, libxfs ? null +, liburing ? null +, zfs ? null +, ... 
+}: + +# We must have one crypto library +assert cryptopp != null || (nss != null && nspr != null); + +let + shouldUsePkg = pkg: if pkg != null && pkg.meta.available then pkg else null; + + optYasm = shouldUsePkg yasm; + optExpat = shouldUsePkg expat; + optCurl = shouldUsePkg curl; + optFuse = shouldUsePkg fuse; + optLibedit = shouldUsePkg libedit; + optLibatomic_ops = shouldUsePkg libatomic_ops; + optLibs3 = shouldUsePkg libs3; + + optJemalloc = shouldUsePkg jemalloc; + optGperftools = shouldUsePkg gperftools; + + optCryptopp = shouldUsePkg cryptopp; + optNss = shouldUsePkg nss; + optNspr = shouldUsePkg nspr; + + optLibaio = shouldUsePkg libaio; + optLibxfs = shouldUsePkg libxfs; + optZfs = shouldUsePkg zfs; + + # Downgrade rocksdb, 7.10 breaks ceph + rocksdb' = rocksdb.overrideAttrs { + version = "7.9.2"; + src = fetchFromGitHub { + owner = "facebook"; + repo = "rocksdb"; + rev = "refs/tags/v7.9.2"; + hash = "sha256-5P7IqJ14EZzDkbjaBvbix04ceGGdlWBuVFH/5dpD5VM="; + }; + }; + + hasRadosgw = optExpat != null && optCurl != null && optLibedit != null; + + # Malloc implementation (can be jemalloc, tcmalloc or null) + malloc = if optJemalloc != null then optJemalloc else optGperftools; + + # We prefer nss over cryptopp + cryptoStr = if optNss != null && optNspr != null then "nss" else + if optCryptopp != null then "cryptopp" else "none"; + + cryptoLibsMap = { + nss = [ optNss optNspr ]; + cryptopp = [ optCryptopp ]; + none = [ ]; + }; + + getMeta = description: with lib; { + homepage = "https://ceph.io/en/"; + inherit description; + license = with licenses; [ lgpl21 gpl2 bsd3 mit publicDomain ]; + maintainers = with maintainers; [ adev ak johanot krav ]; + platforms = [ "x86_64-linux" "aarch64-linux" ]; + }; + + ceph-common = with python.pkgs; buildPythonPackage { + pname = "ceph-common"; + inherit src version; + + sourceRoot = "ceph-${version}/src/python-common"; + + propagatedBuildInputs = [ + pyyaml + ]; + + nativeCheckInputs = [ + pytestCheckHook + ]; + + 
disabledTests = [ + # requires network access + "test_valid_addr" + ]; + + meta = getMeta "Ceph common module for code shared by manager modules"; + }; + + # Watch out for python <> boost compatibility + python = python310.override { + packageOverrides = self: super: { + sqlalchemy = super.sqlalchemy.overridePythonAttrs rec { + version = "1.4.46"; + src = fetchPypi { + pname = "SQLAlchemy"; + inherit version; + hash = "sha256-aRO4JH2KKS74MVFipRkx4rQM6RaB8bbxj2lwRSAMSjA="; + }; + disabledTestPaths = [ + "test/aaa_profiling" + "test/ext/mypy" + ]; + }; + }; + }; + + boost = boost179.override { + enablePython = true; + inherit python; + }; + + # TODO: split this off in build and runtime environment + ceph-python-env = python.withPackages (ps: with ps; [ + ceph-common + + # build time + cython + + # debian/control + bcrypt + cherrypy + influxdb + jinja2 + kubernetes + natsort + numpy + pecan + prettytable + pyjwt + pyopenssl + python-dateutil + pyyaml + requests + routes + scikit-learn + scipy + setuptools + sphinx + virtualenv + werkzeug + + # src/pybind/mgr/requirements-required.txt + cryptography + jsonpatch + + # src/tools/cephfs/shell/setup.py + cmd2 + colorama + ]); + inherit (ceph-python-env.python) sitePackages; + + version = "18.2.0"; + src = fetchurl { + url = "https://download.ceph.com/tarballs/ceph-${version}.tar.gz"; + hash = "sha256:0k9nl6xi5brva51rr14m7ig27mmmd7vrpchcmqc40q3c2khn6ns9"; + }; +in rec { + ceph = stdenv.mkDerivation { + pname = "ceph"; + inherit src version; + + nativeBuildInputs = [ + cmake + fmt + git + makeWrapper + nasm + pkg-config + python + python.pkgs.python # for the toPythonPath function + python.pkgs.wrapPython + which + (ensureNewerSourcesHook { year = "1980"; }) + # for building docs/man-pages presumably + doxygen + graphviz + ]; + + enableParallelBuilding = true; + + buildInputs = cryptoLibsMap.${cryptoStr} ++ [ + arrow-cpp + babeltrace + boost + bzip2 + ceph-python-env + cryptsetup + cunit + gperf + gtest + icu + libcap + 
libnl + libxml2 + lttng-ust + lua + lz4 + malloc + oath-toolkit + openldap + optLibatomic_ops + optLibs3 + optYasm + rdkafka + rocksdb' + snappy + sqlite + utf8proc + zlib + zstd + ] ++ lib.optionals stdenv.isLinux [ + keyutils + libcap_ng + liburing + libuuid + linuxHeaders + optLibaio + optLibxfs + optZfs + rabbitmq-c + rdma-core + udev + util-linux + ] ++ lib.optionals hasRadosgw [ + optCurl + optExpat + optFuse + optLibedit + ]; + + pythonPath = [ ceph-python-env "${placeholder "out"}/${ceph-python-env.sitePackages}" ]; + + preConfigure ='' + substituteInPlace src/common/module.c --replace "/sbin/modinfo" "modinfo" + substituteInPlace src/common/module.c --replace "/sbin/modprobe" "modprobe" + substituteInPlace src/common/module.c --replace "/bin/grep" "grep" + + # install target needs to be in PYTHONPATH for "*.pth support" check to succeed + # set PYTHONPATH, so the build system doesn't silently skip installing ceph-volume and others + export PYTHONPATH=${ceph-python-env}/${sitePackages}:$lib/${sitePackages}:$out/${sitePackages} + patchShebangs src/ + ''; + + cmakeFlags = [ + "-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib" + + "-DWITH_CEPHFS_SHELL:BOOL=ON" + "-DWITH_SYSTEMD:BOOL=OFF" + # `WITH_JAEGER` requires `thrift` as a dependency (fine), but the build fails with: + # CMake Error at src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-Release.cmake:49 (message): + # Command failed: 2 + # + # 'make' 'opentelemetry_trace' 'opentelemetry_exporter_jaeger_trace' + # + # See also + # + # /build/ceph-18.2.0/build/src/opentelemetry-cpp/src/opentelemetry-cpp-stamp/opentelemetry-cpp-build-*.log + # and that file contains: + # /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc: In member function 'virtual void opentelemetry::v1::exporter::jaeger::TUDPTransport::close()': + # /build/ceph-18.2.0/src/jaegertracing/opentelemetry-cpp/exporters/jaeger/src/TUDPTransport.cc:71:7: error: '::close' has not been declared; did you 
mean 'pclose'? + # 71 | ::THRIFT_CLOSESOCKET(socket_); + # | ^~~~~~~~~~~~~~~~~~ + # Looks like `close()` is somehow not included. + # But the relevant code is already removed in `open-telemetry` 1.10: https://github.com/open-telemetry/opentelemetry-cpp/pull/2031 + # So it's probably not worth trying to fix that for this Ceph version, + # and instead just disable Ceph's Jaeger support. + "-DWITH_JAEGER:BOOL=OFF" + "-DWITH_TESTS:BOOL=OFF" + + # Use our own libraries, where possible + "-DWITH_SYSTEM_ARROW:BOOL=ON" # Only used if other options enable Arrow support. + "-DWITH_SYSTEM_BOOST:BOOL=ON" + "-DWITH_SYSTEM_GTEST:BOOL=ON" + "-DWITH_SYSTEM_ROCKSDB:BOOL=ON" + "-DWITH_SYSTEM_UTF8PROC:BOOL=ON" + "-DWITH_SYSTEM_ZSTD:BOOL=ON" + + # TODO breaks with sandbox, tries to download stuff with npm + "-DWITH_MGR_DASHBOARD_FRONTEND:BOOL=OFF" + # WITH_XFS has been set default ON from Ceph 16, keeping it optional in nixpkgs for now + ''-DWITH_XFS=${if optLibxfs != null then "ON" else "OFF"}'' + ] ++ lib.optional stdenv.isLinux "-DWITH_SYSTEM_LIBURING=ON"; + + postFixup = '' + wrapPythonPrograms + wrapProgram $out/bin/ceph-mgr --prefix PYTHONPATH ":" "$(toPythonPath ${placeholder "out"}):$(toPythonPath ${ceph-python-env})" + + # Test that ceph-volume exists since the build system has a tendency to + # silently drop it with misconfigurations. + test -f $out/bin/ceph-volume + ''; + + outputs = [ "out" "lib" "dev" "doc" "man" ]; + + doCheck = false; # uses pip to install things from the internet + + # Takes 7+h to build with 2 cores. 
+ requiredSystemFeatures = [ "big-parallel" ]; + + meta = getMeta "Distributed storage system"; + + passthru = { + inherit version; + tests = { + inherit (nixosTests) + ceph-multi-node + ceph-single-node + ceph-single-node-bluestore; + }; + }; + }; + + ceph-client = runCommand "ceph-client-${version}" { + meta = getMeta "Tools needed to mount Ceph's RADOS Block Devices/Cephfs"; + } '' + mkdir -p $out/{bin,etc,${sitePackages},share/bash-completion/completions} + cp -r ${ceph}/bin/{ceph,.ceph-wrapped,rados,rbd,rbdmap} $out/bin + cp -r ${ceph}/bin/ceph-{authtool,conf,dencoder,rbdnamer,syn} $out/bin + cp -r ${ceph}/bin/rbd-replay* $out/bin + cp -r ${ceph}/sbin/mount.ceph $out/bin + cp -r ${ceph}/sbin/mount.fuse.ceph $out/bin + ln -s bin $out/sbin + cp -r ${ceph}/${sitePackages}/* $out/${sitePackages} + cp -r ${ceph}/etc/bash_completion.d $out/share/bash-completion/completions + # wrapPythonPrograms modifies .ceph-wrapped, so lets just update its paths + substituteInPlace $out/bin/ceph --replace ${ceph} $out + substituteInPlace $out/bin/.ceph-wrapped --replace ${ceph} $out + ''; +} diff --git a/pkgs/mpi.nix b/pkgs/overlay.nix similarity index 71% rename from pkgs/mpi.nix rename to pkgs/overlay.nix index cdaeadda18f052cfed94a7158278b03d6748d3d3..03208c5f5cc8bb74c60b9f30c5e743264ce4e69b 100644 --- a/pkgs/mpi.nix +++ b/pkgs/overlay.nix @@ -1,7 +1,10 @@ final: prev: { bsc = prev.bsc.extend (bscFinal: bscPrev: { + # Set MPICH as default mpi = bscFinal.mpich; + + # Configure the network for MPICH mpich = with final; prev.mpich.overrideAttrs (old: { buildInput = old.buildInputs ++ [ libfabric @@ -22,4 +25,11 @@ final: prev: ]; }); }); + + # Update ceph to 18.2.0 until it lands in nixpkgs, see: + # https://github.com/NixOS/nixpkgs/pull/247849 + inherit (prev.callPackage ./ceph.nix { + lua = prev.lua5_4; + fmt = prev.fmt_8; + }) ceph ceph-client; } diff --git a/secrets/ceph-user.age b/secrets/ceph-user.age new file mode 100644 index 
0000000000000000000000000000000000000000..f23e2ff0558e216abb9940ca7ebc26287849466d --- /dev/null +++ b/secrets/ceph-user.age @@ -0,0 +1,21 @@ +age-encryption.org/v1 +-> ssh-ed25519 AY8zKw J00a6ZOhkupkhLU5WQ0kD05HEF4KKsSs2hwjHKbnnHU +J14VoNOCqLpScVO7OLXbqTcLI4tcVUHt5cqY/XQmbGs +-> ssh-ed25519 sgAamA k8R/bSUdvVmlBI6yHPi5NBQPBGM36lPJwsir8DFGgxE +4ZKC3gYvic6AVrNGgNjwztbUzhxP8ViX5O3wFo9wlrk +-> ssh-ed25519 HY2yRg 966xf2fTnA6Wq0uYXbXZQOManqITJcCbQS9LZCGEOh4 +Qg5echQSrzqeDqvaMx+5fqi8XyTjAeCsY/UFJX6YnDs +-> ssh-ed25519 tcumPQ e0U2okrGIoUpLfPYjIRx1V92rE3hZW13nJef+l3kBQg +LejAUKBl+tPhwocCF00ZHTzFISnwX8og8GvemiMIcyo +-> ssh-ed25519 JJ1LWg QkzTsPq9Gdh+FNz/a4bDb9LQOreFyxeTC51UNd1fsj0 +ayrlKenETfQzH1Z9drVEWqszQebicGVJve0/pCnxAE8 +-> ssh-ed25519 CAWG4Q lJLW9+dxvyoD4hYzeXeE/4rzJ6HIeEQOB1+fbhV3xw0 +T2RrVCtTuQvya9HiJB7txk3QGrntpsMX9Tt1cyXoW5E +-> ssh-ed25519 MSF3dg JOZkFb2CfqWKvZIz7lYxXWgv8iEVDkQF8hInDMZvknc +MHDWxjUw4dNiC1h4MrU9uKKcI3rwkxABm0+5FYMZkok +-> ~8m;7f-grease +lDIullfC98RhpTZ4Mk87Td+VtPmwPdgz+iIilpKugUkmV5r4Uqd7yE+5ArA6ekr/ +G/X4EA +--- Cz4sv9ZunBcVdZCozdTh1zlg1zIASjk2MjYeYfcN9eA +N $[HQ +d'7Ͳ)x9yEM7^[M+&$8tMв \ No newline at end of file diff --git a/secrets/munge-key.age b/secrets/munge-key.age new file mode 100644 index 0000000000000000000000000000000000000000..ead42c816eb6d3ba2973e4dcb0a62c55e6f5ac8b Binary files /dev/null and b/secrets/munge-key.age differ diff --git a/secrets/nix-serve.age b/secrets/nix-serve.age new file mode 100644 index 0000000000000000000000000000000000000000..f59c32341e96a262c51e2a8338b0adb713897fe2 --- /dev/null +++ b/secrets/nix-serve.age @@ -0,0 +1,12 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg d144D+VvxhYgKtH//uD2qNuVnYX6bh74YqkyM3ZjBwU +0IeVmFAf4U8Sm0d01O6ZwJ1V2jl/mSMl4wF0MP5LrIg +-> ssh-ed25519 CAWG4Q H4nKxue/Cj/3KUF5A+/ygHMjjArwgx3SIWwXcqFtyUo +4k5NJkLUrueLYiPkr2LAwQLWmuaOIsDmV/86ravpleU +-> ssh-ed25519 MSF3dg HpgUAFHLPs4w0cdJHqTwf8lySkTeV9O9NnBf49ClDHs +foPIUUgAYe1YSDy6+aMfjN7xv9xud9fDmhRlIztHoEo +-> vLkF\<-grease 
+3GRT+W8gYSpjl/a6Ix9+g9UJnTpl1ZH/oucfR801vfE8y77DV2Jxz/XJwzxYxKG5 +YEhiTGMNbXw/V7E5aVSz6Bdc +--- GtiHKCZdHByq9j0BSLd544PhbEwTN138E8TFdxipeiA +G$SRATh]n8,Hs=p'+j9):)Y8I8:olZ3PMF;rY$yLٜΜUs16ǾLb \ No newline at end of file diff --git a/secrets/nosv-token.age b/secrets/nosv-token.age new file mode 100644 index 0000000000000000000000000000000000000000..31a354b6ac9b0babfe1792ddc65b28bb3d90a729 --- /dev/null +++ b/secrets/nosv-token.age @@ -0,0 +1,11 @@ +age-encryption.org/v1 +-> ssh-ed25519 HY2yRg hrdS7Dl/j+u3XVfM79ZJpZSlre9TcD7DTQ+EEAT6kEE +avUO96P1h7w2BYWgrQ7GpUgdaCV9AZL7eOTTcF9gfro +-> ssh-ed25519 CAWG4Q A5raRY1CAgFYZgoQ92GMyNejYNdHx/7Y6uTS+EjLPWA +FRFqT2Jz7qRcybaxkQTKHGl797LVXoHpYG4RZSrX/70 +-> ssh-ed25519 MSF3dg D+R80Bg7W9AuiOMAqtGFZQl994dRBIegYRLmmTaeZ3o +BHvZsugRiuZ91b4jk91h30o3eF3hadSnVCwxXge95T8 +-> BT/El`a-grease W{nq|Vm )bld 2Nl}4 N$#JGB4t +oLG+0S1aGfO/ohCfgGmhDhwwLi4H +--- 2I5C+FvBG/K1ZHh7C5QD39feTSLoFGwcTeZAmeILNsI +Wo d;C._(u G#vgɝyYl9ϵ.0x޽N./tBbK:Q\T_txm_Jޞ- \ No newline at end of file diff --git a/secrets/ovni-token.age b/secrets/ovni-token.age new file mode 100644 index 0000000000000000000000000000000000000000..4378c388dd465e6d39681bbca5354136454855f8 Binary files /dev/null and b/secrets/ovni-token.age differ diff --git a/secrets/secrets.nix b/secrets/secrets.nix new file mode 100644 index 0000000000000000000000000000000000000000..95b43acd01372e9cee8d90100fa0a6835041bf96 --- /dev/null +++ b/secrets/secrets.nix @@ -0,0 +1,15 @@ +let + keys = import ../keys.nix; + adminsKeys = builtins.attrValues keys.admins; + hut = [ keys.hosts.hut ] ++ adminsKeys; + # Only expose ceph keys to safe nodes and admins + safe = keys.hostGroup.safe ++ adminsKeys; +in +{ + "ovni-token.age".publicKeys = hut; + "nosv-token.age".publicKeys = hut; + "nix-serve.age".publicKeys = hut; + + "ceph-user.age".publicKeys = safe; + "munge-key.age".publicKeys = safe; +} diff --git a/web/content/access/index.md b/web/content/access/index.md index 
e639b2451e635dd6b0753f25c3d5196e9855f721..4787544a2667d15be540a15ea7241108d8fc1c87 100644 --- a/web/content/access/index.md +++ b/web/content/access/index.md @@ -17,6 +17,6 @@ Then, to request access to the machines we will need some information about you: 1. The salted hash of your login password, generated with `mkpasswd -m sha-512` 1. An SSH public key of type Ed25519 (can be generated with `ssh-keygen -t ed25519`) -You can send us both an email at and - with the details, or directly open a merge request in -the [jungle repository](https://pm.bsc.es/gitlab/rarias/jungle/). +Send an email to with the details, or directly open a +merge request in the [jungle +repository](https://pm.bsc.es/gitlab/rarias/jungle/). diff --git a/web/content/posts/2023-09-12/_index.md b/web/content/posts/2023-09-12/_index.md new file mode 100644 index 0000000000000000000000000000000000000000..43d0d8ab5853b97464e59b226bf073f6cdf2cac6 --- /dev/null +++ b/web/content/posts/2023-09-12/_index.md @@ -0,0 +1,71 @@ +--- +title: "Update 2023-09-12" +author: "Rodrigo Arias Mallo" +date: 2023-09-12 +--- + +This is a summary of notable changes introduced in the jungle cluster in the +last months. + +### New Ceph filesystem available + +We have installed the latest [Ceph filesystem][1] (18.2.0) which stores three +redundant copies of the data so a failure in one disk doesn't cause data loss. +It is mounted in /ceph and available for use in the owl1, owl2 and hut +nodes. For now it provides 2.8 TiB of space and it is expected to +increase when the last storage node is installed. + +[1]: https://en.wikipedia.org/wiki/Ceph_(software) + +The throughput is limited by the 1 Gigabit Ethernet speed, but should be +reasonably fast for most workloads. 
Here is a test with dd which reaches the +network limit: + +```txt +hut% dd if=/dev/urandom of=/ceph/rarias/urandom bs=1M count=1024 +1024+0 records in +1024+0 records out +1073741824 bytes (1,1 GB, 1,0 GiB) copied, 8,98544 s, 119 MB/s +``` + +### SLURM power save + +The SLURM daemon has been configured to power down the nodes after one hour of +idling. When a new job is allocated to a node that is powered off, it is +automatically turned on and as soon as it becomes available it will execute the +job. Here is an example with two nodes that boot and execute a simple job that +shows the date. + +```txt +hut% date; srun -N 2 date +2023-09-12T17:36:09 CEST +2023-09-12T17:38:26 CEST +2023-09-12T17:38:18 CEST +``` + +You can expect a similar delay (around 2-3 min) while the nodes are starting. +Notice that while the nodes are kept on, the delay is not noticeable: + +```txt +hut% date; srun -N 2 date +2023-09-12T17:40:04 CEST +2023-09-12T17:40:04 CEST +2023-09-12T17:40:04 CEST +``` + +### Power and temperature monitoring + +In the cluster, we monitor the temperature and the power draw of all nodes. This +allows us to understand which machines are not being used and turn them off to +save energy that otherwise would be wasted. Here is an example where some nodes +are powered off to save energy: + +![power](./power.png) + +We also configured the nodes to work at low CPU frequencies, so the temperature +is kept low to increase the lifespan of the node components. Towards these +goals, we have configured two alerts that trigger when the CPUs of a node +exceed the temperature limit of 80 °C or when the power draw exceeds 350 W. + +By keeping the power consumption and temperatures controlled, we can safely +incorporate more machines that will only be used on demand. 
diff --git a/web/content/posts/2023-09-12/power.png b/web/content/posts/2023-09-12/power.png new file mode 100644 index 0000000000000000000000000000000000000000..4987d48cb6d3b865e3fa29c609b7ed18db7e580d Binary files /dev/null and b/web/content/posts/2023-09-12/power.png differ