diff --git a/flake.lock b/flake.lock
index a8e8e792ea9363da1b9aaa0aa9142164918f9bce..62f346895a34eb81b05d0c4b8ce993b8c9de6d00 100644
--- a/flake.lock
+++ b/flake.lock
@@ -23,11 +23,11 @@
     },
     "bscpkgs": {
       "locked": {
-        "lastModified": 1682521628,
-        "narHash": "sha256-uRIDCuJNt3rdikWiRcM3VPsQSk0vpQB1JO3Wx24psJo=",
+        "lastModified": 1686927936,
+        "narHash": "sha256-y9/R5OqDRFeq5kKRAsv9gge7vkeF/g1ImlbivpjYP/4=",
         "ref": "refs/heads/master",
-        "rev": "c775ee4d6f76aded05b08ae13924c302f18f9b2c",
-        "revCount": 807,
+        "rev": "cbe9af5d042e9d5585fe2acef65a1347c68b2fbd",
+        "revCount": 834,
         "type": "git",
         "url": "https://pm.bsc.es/gitlab/rarias/bscpkgs.git"
       },
diff --git a/m/common/main.nix b/m/common/main.nix
index ba503760a0ad769ede9b80494d9042046f3578df..f8622b146202ed429f17434233d6fcd3787e0473 100644
--- a/m/common/main.nix
+++ b/m/common/main.nix
@@ -11,7 +11,10 @@
     ./users.nix
   ];
 
-  nixpkgs.overlays = [ bscpkgs.bscOverlay ];
+  nixpkgs.overlays = [
+    bscpkgs.bscOverlay
+    (import ../../pkgs/mpi.nix)
+  ];
 
   nix.nixPath = [
     "nixpkgs=${nixpkgs}"
@@ -32,6 +35,8 @@
     vim wget git htop tmux pciutils tcpdump ripgrep nix-index nixos-option
     nix-diff ipmitool freeipmi ethtool lm_sensors ix cmake gnumake file tree
     ncdu
+    # From bscpkgs overlay
+    bsc.osumb
   ];
 
   systemd.services."serial-getty@ttyS0" = {
diff --git a/m/common/slurm.nix b/m/common/slurm.nix
index 650156fa2abc962357c009768c020b7bb97ddfed..ce909dfd32c74668f6e84d028144a39229c597fe 100644
--- a/m/common/slurm.nix
+++ b/m/common/slurm.nix
@@ -10,7 +10,15 @@
       "hut Sockets=2 CoresPerSocket=14 ThreadsPerCore=2"
     ];
     extraConfig = ''
+      # Use PMIx for MPI by default. It works okay with MPICH and OpenMPI, but
+      # not with Intel MPI. For that use the compatibility shim libpmi.so
+      # setting I_MPI_PMI_LIBRARY=$pmix/lib/libpmi.so while maintaining the PMIx
+      # library in SLURM (--mpi=pmix). See more details here:
+      # https://pm.bsc.es/gitlab/rarias/jungle/-/issues/16
       MpiDefault=pmix
+
+      # When a node reboots return that node to the slurm queue as soon as it
+      # becomes operative again.
       ReturnToService=2
     '';
   };
diff --git a/pkgs/mpi.nix b/pkgs/mpi.nix
new file mode 100644
index 0000000000000000000000000000000000000000..cdaeadda18f052cfed94a7158278b03d6748d3d3
--- /dev/null
+++ b/pkgs/mpi.nix
@@ -0,0 +1,31 @@
+# Overlay that extends the bsc package set with an MPICH configured for the
+# ch4:ofi device (libfabric) and using PMIx as the PMI implementation.
+final: prev:
+{
+  bsc = prev.bsc.extend (bscFinal: bscPrev: {
+    mpi = bscFinal.mpich;
+    mpich = with final; prev.mpich.overrideAttrs (old: {
+      # libfabric and pmix are the libraries referenced by the
+      # --with-libfabric and --with-pmix configure flags below.
+      buildInputs = old.buildInputs ++ [
+        libfabric
+        pmix
+      ];
+      # NOTE: this replaces the configureFlags of the base mpich derivation
+      # instead of appending to them.
+      configureFlags = [
+        "--enable-shared"
+        "--enable-sharedlib"
+        "--with-pm=no"
+        "--with-device=ch4:ofi"
+        "--with-pmi=pmix"
+        "--with-pmix=${final.pmix}"
+        "--with-libfabric=${final.libfabric}"
+        "--enable-g=log"
+      ] ++ lib.optionals (lib.versionAtLeast gfortran.version "10") [
+        "FFLAGS=-fallow-argument-mismatch" # https://github.com/pmodels/mpich/issues/4300
+        "FCFLAGS=-fallow-argument-mismatch"
+      ];
+    });
+  });
+}