diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..920a3a89b468a85e602447489df88dbfb9270d97 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,15 @@ +build:ci: + stage: build + tags: + - nix + script: + - env + - rm -rf bench6-master + - git clone --depth=1 https://pm.bsc.es/gitlab/rarias/bench6.git bench6-master + - cd bench6-master + - git rev-parse HEAD + - cd .. + - git rev-parse HEAD + - nix shell '.#bench6' 'jungle#bigotes' --command sh -c "test/run.sh" + rules: + - if: $CI_PIPELINE_SOURCE == 'merge_request_event' diff --git a/CMakeLists.txt b/CMakeLists.txt index e53e51c27c054b016ee167ef4025722e2b65eefc..36fb051ccb26686dd5205271f18574f05e34b171 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,9 @@ cmake_minimum_required(VERSION 3.20) +set(CMAKE_C_COMPILER "clang") +set(CMAKE_CXX_COMPILER "clang++") + project(BENCH6 LANGUAGES C CXX) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") @@ -58,7 +61,7 @@ if (NOT HAVE_CLOCK_GETTIME) endif() endif() -set(USE_SIMD OFF CACHE BOOL "Define SIMD and build with -fopenmp-simd") +set(USE_SIMD ON CACHE BOOL "Define SIMD and build with -fopenmp-simd") if(USE_SIMD) add_definitions(-DSIMD) add_compile_options(-fopenmp-simd) @@ -72,6 +75,7 @@ endif() find_package(MPI) find_package(Nanos6) find_package(Nodes) +find_package(Tampi) set_property(GLOBAL PROPERTY bench6_list "") @@ -82,9 +86,9 @@ macro(mk_bench NAME) add_executable(${NAME}) get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list) - message(STATUS "Before BENCH6_LIST=${BENCH6_LIST}") + #message(STATUS "Before BENCH6_LIST=${BENCH6_LIST}") list(APPEND BENCH6_LIST ${NAME}) - message(STATUS "After BENCH6_LIST=${BENCH6_LIST}") + #message(STATUS "After BENCH6_LIST=${BENCH6_LIST}") set_property(GLOBAL PROPERTY bench6_list "${BENCH6_LIST}") install(TARGETS ${NAME} RUNTIME DESTINATION bin) diff --git a/cmake/FindNodes.cmake b/cmake/FindNodes.cmake index 
49ba1aafa12ea3821d1830da0301e924ab648982..13e81cf9ea83ee736b1f69376634ed98c2e89b52 100644 --- a/cmake/FindNodes.cmake +++ b/cmake/FindNodes.cmake @@ -1,21 +1,58 @@ +# Copyright (c) 2022-2023 Barcelona Supercomputing Center (BSC) +# SPDX-License-Identifier: GPL-3.0-or-later +# +# Searches for libnodes and checks if the -fompss-2=libnodes flag is supported by +# the compiler. +# +# Sets the variable NODES_FOUND when libnodes is found and NODES_FLAG_SUPPORTED +# when the -fompss-2=libnodes flag is supported. +# +# The target Nodes::nodes is defined when both checks are passed, and includes a +# rule to add the compile and link time flags. + include(GNUInstallDirs) +include(FeatureSummary) -if(DEFINED ENV{NODES_HOME}) - set(NODES_HOME "$ENV{NODES_HOME}") -else() - message(STATUS "NODES_HOME not set, refusing to search") +set(NODES_FLAG "-fompss-2=libnodes") + +set(can_search_nodes TRUE) + +if(NOT DEFINED ENV{NODES_HOME}) + message(STATUS "NODES_HOME not set, refusing to search Nodes") + set(can_search_nodes FALSE) endif() -find_library(NODES_LIBRARY NAMES nanos6 PATHS "${NODES_HOME}/lib" NO_DEFAULT_PATH) -find_file(NODES_WRAPPER NAMES nanos6-main-wrapper.o PATHS "${NODES_HOME}/lib" NO_DEFAULT_PATH) -find_path(NODES_INCLUDE_DIR nanos6.h PATHS "${NODES_HOME}/include" NO_DEFAULT_PATH) +if(NOT DEFINED ENV{NOSV_HOME}) + message(STATUS "NOSV_HOME not set, refusing to search Nodes") + set(can_search_nodes FALSE) +endif() + +if(can_search_nodes) + set(NODES_HOME "$ENV{NODES_HOME}") + + # Ensure the compiler supports libnodes + include(CheckCCompilerFlag) + + # Also set the linker flags, as otherwise the check will fail due to undefined + # symbols in the final program. 
+ set(CMAKE_REQUIRED_LINK_OPTIONS "${NODES_FLAG}") + check_c_compiler_flag("${NODES_FLAG}" NODES_FLAG_SUPPORTED) + + if(NOT NODES_FLAG_SUPPORTED) + message(STATUS "Compiler doesn't support ${NODES_FLAG} flag") + endif() + + find_library(NODES_LIBRARY NAMES nodes PATHS "${NODES_HOME}/lib" NO_DEFAULT_PATH) + find_path(NODES_INCLUDE_DIR nodes.h PATHS "${NODES_HOME}/include" NO_DEFAULT_PATH) +endif() include(FindPackageHandleStandardArgs) find_package_handle_standard_args(Nodes DEFAULT_MSG - NODES_LIBRARY NODES_INCLUDE_DIR NODES_WRAPPER) + NODES_HOME NODES_LIBRARY NODES_INCLUDE_DIR NODES_FLAG_SUPPORTED) if(NOT NODES_FOUND) + message(STATUS "Cannot find NODES library") return() endif() @@ -24,12 +61,6 @@ if(NOT TARGET Nodes::nodes) set_target_properties(Nodes::nodes PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${NODES_INCLUDE_DIR}" IMPORTED_LOCATION ${NODES_LIBRARY}) -endif() - -if(NOT TARGET Nodes::wrapper) - add_library(Nodes::wrapper STATIC IMPORTED) - set_target_properties(Nodes::wrapper PROPERTIES - IMPORTED_LOCATION ${NODES_WRAPPER}) - target_compile_options(Nodes::wrapper INTERFACE "-fompss-2") - target_link_libraries(Nodes::wrapper INTERFACE Nodes::nodes) + target_compile_options(Nodes::nodes INTERFACE "${NODES_FLAG}") + target_link_options(Nodes::nodes INTERFACE "${NODES_FLAG}") endif() diff --git a/cmake/FindTampi.cmake b/cmake/FindTampi.cmake new file mode 100644 index 0000000000000000000000000000000000000000..6bcc84b4cb2ce1081f0dbb60d22a4cfc2e342a7f --- /dev/null +++ b/cmake/FindTampi.cmake @@ -0,0 +1,22 @@ +include(GNUInstallDirs) + +find_library(TAMPI_LIBRARY NAMES tampi-c) +find_path(TAMPI_INCLUDE_DIR TAMPI.h) + +include(FindPackageHandleStandardArgs) + +find_package_handle_standard_args(Tampi DEFAULT_MSG + TAMPI_LIBRARY TAMPI_INCLUDE_DIR) + +if(NOT TAMPI_FOUND) + return() +endif() + +if(TARGET Tampi::tampi-c) + return() +endif() + +add_library(Tampi::tampi-c SHARED IMPORTED) +set_target_properties(Tampi::tampi-c PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES 
"${TAMPI_INCLUDE_DIR}" + IMPORTED_LOCATION ${TAMPI_LIBRARY}) diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000000000000000000000000000000000000..1ac2c1f41020a57420e4d656756c917261b062f4 --- /dev/null +++ b/flake.lock @@ -0,0 +1,152 @@ +{ + "nodes": { + "agenix": { + "inputs": { + "darwin": "darwin", + "home-manager": "home-manager", + "nixpkgs": [ + "jungle", + "nixpkgs" + ], + "systems": "systems" + }, + "locked": { + "lastModified": 1712079060, + "narHash": "sha256-/JdiT9t+zzjChc5qQiF+jhrVhRt8figYH29rZO7pFe4=", + "owner": "ryantm", + "repo": "agenix", + "rev": "1381a759b205dff7a6818733118d02253340fd5e", + "type": "github" + }, + "original": { + "owner": "ryantm", + "repo": "agenix", + "type": "github" + } + }, + "bscpkgs": { + "inputs": { + "nixpkgs": [ + "jungle", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1713974364, + "narHash": "sha256-ilZTVWSaNP1ibhQIIRXE+q9Lj2XOH+F9W3Co4QyY1eU=", + "ref": "refs/heads/master", + "rev": "de89197a4a7b162db7df9d41c9d07759d87c5709", + "revCount": 937, + "type": "git", + "url": "https://git.sr.ht/~rodarima/bscpkgs" + }, + "original": { + "type": "git", + "url": "https://git.sr.ht/~rodarima/bscpkgs" + } + }, + "darwin": { + "inputs": { + "nixpkgs": [ + "jungle", + "agenix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1700795494, + "narHash": "sha256-gzGLZSiOhf155FW7262kdHo2YDeugp3VuIFb4/GGng0=", + "owner": "lnl7", + "repo": "nix-darwin", + "rev": "4b9b83d5a92e8c1fbfd8eb27eda375908c11ec4d", + "type": "github" + }, + "original": { + "owner": "lnl7", + "ref": "master", + "repo": "nix-darwin", + "type": "github" + } + }, + "home-manager": { + "inputs": { + "nixpkgs": [ + "jungle", + "agenix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1703113217, + "narHash": "sha256-7ulcXOk63TIT2lVDSExj7XzFx09LpdSAPtvgtM7yQPE=", + "owner": "nix-community", + "repo": "home-manager", + "rev": "3bfaacf46133c037bb356193bd2f1765d9dc82c1", + "type": "github" + }, + "original": { + "owner": "nix-community", 
+ "repo": "home-manager", + "type": "github" + } + }, + "jungle": { + "inputs": { + "agenix": "agenix", + "bscpkgs": "bscpkgs", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1714044311, + "narHash": "sha256-mEMtClaRZE45THl0Ukdhj5SVQzYSvRCRAdPshSlvkCQ=", + "ref": "refs/heads/master", + "rev": "d2adc3a6d3ab6a42cc0a1c99c2ff8681667409e0", + "revCount": 235, + "type": "git", + "url": "https://git.sr.ht/~rodarima/jungle" + }, + "original": { + "type": "git", + "url": "https://git.sr.ht/~rodarima/jungle" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1713714899, + "narHash": "sha256-+z/XjO3QJs5rLE5UOf015gdVauVRQd2vZtsFkaXBq2Y=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "6143fc5eeb9c4f00163267708e26191d1e918932", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "jungle": "jungle" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000000000000000000000000000000000000..c5b2672800aa15d9f92a6545866f6ea8f8fe862a --- /dev/null +++ b/flake.nix @@ -0,0 +1,44 @@ +{ + description = "bench6"; + nixConfig.bash-prompt = "\[nix-develop\]$ "; + + inputs.jungle.url = "git+https://git.sr.ht/~rodarima/jungle"; + + outputs = { self, jungle, ... }: + let + targetMachine = jungle.outputs.nixosConfigurations.hut; + pkgs = targetMachine.pkgs; + in { + packages.x86_64-linux = rec { + default = bench6; + bench6 = pkgs.stdenv.mkDerivation rec { + pname = "bench6"; + version = if self ? 
shortRev then self.shortRev else "dirty"; + + src = self.outPath; + + buildInputs = with pkgs; [ + bigotes + cmake + clangOmpss2 + nanos6 + nodes + nosv + mpi + tampi + ]; + + enableParallelBuilding = false; + hardeningDisable = [ "all" ]; + dontStrip = true; + }; + + bench6Master = bench6.overrideAttrs (old: { + src = builtins.fetchGit { + url = "https://pm.bsc.es/gitlab/rarias/bench6.git"; + ref = "master"; + }; + }); + }; + }; +} diff --git a/plot/convergence.py b/plot/convergence.py new file mode 100644 index 0000000000000000000000000000000000000000..68c3917450650dfd2c218464efab6814c03db906 --- /dev/null +++ b/plot/convergence.py @@ -0,0 +1,21 @@ +import pandas as pd +import sys +import matplotlib.pyplot as plt + +#df = pd.read_csv("convergence.csv", delimiter=" ") +df_gs = pd.read_csv("gs.csv", delimiter=" ") +df_sor = pd.read_csv("sor.csv", delimiter=" ") + +fig, axes = plt.subplots() + +#df.plot(ax=axes, x="time", y="error", label="Current") +df_sor.plot(ax=axes, x="time", y="error", label="SOR", color="red") +df_gs.plot( ax=axes, x="time", y="error", label="GS", color="blue") + +plt.grid(True) +plt.title("Heat 2D steady state Gauss-Seidel vs Succesive-Over-Relaxation") +plt.ylabel("Absolute error (K)") +plt.xlabel("Time (s)") +plt.yscale("log") +plt.savefig("err.png") + diff --git a/plot/readywave-cmp-ovni.R b/plot/readywave-cmp-ovni.R new file mode 100644 index 0000000000000000000000000000000000000000..202fb2b7ad1c6955476c02928508487565e1f833 --- /dev/null +++ b/plot/readywave-cmp-ovni.R @@ -0,0 +1,27 @@ +library(ggplot2) +library(dplyr, warn.conflicts = FALSE) +library(scales) +library(jsonlite) +library(readr) + +# Load the arguments (argv) +args = commandArgs(trailingOnly=TRUE) + +input_file = "data/readywave-instr.csv" + +df = read_delim(input_file, delim=",", show_col_types = FALSE) %>% + mutate(instr = as.factor(instr)) + +dpi = 150 +h = 2 +w = 7 + +# --------------------------------------------------------------------- + +p = ggplot(df, aes(time_ms, 
fill=instr)) + + geom_histogram(color="white", bins=50) + + #theme_bw() + + labs(x = "Time (ms)", title="bench6.readywave -r 100 -t 5000 -w 10") + # TODO: Add ntasks and taskwork to labels + +ggsave(sprintf("%s.png", input_file), plot=p, width=w, height=h, dpi=dpi) diff --git a/shell.nix b/shell.nix deleted file mode 100644 index c04b746b378ad21c79f74f00c802e5e7bf272187..0000000000000000000000000000000000000000 --- a/shell.nix +++ /dev/null @@ -1,15 +0,0 @@ -let - pkgs = import (builtins.fetchTarball - "https://pm.bsc.es/gitlab/rarias/bscpkgs/-/archive/master/bscpkgs-master.tar.gz"); - - rWrapper = pkgs.rWrapper.override { - packages = with pkgs.rPackages; [ tidyverse rjson jsonlite egg viridis ]; - }; -in - pkgs.mkShell { - nativeBuildInputs = [ - pkgs.bsc.clangOmpss2 - pkgs.bsc.nanos6 - rWrapper - ]; - } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b7370b210f04e348a2e837ac8b93fc2cb296e47d..2bef6951580d45f63264d5b6ecce5b47c8b37d88 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(bench6) add_subdirectory(ompss2) add_subdirectory(heat) -add_subdirectory(tools) diff --git a/src/heat/CMakeLists.txt b/src/heat/CMakeLists.txt index a115a7b8de237240cb5cef819d466b9201f2588d..2652cc9c18cb8ea42ce7b57a0946847cecc3ddb6 100644 --- a/src/heat/CMakeLists.txt +++ b/src/heat/CMakeLists.txt @@ -1,6 +1,93 @@ -add_library(heat_common STATIC common/misc.c common/kernel.c) +# --- Common to all --- + +add_library(heat_kernel STATIC kernel.c) +target_include_directories(heat_kernel PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) +target_compile_options(heat_kernel PRIVATE + -Rpass-analysis=loop-vectorize + -ffast-math) + +add_library(heat_common STATIC misc.c) target_include_directories(heat_common PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) -target_link_libraries(heat_common PUBLIC m) +target_link_libraries(heat_common PUBLIC m heat_kernel) + +# --- SMP based --- + +add_library(heat_smp_common STATIC main_smp.c) +target_link_libraries(heat_smp_common 
PUBLIC heat_common) + +macro(mk_heat_smp NAME SOURCE) + mk_bench(${NAME}) + target_sources(${NAME} PRIVATE ${SOURCE}) + target_link_libraries(${NAME} PRIVATE heat_smp_common) +endmacro() + +# No requisites +mk_heat_smp(b6_heat_seq solver_seq.c) + +if(NANOS6_FOUND) + macro(mk_heat_nanos6 NAME SOURCE) + mk_heat_smp(${NAME} ${SOURCE}) + target_compile_options(${NAME} PRIVATE "-fompss-2=libnanos6") + target_link_options(${NAME} PRIVATE "-fompss-2=libnanos6") + endmacro() + + mk_heat_nanos6(b6_heat_nanos6 solver_ompss2.c) + mk_heat_nanos6(b6_heat_nanos6_residual solver_ompss2_residual.c) +endif() + +message(STATUS "NODES FOUND = ${NODES_FOUND}") +if(NODES_FOUND) + macro(mk_heat_nodes NAME SOURCE) + mk_heat_smp(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nodes::nodes) + endmacro() + + mk_heat_nodes(b6_heat_nodes solver_ompss2.c) + mk_heat_nodes(b6_heat_nodes_residual solver_ompss2_residual.c) +endif() + +# --- MPI based --- + +if(MPI_FOUND) + add_library(heat_mpi_common STATIC main_mpi.c utils_mpi.c) + target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C) + + macro(mk_heat_mpi NAME SOURCE) + mk_bench(${NAME}) + target_sources(${NAME} PRIVATE ${SOURCE}) + target_link_libraries(${NAME} PRIVATE heat_mpi_common) + endmacro() + mk_heat_mpi(b6_heat_mpi solver_mpi.c) + mk_heat_mpi(b6_heat_mpi_nbuffer solver_mpi_nbuffer.c) + + if(NANOS6_FOUND) + macro(mk_heat_mpi_nanos6 NAME SOURCE) + mk_heat_mpi(${NAME} ${SOURCE}) + target_compile_options(${NAME} PRIVATE "-fompss-2=libnanos6") + target_link_options(${NAME} PRIVATE "-fompss-2=libnanos6") + endmacro() + mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c) + mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c) + + if(TAMPI_FOUND) + macro(mk_heat_tampi_nanos6 NAME SOURCE) + mk_heat_mpi_nanos6(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Tampi::tampi-c) + endmacro() + mk_heat_tampi_nanos6(b6_heat_itampi_nanos6_tasks solver_itampi_ompss2_tasks.c) + 
endif() + endif() + + if(NODES_FOUND) + macro(mk_heat_mpi_nodes NAME SOURCE) + mk_heat_mpi(${NAME} ${SOURCE}) + target_link_libraries(${NAME} PRIVATE Nodes::nodes) + endmacro() + mk_heat_mpi_nodes(b6_heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c) + mk_heat_mpi_nodes(b6_heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c) + endif() +endif() + +# --- GASPI --- -add_subdirectory(smp) -add_subdirectory(mpi) +# TODO diff --git a/src/heat/README.md b/src/heat/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6ddcc6745df82e212f07c30745527c235312a7b0 --- /dev/null +++ b/src/heat/README.md @@ -0,0 +1,9 @@ +The heat benchmark solves the steady heat equation in a regular grid of NxM +elements using an iterative solver. + +The solver is either Gauss-Seidel or successive over-relaxation (SOR) with a given +relaxation parameter (--relax). + +In every iteration the relative error of the solution is computed by using the +infinity norm until the tolerance limit is reached. + diff --git a/src/heat/common/kernel.c b/src/heat/common/kernel.c deleted file mode 100644 index da6582f7a17e51a4bd9c1e6a218c54ce255b263e..0000000000000000000000000000000000000000 --- a/src/heat/common/kernel.c +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include "common/heat.h" - -#ifndef SIMD -void computeBlock(const int64_t rows, const int64_t cols, - const int rstart, const int rend, - const int cstart, const int cend, - double M[rows][cols]) -{ - for (int r = rstart; r <= rend; ++r) { - for (int c = cstart; c <= cend; ++c) { - M[r][c] = 0.25*(M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]); - } - } -} -#else -void computeBlock(const int64_t rows, const int64_t cols, - const int rstart, const int rend, - const int cstart, const int cend, - double M[rows][cols]) -{ - // Assuming square blocks - const int bs = rend-rstart+1; - for (int k = 0; k < bs; ++k) { - #pragma omp simd - for (int j = 0; j <= k; ++j) { - const int rr = rstart+k-j; - const int cc = cstart+j; - M[rr][cc] = 
0.25*(M[rr-1][cc] + M[rr+1][cc] + M[rr][cc-1] + M[rr][cc+1]); - } - } - for (int k = bs-2; k >= 0; --k) { - #pragma omp simd - for (int j = 0; j <= k; ++j) { - const int rr = rstart+bs-j-1; - const int cc = cstart+bs+j-k-1; - M[rr][cc] = 0.25*(M[rr-1][cc] + M[rr+1][cc] + M[rr][cc-1] + M[rr][cc+1]); - } - } -} -#endif - -double computeBlockResidual(const int64_t rows, const int64_t cols, - const int rstart, const int rend, - const int cstart, const int cend, - double M[rows][cols]) -{ - double sum = 0.0; - for (int r = rstart; r <= rend; ++r) { - for (int c = cstart; c <= cend; ++c) { - const double value = 0.25*(M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]); - const double diff = value - M[r][c]; - sum += diff*diff; - M[r][c] = value; - } - } - return sum; -} diff --git a/src/heat/common/heat.h b/src/heat/heat.h similarity index 81% rename from src/heat/common/heat.h rename to src/heat/heat.h index 5e5af35e77486a97567f25e739655d8a637453fe..696b28ca42a7829e519841c779f98c9b38f1ecd9 100644 --- a/src/heat/common/heat.h +++ b/src/heat/heat.h @@ -4,9 +4,11 @@ #include #include #include +#include -#define IGNORE_RESIDUAL ((double) -1.0) -#define DEFAULT_DELTA ((double) 0.00005) +#define IGNORE_RESIDUAL (NAN) +#define DEFAULT_DELTA ((double) 0.05) +#define DEFAULT_RELAX 1.9766 #define DEFAULT_BS 1024 #define ROUND(a, b) ((((a) + (b) - 1) / (b)) * (b)) @@ -22,6 +24,7 @@ typedef struct { int timesteps; int convergenceTimesteps; double delta; + double relax; int64_t rows; int64_t cols; int rbs; @@ -50,6 +53,6 @@ double getTime(void); const char *summary(void); double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData); void computeBlock(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]); -double computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols]); +void 
computeBlockResidual(const int64_t rows, const int64_t cols, const int rstart, const int rend, const int cstart, const int cend, double M[rows][cols], double relax, double *residual, double *max_elem); #endif // HEAT_H diff --git a/src/heat/kernel.c b/src/heat/kernel.c new file mode 100644 index 0000000000000000000000000000000000000000..32d67dfc45d75f089033e5233d99882a3788dc2b --- /dev/null +++ b/src/heat/kernel.c @@ -0,0 +1,115 @@ +#include +#include +#include "heat.h" + +#ifndef SIMD +void computeBlock(const int64_t rows, const int64_t cols, + const int rstart, const int rend, + const int cstart, const int cend, + double M[rows][cols]) +{ + for (int r = rstart; r <= rend; ++r) { + for (int c = cstart; c <= cend; ++c) { + M[r][c] = 0.25*(M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]); + } + } +} +#else +void computeBlock(const int64_t rows, const int64_t cols, + const int rstart, const int rend, + const int cstart, const int cend, + double M[rows][cols]) +{ + (void) cend; + // Assuming square blocks + const int bs = rend-rstart+1; + for (int k = 0; k < bs; ++k) { + #pragma omp simd + for (int j = 0; j <= k; ++j) { + const int rr = rstart+k-j; + const int cc = cstart+j; + M[rr][cc] = 0.25*(M[rr-1][cc] + M[rr+1][cc] + M[rr][cc-1] + M[rr][cc+1]); + } + } + for (int k = bs-2; k >= 0; --k) { + #pragma omp simd + for (int j = 0; j <= k; ++j) { + const int rr = rstart+bs-j-1; + const int cc = cstart+bs+j-k-1; + M[rr][cc] = 0.25*(M[rr-1][cc] + M[rr+1][cc] + M[rr][cc-1] + M[rr][cc+1]); + } + } +} +#endif + + +#if 1 + +void computeBlockResidual(const int64_t rows, const int64_t cols, + const int rstart, const int rend, + const int cstart, const int cend, + double M[rows][cols], double relax, + double *residual, double *max_elem) +{ + //double relax = 1.95; + for (int r = rstart; r <= rend; ++r) { + for (int c = cstart; c <= cend; ++c) { + double old = M[r][c]; + double fdiff = 0.25*(M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]); + double new = (1 - relax) * old + relax * 
fdiff; + double diff = new - old; + /* Use the largest absolute error as residual */ + *residual = fmax(*residual, fabs(diff)); + *max_elem = fmax(*max_elem, fabs(new)); + //fprintf(stderr, "residual = %e in (%4d, %4d)\n", residual, r, c); + M[r][c] = new; + } + } +} + +#else + +/* Red-black parallelization */ +void computeBlockResidual(const int64_t rows, const int64_t cols, + const int rstart, const int rend, + const int cstart, const int cend, + double M[rows][cols], double relax, + double * restrict residual, double * restrict max_elem) +{ + (void)(residual); + (void)(max_elem); + + const double A = 1 - relax; + const double B = 0.25 * relax; + + //double relax = 1.95; + for (int r = rstart; r <= rend; r += 2) { + #pragma clang loop vectorize(enable) + for (int c = cstart; c <= cend; c += 2) { + double old = M[r][c]; + double new = A * old + B * (M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]); + //double diff = new - old; + /* Use the largest absolute error as residual */ + //*residual = fmax(*residual, fabs(diff)); + //*max_elem = fmax(*max_elem, fabs(new)); + //fprintf(stderr, "residual = %e in (%4d, %4d)\n", residual, r, c); + M[r][c] = new; + } + } + + for (int r = rstart+1; r <= rend; r += 2) { + #pragma clang loop vectorize(enable) + for (int c = cstart+1; c <= cend; c += 2) { + double old = M[r][c]; + double new = A * old + B * (M[r-1][c] + M[r+1][c] + M[r][c-1] + M[r][c+1]); + //double diff = new - old; + /* Use the largest absolute error as residual */ + //*residual = fmax(*residual, fabs(diff)); + //*max_elem = fmax(*max_elem, fabs(new)); + //fprintf(stderr, "residual = %e in (%4d, %4d)\n", residual, r, c); + M[r][c] = new; + } + } +} + +#endif diff --git a/src/heat/mpi/main.c b/src/heat/main_mpi.c similarity index 78% rename from src/heat/mpi/main.c rename to src/heat/main_mpi.c index e073740488def4747175a57dca8dfcc842412ea0..7ff91ed5ef20a65a698511f2e9e14863aaf98d8b 100644 --- a/src/heat/mpi/main.c +++ b/src/heat/main_mpi.c @@ -1,10 +1,11 @@ #include 
+#include #include #include #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" #ifdef TAMPI #include @@ -19,16 +20,7 @@ void generateImage(const HeatConfiguration *conf, int64_t rows, int64_t cols, in int main(int argc, char **argv) { -#if defined(TAMPI) - // TAMPI+OmpSs-2 variants - const int required = MPI_TASK_MULTIPLE; -#elif defined(_OMPSS_2) - // MPI+OmpSs-2 variants - const int required = MPI_THREAD_SERIALIZED; -#else - // MPI-only variants - const int required = MPI_THREAD_SINGLE; -#endif + const int required = mpi_level(); int provided; MPI_Init_thread(&argc, &argv, required, &provided); @@ -76,8 +68,10 @@ int main(int argc, char **argv) if (!rank) { int64_t totalElements = conf.rows*conf.cols; + //double time_element = (end-start)/(totalElements*conf.timesteps); double throughput = (totalElements*conf.timesteps)/(end-start); - throughput = throughput/1000000.0; + //throughput = throughput/1000000.0; + double residual = NAN; #ifdef _OMPSS_2 int threads = nanos6_get_num_cpus(); @@ -85,10 +79,17 @@ int main(int argc, char **argv) int threads = 1; #endif - fprintf(stdout, "rows, %ld, cols, %ld, rows/rank, %ld, total, %ld, total/rank, %ld, rbs, %d, " - "cbs, %d, ranks, %d, threads, %d, timesteps, %d, time, %f, Mupdates/s, %f\n", - conf.rows, conf.cols, conf.rows/nranks, totalElements, totalElements/nranks, - conf.rbs, conf.cbs, nranks, threads, conf.timesteps, end-start, throughput); + fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s %8s\n", + "time", "throughput", "error", + "rows", "cols", + "rbs", "cbs", "threads", + "steps"); + fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8d %8d\n", + end-start, throughput, residual, + conf.rows, conf.cols, + conf.rbs, conf.cbs, threads, + conf.convergenceTimesteps); + } if (conf.generateImage) { diff --git a/src/heat/common/main.c b/src/heat/main_smp.c similarity index 56% rename from src/heat/common/main.c rename to src/heat/main_smp.c index 
9ff1a912002b6907cd57a9fdfa58da03199dee98..1b3656d756ecc0c372d86dc6d34a89faef702bc7 100644 --- a/src/heat/common/main.c +++ b/src/heat/main_smp.c @@ -2,7 +2,7 @@ #include #include -#include "common/heat.h" +#include "heat.h" int main(int argc, char **argv) @@ -25,23 +25,21 @@ int main(int argc, char **argv) double start = getTime(); double residual = solve(&conf, rows, cols, conf.timesteps, NULL); double end = getTime(); - - int64_t totalElements = conf.rows*conf.cols; - double throughput = (totalElements*conf.timesteps)/(end-start); - -#ifdef _OMPSS_2 - int threads = sysconf(_SC_NPROCESSORS_ONLN); -#else - int threads = 1; -#endif - - fprintf(stderr,"%8s %8s %8s %8s %8s %8s %14s %14s %14s", - "rows", "cols", "rbs", "cbs", "threads", - "steps", "error", "time", "updates/s\n"); - fprintf(stdout, "%8ld %8ld %8d %8d %8d %8d %14e %14e %14e\n", + double delta_time = end - start; + + long niter = conf.convergenceTimesteps; + long iter_elem = conf.rows * conf.cols; + long total_elem = iter_elem * niter; + double throughput = total_elem / delta_time; + +// fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s\n", +// "time", "updates/s", "rel. 
error", +// "rows", "cols", +// "rbs", "cbs", "iters"); + fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8ld\n", + delta_time, throughput, residual, conf.rows, conf.cols, - conf.rbs, conf.cbs, threads, - conf.convergenceTimesteps, residual, end-start, throughput); + conf.rbs, conf.cbs, niter); if (conf.generateImage) writeImage(conf.imageFileName, conf.matrix, rows, cols); diff --git a/src/heat/common/misc.c b/src/heat/misc.c similarity index 84% rename from src/heat/common/misc.c rename to src/heat/misc.c index 922892be799cc11ca9d67e73e364a01ed749d915..c439a1337880756e5a7b9358de520999a9b61e1b 100644 --- a/src/heat/common/misc.c +++ b/src/heat/misc.c @@ -120,6 +120,7 @@ static void printUsage(int argc, char **argv) fprintf(stdout, " -C, --cbs=BS use BS as the number of columns of each block (overrides -b option)\n"); fprintf(stdout, " -w, --wins=WINS use WINS as the number of MPI RMA windows for each halo row\n"); fprintf(stdout, " -d, --delta=DELTA use DELTA as the residual threshold (default: %f)\n", DEFAULT_DELTA); + fprintf(stdout, " -x, --relax=X use X as the relaxation value (default: %f)\n", DEFAULT_RELAX); fprintf(stdout, " -f, --sources-file=NAME get the heat sources from the NAME configuration file\n"); fprintf(stdout, " -W, --no-warmup do not perform warmup timestep (warmup enabled by default)\n"); fprintf(stdout, " -o, --output=NAME save the computed matrix to the PPM file named NAME.ppm and disable warmup (disabled by default)\n"); @@ -132,6 +133,7 @@ static void setDefaultConfiguration(HeatConfiguration *conf) conf->timesteps = 1; conf->convergenceTimesteps = -1; conf->delta = DEFAULT_DELTA; + conf->relax = DEFAULT_RELAX; conf->rows = DEFAULT_BS * 8; conf->cols = DEFAULT_BS * 8; conf->rbs = DEFAULT_BS; @@ -159,6 +161,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf) {"cbs", required_argument, 0, 'C'}, {"wins", required_argument, 0, 'w'}, {"delta", required_argument, 0, 'd'}, + {"relax", required_argument, 0, 'x'}, 
{"sources-file", required_argument, 0, 'f'}, {"output", required_argument, 0, 'o'}, {"no-warmup", no_argument, 0, 'W'}, @@ -171,7 +174,7 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf) int bs = DEFAULT_BS; int rbs = 0, cbs = 0; - while ((c = getopt_long(argc, argv, "ho:f:s:r:c:t:b:R:C:d:w:Wv", long_options, &index)) != -1) { + while ((c = getopt_long(argc, argv, "ho:f:s:r:c:t:b:R:C:d:x:w:Wv", long_options, &index)) != -1) { switch (c) { case 'h': printUsage(argc, argv); @@ -235,6 +238,10 @@ static void readParameters(int argc, char **argv, HeatConfiguration *conf) conf->delta = atof(optarg); assert(conf->delta > 0.0); break; + case 'x': + conf->relax = atof(optarg); + assert(conf->relax > 0.0); + break; case '?': exit(1); default: @@ -356,6 +363,7 @@ void printConfiguration(const HeatConfiguration *conf) fprintf(stderr, "Block size : %d x %d\n", conf->rbs, conf->cbs); fprintf(stderr, "Timesteps : %d\n", conf->timesteps); fprintf(stderr, "Delta : %f\n", conf->delta); + fprintf(stderr, "Relax : %f\n", conf->relax); fprintf(stderr, "Num. 
heat sources : %d\n", conf->numHeatSources); for (int i = 0; i < conf->numHeatSources; i++) { @@ -370,50 +378,58 @@ void printConfiguration(const HeatConfiguration *conf) void initializeMatrix(const HeatConfiguration *conf, double *matrix, int64_t rows, int64_t cols, int64_t rowOffset) { - const int totalRows = conf->rows+2; + //const int totalRows = conf->rows+2; // Set all elements to zero memset(matrix, 0, rows*cols*sizeof(double)); - for (int i = 0; i < conf->numHeatSources; i++) { - const HeatSource *src = &(conf->heatSources[i]); - - // Initialize top row - if (rowOffset == 0) { - for (int c = 0; c < cols; ++c) { - double dist = sqrt(pow((double)c/(double)cols-src->col, 2) + pow(src->row, 2)); - if (dist <= src->range) { - matrix[c] += (src->range-dist)/src->range*src->temperature; - } - } - } - - // Initialize bottom row - if (rowOffset+rows == totalRows) { - for (int c = 0; c < cols; ++c) { - double dist = sqrt(pow((double)c/(double)cols-src->col, 2) + pow(1-src->row, 2)); - if (dist <= src->range) { - matrix[(rows-1)*cols+c] += (src->range-dist)/src->range*src->temperature; - } - } - } - - // Initialize left column - for (int r = 1; r < rows-1; ++r) { - double dist = sqrt(pow(src->col, 2) + pow((double)(rowOffset+r)/(double)totalRows-src->row, 2)); - if (dist <= src->range) { - matrix[r*cols] += (src->range-dist)/src->range*src->temperature; - } - } - - // Initialize right column - for (int r = 1; r < rows-1; ++r) { - double dist = sqrt(pow(1-src->col, 2) + pow((double)(rowOffset+r)/(double)totalRows-src->row, 2)); - if (dist <= src->range) { - matrix[r*cols+cols-1] += (src->range-dist)/src->range*src->temperature; - } - } + /* Set the left side to 1.0 */ + for (int i = 0; i < rows; i++) { + matrix[i*cols] = 1.0; } + + (void)(conf); + (void)(rowOffset); + +// for (int i = 0; i < conf->numHeatSources; i++) { +// const HeatSource *src = &(conf->heatSources[i]); +// +// // Initialize top row +// if (rowOffset == 0) { +// for (int c = 0; c < cols; ++c) { +// 
double dist = sqrt(pow((double)c/(double)cols-src->col, 2) + pow(src->row, 2)); +// if (dist <= src->range) { +// matrix[c] += (src->range-dist)/src->range*src->temperature; +// } +// } +// } +// +// // Initialize bottom row +// if (rowOffset+rows == totalRows) { +// for (int c = 0; c < cols; ++c) { +// double dist = sqrt(pow((double)c/(double)cols-src->col, 2) + pow(1-src->row, 2)); +// if (dist <= src->range) { +// matrix[(rows-1)*cols+c] += (src->range-dist)/src->range*src->temperature; +// } +// } +// } +// +// // Initialize left column +// for (int r = 1; r < rows-1; ++r) { +// double dist = sqrt(pow(src->col, 2) + pow((double)(rowOffset+r)/(double)totalRows-src->row, 2)); +// if (dist <= src->range) { +// matrix[r*cols] += (src->range-dist)/src->range*src->temperature; +// } +// } +// +// // Initialize right column +// for (int r = 1; r < rows-1; ++r) { +// double dist = sqrt(pow(1-src->col, 2) + pow((double)(rowOffset+r)/(double)totalRows-src->row, 2)); +// if (dist <= src->range) { +// matrix[r*cols+cols-1] += (src->range-dist)/src->range*src->temperature; +// } +// } +// } } double getTime(void) diff --git a/src/heat/mpi/CMakeLists.txt b/src/heat/mpi/CMakeLists.txt deleted file mode 100644 index bf9ac1495880bcc4702f20dc44532f12657f6375..0000000000000000000000000000000000000000 --- a/src/heat/mpi/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -if(NOT MPI_FOUND) - return() -endif() - -macro(mk_heat_mpi NAME SOURCE) - mk_bench(${NAME}) - target_sources(${NAME} PRIVATE ${SOURCE}) - target_link_libraries(${NAME} PRIVATE heat_mpi_common) -endmacro() - -macro(mk_heat_mpi_nanos6 NAME SOURCE) - mk_heat_mpi(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) -endmacro() - -macro(mk_heat_mpi_nodes NAME SOURCE) - mk_heat_mpi(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE Nodes::wrapper) -endmacro() - -# ------------------------------------------------------------------- - -add_library(heat_mpi_common STATIC main.c utils.c) 
-target_link_libraries(heat_mpi_common PUBLIC heat_common MPI::MPI_C) - -mk_heat_mpi(b6_heat_mpi solver_mpi.c) -mk_heat_mpi(b6_heat_mpi_nbuffer solver_mpi_nbuffer.c) - -if(NANOS6_FOUND) - mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_forkjoin solver_mpi_ompss2_forkjoin.c) - mk_heat_mpi_nanos6(b6_heat_mpi_nanos6_tasks solver_mpi_ompss2_tasks.c) -endif() - -if(NODES_FOUND) - mk_heat_mpi_nodes(b6_heat_mpi_nodes_forkjoin solver_mpi_ompss2_forkjoin.c) - mk_heat_mpi_nodes(b6_heat_mpi_nodes_tasks solver_mpi_ompss2_tasks.c) -endif() diff --git a/src/heat/Makefile b/src/heat/old.Makefile similarity index 100% rename from src/heat/Makefile rename to src/heat/old.Makefile diff --git a/src/heat/results b/src/heat/results new file mode 100644 index 0000000000000000000000000000000000000000..019865ef8c1bbf3f09c6b2a1e56b3cc29aa1a20a --- /dev/null +++ b/src/heat/results @@ -0,0 +1,8 @@ +In MareNostrum 4: +24cores -s_16384_-t_500_-b_1024 3906.56 +24cores -s_16384_-t_500_-b_2048 3890.20 +24cores -s_16384_-t_500_-b_128 3752.77 + +48cores -s_16384_-t_500_-b_1024 4843.91 +48cores -s_16384_-t_500_-b_128 4367.97 +48cores -s_16384_-t_500_-b_2048 4275.91 diff --git a/src/heat/smp/CMakeLists.txt b/src/heat/smp/CMakeLists.txt deleted file mode 100644 index 4763b2f915cf36d20e09a284886a143fc0b88ef8..0000000000000000000000000000000000000000 --- a/src/heat/smp/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -macro(mk_heat_smp NAME SOURCE) - mk_bench(${NAME}) - target_sources(${NAME} PRIVATE ${SOURCE}) - target_link_libraries(${NAME} PRIVATE heat_smp_common) -endmacro() - -macro(mk_heat_nanos6 NAME SOURCE) - mk_heat_smp(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE Nanos6::wrapper) -endmacro() - -macro(mk_heat_nodes NAME SOURCE) - mk_heat_smp(${NAME} ${SOURCE}) - target_link_libraries(${NAME} PRIVATE Nodes::wrapper) -endmacro() - -# ------------------------------------------------------------------- - -add_library(heat_smp_common STATIC main.c) -target_link_libraries(heat_smp_common PUBLIC 
heat_common) - -mk_heat_smp(b6_heat_seq solver_seq.c) - -if(NANOS6_FOUND) - mk_heat_nanos6(b6_heat_nanos6 solver_ompss2.c) - mk_heat_nanos6(b6_heat_nanos6_residual solver_ompss2_residual.c) -endif() - -if(NODES_FOUND) - mk_heat_nodes(b6_heat_nodes solver_ompss2.c) - mk_heat_nodes(b6_heat_nodes_residual solver_ompss2_residual.c) -endif() diff --git a/src/heat/smp/main.c b/src/heat/smp/main.c deleted file mode 100644 index a5c3577bfab920f64eb4088f55882c43195d1029..0000000000000000000000000000000000000000 --- a/src/heat/smp/main.c +++ /dev/null @@ -1,55 +0,0 @@ -#include -#include -#include - -#include "common/heat.h" - - -int main(int argc, char **argv) -{ - HeatConfiguration conf; - readConfiguration(argc, argv, &conf); - refineConfiguration(&conf, conf.rbs, conf.cbs); - if (conf.verbose) - printConfiguration(&conf); - - int64_t rows = conf.rows+2; - int64_t cols = conf.cols+2; - - initialize(&conf, rows, cols, 0); - - if (conf.warmup) - solve(&conf, rows, cols, 1, NULL); - - // Solve the problem - double start = getTime(); - double residual = solve(&conf, rows, cols, conf.timesteps, NULL); - double end = getTime(); - - int64_t totalElements = conf.rows*conf.cols; - double throughput = (totalElements*conf.timesteps)/(end-start); - -#ifdef _OMPSS_2 - int threads = sysconf(_SC_NPROCESSORS_ONLN); -#else - int threads = 1; -#endif - - fprintf(stderr, "%14s %14s %14s %8s %8s %8s %8s %8s %8s\n", - "time", "updates/s", "error", - "rows", "cols", - "rbs", "cbs", "threads", - "steps"); - fprintf(stdout, "%14e %14e %14e %8ld %8ld %8d %8d %8d %8d\n", - end-start, throughput, residual, - conf.rows, conf.cols, - conf.rbs, conf.cbs, threads, - conf.convergenceTimesteps); - - if (conf.generateImage) - writeImage(conf.imageFileName, conf.matrix, rows, cols); - - finalize(&conf); - - return 0; -} diff --git a/src/heat/mpi/solver_itampi_ompss2_tasks.c b/src/heat/solver_itampi_ompss2_tasks.c similarity index 90% rename from src/heat/mpi/solver_itampi_ompss2_tasks.c rename to 
src/heat/solver_itampi_ompss2_tasks.c index 7620a585e26b5685aaf3cf70e4ff1eb1e6bae52c..25966eb05e69596a5982389e9edb558a7c603555 100644 --- a/src/heat/mpi/solver_itampi_ompss2_tasks.c +++ b/src/heat/solver_itampi_ompss2_tasks.c @@ -1,9 +1,20 @@ #include #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" +const char * +summary(void) +{ + return "Parallel version using MPI + OmpSs-2 tasks + Non-blocking TAMPI"; +} + +int +mpi_level(void) +{ + return MPI_TASK_MULTIPLE; +} static inline void send(const double *data, int nelems, int dst, int tag) { @@ -55,6 +66,7 @@ static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cb double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData) { + (void) extraData; double (*matrix)[cols] = (double (*)[cols]) conf->matrix; const int rbs = conf->rbs; const int cbs = conf->cbs; diff --git a/src/heat/mpi/solver_mpi.c b/src/heat/solver_mpi.c similarity index 93% rename from src/heat/mpi/solver_mpi.c rename to src/heat/solver_mpi.c index 42f6932a78a5d7994628cdadf483c6b95eea982c..7cfa786bde8d9ceb26dfbfa8f07d298acf286f7a 100644 --- a/src/heat/mpi/solver_mpi.c +++ b/src/heat/solver_mpi.c @@ -1,7 +1,13 @@ #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" + +int +mpi_level(void) +{ + return MPI_THREAD_SINGLE; +} const char * summary(void) diff --git a/src/heat/mpi/solver_mpi_nbuffer.c b/src/heat/solver_mpi_nbuffer.c similarity index 96% rename from src/heat/mpi/solver_mpi_nbuffer.c rename to src/heat/solver_mpi_nbuffer.c index 52a1d78485e76c5ece0d6a51f90bdc9aa47511b5..0d76b6c8726d2c611b2dd3133d95070111023376 100644 --- a/src/heat/mpi/solver_mpi_nbuffer.c +++ b/src/heat/solver_mpi_nbuffer.c @@ -1,13 +1,19 @@ #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" typedef struct { MPI_Request send; MPI_Request recv; } HaloRequests; +int 
+mpi_level(void) +{ + return MPI_THREAD_SINGLE; +} + const char * summary(void) { diff --git a/src/heat/mpi/solver_mpi_ompss2_forkjoin.c b/src/heat/solver_mpi_ompss2_forkjoin.c similarity index 94% rename from src/heat/mpi/solver_mpi_ompss2_forkjoin.c rename to src/heat/solver_mpi_ompss2_forkjoin.c index 72f7e354ad7781e8d53d6bab992c5a8880c8bd7f..d5b2de6df7bd3507e8d6386058c38239ee2306f2 100644 --- a/src/heat/mpi/solver_mpi_ompss2_forkjoin.c +++ b/src/heat/solver_mpi_ompss2_forkjoin.c @@ -1,7 +1,13 @@ #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" + +int +mpi_level(void) +{ + return MPI_THREAD_SERIALIZED; +} const char * summary(void) diff --git a/src/heat/mpi/solver_mpi_ompss2_tasks.c b/src/heat/solver_mpi_ompss2_tasks.c similarity index 95% rename from src/heat/mpi/solver_mpi_ompss2_tasks.c rename to src/heat/solver_mpi_ompss2_tasks.c index c60cb6090734c8a5a00639017704ed8df9bca74e..7e6ce91e75ce3c0f58f99450f41caae57db0b2ca 100644 --- a/src/heat/mpi/solver_mpi_ompss2_tasks.c +++ b/src/heat/solver_mpi_ompss2_tasks.c @@ -1,10 +1,16 @@ #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" static int serial; +int +mpi_level(void) +{ + return MPI_THREAD_SERIALIZED; +} + const char * summary(void) { diff --git a/src/heat/mpi/solver_mpirma_nbuffer.c b/src/heat/solver_mpirma_nbuffer.c similarity index 98% rename from src/heat/mpi/solver_mpirma_nbuffer.c rename to src/heat/solver_mpirma_nbuffer.c index ae9c918d0024e3f91877ed0bfa5da9fde1253620..55acc805bea140dd92016280406ab05b3e15f966 100644 --- a/src/heat/mpi/solver_mpirma_nbuffer.c +++ b/src/heat/solver_mpirma_nbuffer.c @@ -3,8 +3,8 @@ #include #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" typedef struct { MPI_Request send; diff --git a/src/heat/mpi/solver_mpirma_ompss2_tasks.c b/src/heat/solver_mpirma_ompss2_tasks.c similarity index 98% rename from 
src/heat/mpi/solver_mpirma_ompss2_tasks.c rename to src/heat/solver_mpirma_ompss2_tasks.c index 6f25ff479a5bc4c8d9d94777f9a03577f6e6cb1d..c438f223fd6358093a136b0352b2813cbcaff828 100644 --- a/src/heat/mpi/solver_mpirma_ompss2_tasks.c +++ b/src/heat/solver_mpirma_ompss2_tasks.c @@ -1,7 +1,7 @@ #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" static int serial; diff --git a/src/heat/smp/solver_ompss2.c b/src/heat/solver_ompss2.c similarity index 94% rename from src/heat/smp/solver_ompss2.c rename to src/heat/solver_ompss2.c index 36b009ee27185336f405bd050d06cf3b22063de3..92170e7c8a93a1c905ad0ae2c4cbe99f8c5cd86c 100644 --- a/src/heat/smp/solver_ompss2.c +++ b/src/heat/solver_ompss2.c @@ -1,4 +1,4 @@ -#include "common/heat.h" +#include "heat.h" const char * summary(void) @@ -34,6 +34,7 @@ double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, gaussSeidelSolver(rows, cols, rbs, cbs, nrb, ncb, matrix, representatives); } #pragma oss taskwait + conf->convergenceTimesteps = timesteps; return IGNORE_RESIDUAL; } diff --git a/src/heat/smp/solver_ompss2_residual.c b/src/heat/solver_ompss2_residual.c similarity index 52% rename from src/heat/smp/solver_ompss2_residual.c rename to src/heat/solver_ompss2_residual.c index 03baa7f763cc2141ecd7e2023532a114d9090f9b..2064a3dcc0b5c40b352ad08c7b75da1255c0a6d9 100644 --- a/src/heat/smp/solver_ompss2_residual.c +++ b/src/heat/solver_ompss2_residual.c @@ -1,6 +1,7 @@ #include +#include -#include "common/heat.h" +#include "heat.h" const char * summary(void) @@ -9,41 +10,74 @@ summary(void) "residual"; } -static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual) +static inline void gaussSeidelSolver(int64_t rows, int64_t cols, int rbs, int cbs, int nrb, int ncb, double M[rows][cols], char reps[nrb][ncb], double *residual, double *max_elem, double relax) { for (int R 
= 1; R < nrb-1; ++R) { for (int C = 1; C < ncb-1; ++C) { #pragma oss task label("block computation") \ in(reps[R-1][C]) in(reps[R+1][C]) \ in(reps[R][C-1]) in(reps[R][C+1]) \ - inout(reps[R][C]) reduction(+: [1]residual) - *residual += computeBlockResidual(rows, cols, (R-1)*rbs+1, R*rbs, (C-1)*cbs+1, C*cbs, M); + inout(reps[R][C]) \ + reduction(max: [1]residual) \ + reduction(max: [1]max_elem) + { + double lresidual = 0.0; + double lmax_elem = 0.0; + + computeBlockResidual(rows, cols, (R-1)*rbs+1, + R*rbs, (C-1)*cbs+1, C*cbs, M, + relax, &lresidual, &lmax_elem); + + *residual = fmax(*residual, lresidual); + *max_elem = fmax(*max_elem, lmax_elem); + } } } } double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, void *extraData) { + FILE *f = fopen("convergence.csv", "w"); + fprintf(f, "iter error time\n"); + (void) extraData; double (*matrix)[cols] = (double (*)[cols]) conf->matrix; const double delta = conf->delta; const int rbs = conf->rbs; const int cbs = conf->cbs; - const int N = 10; + const int N = 4; double results[N]; - for (int i = 0; i < N; ++i) - results[i] = delta; + double max_elem[N]; + double residual[N]; + + for (int i = 0; i < N; ++i) { + results[i] = 666; + max_elem[i] = 666; + residual[i] = 666; + } const int nrb = (rows-2)/rbs+2; const int ncb = (cols-2)/cbs+2; char representatives[nrb][ncb]; + double t0 = getTime(); + int t = 0; while (t < timesteps) { results[t%N] = 0.0f; + max_elem[t%N] = 0.0f; + residual[t%N] = 0.0f; + + gaussSeidelSolver(rows, cols, rbs, cbs, nrb, ncb, matrix, + representatives, &residual[t%N], &max_elem[t%N], conf->relax); - gaussSeidelSolver(rows, cols, rbs, cbs, nrb, ncb, matrix, representatives, &results[t%N]); + #pragma oss task in(residual[t%N], max_elem[t%N]) out(results[t%N]) + { + results[t%N] = residual[t%N] / max_elem[t%N]; + fprintf(f, "%d %e %e\n", t, results[t%N], getTime() - t0); + //fprintf(stderr, "t=%d error=%e\n", t, results[t%N]); + } // Advance to the next timestep ++t; @@ -60,5 
+94,7 @@ double solve(HeatConfiguration *conf, int64_t rows, int64_t cols, int timesteps, // Save the number of performed timesteps conf->convergenceTimesteps = t; + fclose(f); + return results[(t-1)%N]; } diff --git a/src/heat/smp/solver_ompss2_taskloop.c b/src/heat/solver_ompss2_taskloop.c similarity index 97% rename from src/heat/smp/solver_ompss2_taskloop.c rename to src/heat/solver_ompss2_taskloop.c index 7bf065b1636c3053d765c5a7e6c90bb6f6c0d391..7f14e12375cd09dfafbf534ef8e50803d6351e3d 100644 --- a/src/heat/smp/solver_ompss2_taskloop.c +++ b/src/heat/solver_ompss2_taskloop.c @@ -1,4 +1,4 @@ -#include "common/heat.h" +#include "heat.h" const char * summary(void) diff --git a/src/heat/smp/solver_seq.c b/src/heat/solver_seq.c similarity index 96% rename from src/heat/smp/solver_seq.c rename to src/heat/solver_seq.c index 456ac8674668bd30c9754ccbfc0e611887ce43bc..9f3ad62dba88ffa817f1ddd4d92c2bc35f729f84 100644 --- a/src/heat/smp/solver_seq.c +++ b/src/heat/solver_seq.c @@ -1,4 +1,4 @@ -#include "common/heat.h" +#include "heat.h" const char * summary(void) diff --git a/src/heat/mpi/solver_tampi_ompss2_tasks.c b/src/heat/solver_tampi_ompss2_tasks.c similarity index 98% rename from src/heat/mpi/solver_tampi_ompss2_tasks.c rename to src/heat/solver_tampi_ompss2_tasks.c index 75c8060021efd79a682a8e511745b390f47cb969..ed421b3c4b7a9baa3fec940fd8537dd7614a7428 100644 --- a/src/heat/mpi/solver_tampi_ompss2_tasks.c +++ b/src/heat/solver_tampi_ompss2_tasks.c @@ -2,7 +2,7 @@ #include #include "utils.h" -#include "common/heat.h" +#include "heat.h" static inline void send(const double *data, int nelems, int dst, int tag) diff --git a/src/heat/mpi/solver_tampirma_ompss2_tasks.c b/src/heat/solver_tampirma_ompss2_tasks.c similarity index 99% rename from src/heat/mpi/solver_tampirma_ompss2_tasks.c rename to src/heat/solver_tampirma_ompss2_tasks.c index fdcdfaca401bb374f217199b8f94307531bbf091..62331e29fd7492809d441374a474e41d2c36455d 100644 --- 
a/src/heat/mpi/solver_tampirma_ompss2_tasks.c +++ b/src/heat/solver_tampirma_ompss2_tasks.c @@ -2,7 +2,7 @@ #include #include "utils.h" -#include "common/heat.h" +#include "heat.h" static inline void fence(MPI_Win win) diff --git a/src/heat/mpi/utils.c b/src/heat/utils_mpi.c similarity index 98% rename from src/heat/mpi/utils.c rename to src/heat/utils_mpi.c index ff18643429dd6862c15f14ea16e93f0b480872a6..f23cd7df2c7a80230b59a3d551a01e72dc084054 100644 --- a/src/heat/mpi/utils.c +++ b/src/heat/utils_mpi.c @@ -4,8 +4,8 @@ #include #include -#include "utils.h" -#include "common/heat.h" +#include "utils_mpi.h" +#include "heat.h" int rank; int nranks; diff --git a/src/heat/mpi/utils.h b/src/heat/utils_mpi.h similarity index 91% rename from src/heat/mpi/utils.h rename to src/heat/utils_mpi.h index ecfc08884cf2a87066a2ed49985a0e7aaad0ec5e..b96d552892408f3b2d3b7f6e75d82fe3abf48d68 100644 --- a/src/heat/mpi/utils.h +++ b/src/heat/utils_mpi.h @@ -3,7 +3,7 @@ #include -#include "common/heat.h" +#include "heat.h" extern int rank; extern int nranks; @@ -17,5 +17,6 @@ typedef struct { void broadcastConfiguration(HeatConfiguration *configuration); void initializeWindows(HeatConfiguration *configuration, int64_t rows, int64_t cols, MPIRMAInfo *info); void finalizeWindows(MPIRMAInfo *info); +int mpi_level(void); #endif // MPI_UTILS_H diff --git a/src/tools/CMakeLists.txt b/src/tools/CMakeLists.txt deleted file mode 100644 index 2c6ed351700d4867137ab3900c4901fc6ac996cd..0000000000000000000000000000000000000000 --- a/src/tools/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -get_property(BENCH6_LIST GLOBAL PROPERTY bench6_list) -configure_file(config.in.h config.h) -include_directories(${CMAKE_CURRENT_BINARY_DIR}) - -add_executable(bench6_runner runner.c) -target_link_libraries(bench6_runner PRIVATE m bench6_lib) -install(TARGETS bench6_runner RUNTIME DESTINATION bin) diff --git a/src/tools/config.in.h b/src/tools/config.in.h deleted file mode 100644 index 
3e51d90d47532e7f783c44717c5fca525d32ed6b..0000000000000000000000000000000000000000 --- a/src/tools/config.in.h +++ /dev/null @@ -1,11 +0,0 @@ -/* Copyright (c) 2023 Barcelona Supercomputing Center (BSC) - * SPDX-License-Identifier: GPL-3.0-or-later */ - -#ifndef CONFIG_H -#define CONFIG_H - -#define BENCH6_PREFIX "@CMAKE_INSTALL_PREFIX@" -#define BENCH6_BIN (BENCH6_PREFIX "/bin") -#define BENCH6_LIST "@BENCH6_LIST@" - -#endif /* CONFIG_H */ diff --git a/src/tools/runner.c b/src/tools/runner.c deleted file mode 100644 index 685da3df2a9d4234ee1866c9a994e951b5808469..0000000000000000000000000000000000000000 --- a/src/tools/runner.c +++ /dev/null @@ -1,199 +0,0 @@ -#include "common.h" -#include "config.h" -#include -#include -#include -#include -#include -#include -#include -#include - -//static void -//usage(void) -//{ -// exit(1); -//} - -struct sampling { - int nmax; - int nmin; - int n; - double *samples; - double rse; - double last; -}; - -static int -do_run(char *argv[], double *ptime) -{ - /* Gather binary path */ - char path[PATH_MAX]; - sprintf(path, "%s/%s", BENCH6_BIN, argv[0]); - - if (access(path, R_OK | X_OK) != 0) { - err("cannot find benchmark %s:", path); - return -1; - } - - int pipefd[2]; - if (pipe(pipefd) != 0) { - err("pipe failed:"); - return -1; - } - - /* Fork */ - pid_t p = fork(); - - if (p < 0) { - err("fork failed:"); - return -1; - } - - /* In children execute benchmark */ - if (p == 0) { - close(pipefd[0]); - dup2(pipefd[1], 1); - close(2); - if (execve(path, argv, NULL) != 0) { - err("execve failed:"); - return -1; - } - /* Not reached */ - } else { - close(pipefd[1]); - char line[4096]; - FILE *f = fdopen(pipefd[0], "r"); - if (f == NULL) { - err("fdopen failed:"); - return -1; - } - - if (fgets(line, 4096, f) == NULL) { - err("missing stdout line"); - return -1; - } - - char *nl = strchr(line, '\n'); - if (nl != NULL) - *nl = '\0'; - - double time; - sscanf(line, "%le", &time); - //printf("got %e\n", time); - *ptime = time; - - /* 
Drain the rest of the stdout */ - while (fgets(line, 4096, f) != NULL) { } - fclose(f); - close(pipefd[0]); - } - - return 0; -} - -static void -stats(struct sampling *s) -{ - if (s->n < 2) - return; - - double n = s->n; - double sum = 0.0; - for (int i = 0; i < s->n; i++) - sum += s->samples[i]; - - double mean = sum / n; - double sumsqr = 0.0; - for (int i = 0; i < s->n; i++) { - double dev = s->samples[i] - mean; - sumsqr += dev * dev; - } - - double var = sumsqr / n; - double stdev = sqrt(var); - double se = stdev / sqrt(n); - double rse = se * 1.96 / mean; - - fprintf(stderr, "\rn=%d last=%e mean=%e stdev=%e se=%e rse=%e", - s->n, s->last, mean, stdev, se, rse); - - s->rse = rse; -} - -static int -should_continue(struct sampling *s) -{ - stats(s); - - if (s->n < s->nmin) - return 1; - - if (s->rse * 100.0 > 1.0 /* % */) - return 1; - - return 0; -} - -static void -add_sample(struct sampling *s, double time) -{ - if (s->n >= s->nmax) { - die("overflowing samples"); - } else { - s->samples[s->n] = time; - s->n++; - s->last = time; - } -} - -//static int -//compare_double(const void *a, const void *b) -//{ -// double aa = *(const double *) a; -// double bb = *(const double *) b; -// -// if (aa < bb) -// return -1; -// else if (aa > bb) -// return +1; -// else -// return 0; -//} - -static int -sample(char *argv[]) -{ - struct sampling s = { 0 }; - s.nmax = 4000; - s.nmin = 30; - s.samples = calloc(s.nmax, sizeof(double)); - s.n = 0; - - while (should_continue(&s)) { - double time; - if (do_run(argv, &time) != 0) { - err("failed to run benchmark"); - return 1; - } - - add_sample(&s, time); - } - - free(s.samples); - - return 0; -} - -int -main(int argc, char *argv[]) -{ - (void) argc; - - if (sample(argv+1) != 0) { - err("failed to sample the benchmark"); - return 1; - } - - return 0; -} diff --git a/test/ci.sh b/test/ci.sh new file mode 100755 index 0000000000000000000000000000000000000000..cef80ff336bf751258b9fa747956528e3380c763 --- /dev/null +++ b/test/ci.sh @@ 
-0,0 +1,15 @@ +#!/bin/sh + +set -e +set -x + +# Allow impure evaluation so we fetch the latest commit from the repo +bench6_ref=$(nix build --print-out-paths --impure ".#bench6Master") +bench6_cur=$(nix build --print-out-paths ".#bench6") + +# Add bigotes to the path +bigotes=$(nix build --print-out-paths 'jungle#bigotes') +export PATH="$bigotes/bin:$PATH" + +bigotes "${bench6_ref}/bin/b6_heat_nanos6" -s 2048 -t 10 -b 64 +bigotes "${bench6_cur}/bin/b6_heat_nanos6" -s 2048 -t 10 -b 64 diff --git a/test/run.sh b/test/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..250c342bed739bf85cd4a954027189483d3e7fe1 --- /dev/null +++ b/test/run.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +bigotes b6_heat_nanos6 -s 2048 -t 10 -b 64